<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Electron.</journal-id>
<journal-title>Frontiers in Electronics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Electron.</abbrev-journal-title>
<issn pub-type="epub">2673-5857</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">877629</article-id>
<article-id pub-id-type="doi">10.3389/felec.2022.877629</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Electronics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Energy-efficient neural network design using memristive MAC unit</article-title>
<alt-title alt-title-type="left-running-head">Yu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/felec.2022.877629">10.3389/felec.2022.877629</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yu</surname>
<given-names>Shengqi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1404675/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bunnam</surname>
<given-names>Thanasin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1683019/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Triamlumlerd</surname>
<given-names>Sirichai</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pracha</surname>
<given-names>Manoch</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1759810/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xia</surname>
<given-names>Fei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1850186/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Shafik</surname>
<given-names>Rishad</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1168686/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yakovlev</surname>
<given-names>Alex</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Microsystems Group</institution>, <institution>School of Engineering</institution>, <institution>Newcastle University</institution>, <addr-line>Newcastle Upon Tyne</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Computer Engineering</institution>, <institution>Faculty of Engineering</institution>, <institution>Rajamangala University of Technology Thanyaburi</institution>, <addr-line>Pathum Thani</addr-line>, <country>Thailand</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1318154/overview">Yu Cao</ext-link>, Arizona State University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/990346/overview">Mohammed Fouda</ext-link>, University of California, Irvine, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1341219/overview">Xueqing Li</ext-link>, Tsinghua University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Shengqi Yu, <email>s.yu10@newcastle.ac.uk</email>; Rishad Shafik, <email>rishad.shafik@newcastle.ac.uk</email>; Alex Yakovlev, <email>alex.yakovlev@newcastle.ac.uk</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Integrated Circuits and VLSI, a section of the journal Frontiers in Electronics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>26</day>
<month>09</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>3</volume>
<elocation-id>877629</elocation-id>
<history>
<date date-type="received">
<day>16</day>
<month>02</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>08</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Yu, Bunnam, Triamlumlerd, Pracha, Xia, Shafik and Yakovlev.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Yu, Bunnam, Triamlumlerd, Pracha, Xia, Shafik and Yakovlev</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Artificial intelligence applications implemented with neural networks require extensive arithmetic capabilities through multiply-accumulate (MAC) units. Traditional designs based on voltage-mode circuits feature complex logic chains for such purposes as carry processing. Additionally, as a separate memory block is used (e.g., in a von Neumann architecture), data movements incur on-chip communication bottlenecks. Furthermore, conventional multipliers have both operands encoded in the same physical quantity, which is either low cost to update or low cost to hold, but not both. This may be significant for low-energy edge operations. In this paper, we propose and present a mixed-signal multiply-accumulate unit design with in-memory computing to improve both latency and energy. This design is based on a single-bit multiplication cell consisting of a number of memristors and a single transistor switch (1TxM), arranged in a crossbar structure implementing the long-multiplication algorithm. The key innovation is that one of the operands is encoded in easy to update voltage and the other is encoded in non-volatile memristor conductance. This targets operations such as machine learning which feature asymmetric requirements for operand updates. Ohm&#x2019;s Law and KCL take care of the multiplication in analog. When implemented as part of a NN, the MAC unit incorporates a current to digital stage to produce multi-bit voltage-mode output, in the same format as the input. The computation latency consists of memory writing and result encoding operations, with the Ohm&#x2019;s Law and KCL operations contributing negligible delay. When compared with other memristor-based multipliers, the proposed work shows an order of magnitude of latency improvement in 4-bit implementations partly because of the Ohm&#x2019;s Law and KCL time savings and partly because of the short writing operations for the frequently updated operand represented by voltages. 
In addition, the energy consumption per multiplication cycle of the proposed work is shown to improve by 74%&#x2013;99% in corner cases. To investigate the usefulness of this MAC design in machine learning applications, its input/output relationship is characterized using multi-layer perceptrons to classify the well-known handwritten digit dataset MNIST. This case study implements quantization-aware training and includes the non-ideal effect of our MAC unit to allow the NN to learn and preserve its high accuracy. The simulation results show the NN using the proposed MAC unit yields an accuracy of 93%, which is only 1% lower than its baseline.</p>
</abstract>
<kwd-group>
<kwd>in memory computing</kwd>
<kwd>energy-efficient</kwd>
<kwd>neural network</kwd>
<kwd>multiply-accumulate (MAC) unit</kwd>
<kwd>quantization-aware training (QAT)</kwd>
<kwd>mixed-signal (MS)</kwd>
</kwd-group>
<contract-num rid="cn001">NU-007755 NU-009397</contract-num>
<contract-sponsor id="cn001">Engineering and Physical Sciences Research Council<named-content content-type="fundref-id">10.13039/501100000266</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Arithmetic operations are central to modern artificial intelligence applications implementing neural networks (NNs) (<xref ref-type="bibr" rid="B24">Park et al., 2018</xref>; <xref ref-type="bibr" rid="B30">Shafik et al., 2018</xref>). In these operations, multiplication plays a crucial role with significant impact on performance and energy efficiency, especially because traditional multiplier circuits feature complex partial product generation and carry propagation logic chains (<xref ref-type="bibr" rid="B26">Qiqieh et al., 2018</xref>). As such, reducing the energy consumption of multipliers, when used in NNs, is an ongoing design challenge.</p>
<p>For low-complexity multiplication, reducing precision is a viable method. For this, pruning the carry chains to a minimum proportion while also maintaining an acceptable precision has been proposed by numerous approximate and speculative circuit designs (<xref ref-type="bibr" rid="B8">Cilardo et al., 2014</xref>). However, these designs require careful synergy of operating voltages and frequencies to balance energy and performance trade-offs (<xref ref-type="bibr" rid="B29">Shafik et al., 2016</xref>). Moreover, the accumulation of imprecision and errors in cascaded workloads needs mitigation strategies which adds more complexity to the logic chains (<xref ref-type="bibr" rid="B39">Yakovlev, 2015</xref>). Consequently, the usability of voltage-mode proportional carry pruning schemes is still limited. On the other hand, using multipliers with very low precision may be a viable solution for certain applications. For instance, multipliers with 4-bit precision have been shown to be useful for machine learning applications (<xref ref-type="bibr" rid="B5">Chahal, 2019</xref>), including deep learning with datasets of significant sizes and complexity (<xref ref-type="bibr" rid="B34">Sun et al., 2020</xref>) and targeting datasets more relevant for mobile applications at the edge (<xref ref-type="bibr" rid="B36">Trusov et al., 2021</xref>). In this paper we target the design and implementation of low-precision MACs for low-energy edge applications.</p>
<p>Another problem with using existing arithmetic methods is related to the approach of running AI software on conventional computers based on von Neumann or Harvard architectures (<xref ref-type="bibr" rid="B44">Zheng and Mazumder, 2019</xref>). Machine learning using NNs and other AI methods involves multiple iterations of arithmetic operations with data flow between processing elements and memory being a significant bottleneck for conventional computers (<xref ref-type="bibr" rid="B44">Zheng and Mazumder, 2019</xref>; <xref ref-type="bibr" rid="B10">Fujiki et al., 2021</xref>). In-memory computing, especially using non-volatile memory technologies may provide ways of reducing the amounts of data flow required for AI applications including NNs (<xref ref-type="bibr" rid="B10">Fujiki et al., 2021</xref>; <xref ref-type="bibr" rid="B14">Hung et al., 2021</xref>). Additionally, the non-volatile property of memories can reduce the number of data movements, even when the computing system sustains power cuts or interruptions.</p>
<p>Recently, mixed-signal multiplier designs based on non-volatile memory (memristor cells) have been proposed (<xref ref-type="bibr" rid="B40">Yu et al., 2021</xref>), where the operands are expressed in multiple modes, e.g., voltage, conductance and current. Single-bit multiplication for partial product terms is performed in current mode, which naturally follows Ohm&#x2019;s law. In this mode, the voltage input and conductance represent the two operands and the resulting current represents the output. Multiple partial product terms can be accumulated using Kirchhoff&#x2019;s Current Law (KCL) by organizing the single-bit cells in a crossbar structure. With KCL, addition and subtraction are, respectively, equivalent to joining multiple current paths into a node and removing current paths from a node.</p>
<p>This type of mixed-signal multiplier is digital-in/analog-out. Because transistor switching happens when setting the memristor values and connecting the input voltages, a delay is associated with making the operands (multiplier and multiplicand) ready. After that, the single-bit multiplication operation itself only involves resistive Ohm&#x2019;s Law which can be regarded as instantaneous. This means that the partial products are immediately obtained once the operands are ready. The addition of partial products through KCL across the crossbar requires current amplification and has a delay associated solely with the amplifiers. These current amplifiers can be implemented with current mirrors, in which case there are transistor-related delays. However, they may also be implemented with memristor cell topologies in a purely resistive fashion, again achieving negligible delay. This compares to regular digital schemes which have to go through multi-stage addition and carry-handling operations with a substantial number of transistor switchings once the bit products appear (<xref ref-type="bibr" rid="B40">Yu et al., 2021</xref>). In this paper we use the second method (memristor-cell-based amplification) to drastically reduce multiplication delay, which results in significant reductions in energy per operation.</p>
<p>Another advantage of such transistor-memristor crossbar multipliers is that one of the operands is represented by memristor conductance <inline-formula id="inf1">
<mml:math id="m1">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula>, which is non-volatile with very low holding cost but costly to update, whilst the other is represented by voltage, which is fast-updating but volatile. This is a good match for such applications as NNs and reference-based arithmetic where one of the operands tends to be relatively stable and requires only sporadic change (<xref ref-type="bibr" rid="B44">Zheng and Mazumder, 2019</xref>; <xref ref-type="bibr" rid="B10">Fujiki et al., 2021</xref>; <xref ref-type="bibr" rid="B14">Hung et al., 2021</xref>), but the other requires a high update rate. This matching is especially relevant for edge applications where power may be unreliable and energy is crucial. In comparison, conventional multiplication schemes, where both operands are represented by the same physical quantity (voltage, current, or resistance/conductance), do not take advantage of operational asymmetry in operand updating.</p>
<p>On the other hand, for multi-stage operations such as NNs in a non-von Neumann neuromorphic architecture (<xref ref-type="bibr" rid="B44">Zheng and Mazumder, 2019</xref>), a digital-in/digital-out MAC unit is required. If this type of mixed-signal multiplier is to be used, additional circuits are needed to generate the appropriate digital output from the intermediate current which encodes the product.</p>
<p>Memristor-based digital multipliers exist in the literature (<xref ref-type="bibr" rid="B11">Guckert and Swartzlander, 2017</xref>) and these will be comparatively studied with our method in the paper (in e.g., <xref ref-type="sec" rid="s6-1">Section 6.1</xref>).</p>
<p>This paper describes a novel MAC unit based on mixed-signal multipliers using transistor-memristor cells on crossbar intersections. In this design, one operand is encoded in voltages for frequent value updates and the other is encoded in memristor conductance values for infrequent value updates. The intermediate products are represented by currents. By representing binary values with high and low values of these analog parameters and signals, the multipliers implement bit multiplication through Ohm&#x2019;s Law and the summation of partial products through KCL, combining both steps into the long multiplication algorithm. The intermediate current product is then converted to voltage-encoded multi-bit digital format. Targeting multi-MAC applications such as NNs, there is a built-in bit-precision reduction which makes the output the same bit resolution as the inputs, unlike typical digital multipliers which have double the number of bits in their products compared with the operands. These techniques combine to reduce the latency per multiplication for our method compared with existing memristor-based multipliers, and the latency reduction in turn causes a reduction in the energy consumption. Our main contributions are:<list list-type="simple">
<list-item>
<p>&#x2022; Developing a high energy efficiency end-to-end multiplication accumulation unit based on the transistor-memristor crossbar multiplier with mode transition for such applications as classification NNs.</p>
</list-item>
<list-item>
<p>&#x2022; Developing optimization methods such as the elimination of current mirrors by changing the topologies of memristor cells and investigating different memristor technologies resulting in an order of magnitude improvements in accuracy, speed and energy.</p>
</list-item>
<list-item>
<p>&#x2022; Demonstrating the advantages of our multipliers over existing designs through extensive theoretical and experimental investigations.</p>
</list-item>
<list-item>
<p>&#x2022; Validating the MAC design by using it as a perceptron in a non-von Neumann neural network implementation with quantization-aware training solving an example machine learning problem of a size relevant for low-energy edge applications (MNIST handwritten digit classification).</p>
</list-item>
</list>
</p>
<p>The rest of the paper is organized as follows: <xref ref-type="sec" rid="s2">Section 2</xref> describes the research baseline and technological foundations, and discusses existing related work. <xref ref-type="sec" rid="s3">Section 3</xref> presents the first of our MAC unit designs, based on the memristor multiplication cell. <xref ref-type="sec" rid="s4">Section 4</xref> describes component and circuit implementation details, and explores different multiplication cell designs in an extensive comparative study, validating the advantages of our multiplier design method on all major fronts. <xref ref-type="sec" rid="s5">Section 5</xref> presents a machine learning case study with the proposed MAC acting as perceptrons in an MLP NN. <xref ref-type="sec" rid="s7">Section 7</xref> then concludes the paper.</p>
</sec>
<sec id="s2">
<title>2 Background</title>
<p>The memristor, proposed by Chua as the fourth element in the charge and flux taxonomy (<xref ref-type="bibr" rid="B7">Chua, 1971</xref>), has a number of promising characteristics. One of these is its potential in replacing semiconductor components in processing circuits. That is because, as a switchable device, a memristor can perform similar ON-OFF operations to a transistor with adjustable doped/undoped regions, which turn memristor to ON/OFF states (See <xref ref-type="fig" rid="F1">Figure 1</xref>). This became more significant when practical memristor implementation examples appeared (<xref ref-type="bibr" rid="B33">Strukov et al., 2008</xref>; <xref ref-type="bibr" rid="B27">Radwan et al., 2012</xref>). As a nonvolatile component, the memristor has been used in memory device design, which is now called &#x201c;resistive memory&#x201d; (<xref ref-type="bibr" rid="B13">Ho et al., 2009</xref>). At the same time, the possibilities for performing arithmetic with memristors have also been explored, with multiplication being viewed as especially promising (<xref ref-type="bibr" rid="B28">Reid, 2009</xref>). Memristor cell methods have also featured in complex logic calculations such as &#x201c;material implication&#x201d; (IMP) (<xref ref-type="bibr" rid="B3">Borghetti et al., 2010</xref>). Computation processing units based on memristors have been designed for multiple applications, such as signal processing, artificial intelligence training, hardware acceleration, and encoding/decoding (<xref ref-type="bibr" rid="B12">Gupta et al., 2016</xref>; <xref ref-type="bibr" rid="B16">Krestinskaya et al., 2020</xref>). All these have multiply-accumulate operation at the centre, for which the memristor unit is well suited.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Memristor structure and component details. Memristor resistance depends on the width of the doped/undoped regions. The region width is changed by the electrical potential difference across the component terminals: if it is over the threshold, the higher-potential terminal extends its respective region width.</p>
</caption>
<graphic xlink:href="felec-03-877629-g001.tif"/>
</fig>
<p>The crossbar structure has been used to implement, in a straightforward manner, various types of multiplication including matrix multiplication (<xref ref-type="bibr" rid="B21">Li et al., 2018</xref>) and Shift-and-Add multiplication (<xref ref-type="bibr" rid="B11">Guckert and Swartzlander, 2017</xref>). The transistor-memristor pair has shown high precision in controlling operations, making it useful in large scale circuits, demonstrating high potential in performance, efficiency, and latency (<xref ref-type="bibr" rid="B6">Chen et al., 2019</xref>). One transistor one memristor (1T1M) was also favourably compared with Wallace-tree methods and conventional CMOS approach for the same metrics (<xref ref-type="bibr" rid="B42">Yu et al., 2020b</xref>). A crossbar multiplier approach with one transistor multiple memristor (1TxM) cells additionally demonstrated higher precision (<xref ref-type="bibr" rid="B41">Yu et al., 2020a</xref>). Space taken by the 1TxM cell may be reduced by merging memristors with higher margin values (<xref ref-type="bibr" rid="B40">Yu et al., 2021</xref>).</p>
<p>In (<xref ref-type="bibr" rid="B40">Yu et al., 2021</xref>), on which this paper is partially based, a memristor-cell crossbar structure implements both the single-bit multiplication at each cell and the addition part of the multi-bit multiplication algorithm across the crossbar, with significance-related current amplification for different bit positions so that the total summed current corresponds to the correct final product for output. A crossbar multiplier is potentially an area-saving solution because the memristor crossbar can be built on top of the transistor-related layers using a back-end-of-line process (<xref ref-type="bibr" rid="B9">Constantoudis et al., 2019</xref>). Therefore, the area can be smaller than that used by the traditional CMOS multiplier.</p>
<p>Several designs of memristor cells have been proposed in the literature. Example cell structures include the single memristor (1M) cell, the multiple memristor (xM) cell, the single transistor single memristor (1T1M) cell, and single transistor multiple memristor (1TxM) cell. These designs focus on generating different combinations of memristor resistance (<italic>R</italic>
<sub>
<italic>M</italic>
</sub>) for respective memristor conductance (<italic>G</italic>
<sub>
<italic>M</italic>
</sub>) to achieve target arithmetic expressions. Usually, logic operation on a memristor is achieved by adjusting the voltage across it. A crossbar based on 1M cells cannot provide the correct currents for digits of different significance without additional current multiplication, usually with current mirrors. The xM-cell crossbar is able to generate the required output current without additional circuits (<xref ref-type="bibr" rid="B42">Yu et al., 2020b</xref>).</p>
<p>Moreover, current amplification is necessary in a crossbar mixed-signal multi-bit multiplier where current represents product. With each cell producing a current representing the value of Boolean 0 or 1, KCL can only work to produce a total current representing the multi-bit product if the current value at any bit position is amplified correctly according to the bit&#x2019;s value significance. In other words, any bit should be twice the value of the bit to its right. This is conventionally implemented with a current-mirror-based current amplifier at each bit on 1M cell crossbar. By tuning the output transistor size in a current mirror, a bit&#x2019;s correct significance can be set. However, this scheme results in extra area cost from potentially very large transistors. In addition, power and latency requirements of large transistors in current mirrors also limit system efficiency and performance (<xref ref-type="bibr" rid="B43">Yuan, 2006</xref>).</p>
<p>Taking advantage of memristor resistivity, the resistive xM cell can perform amplification by adjusting cell <italic>R</italic>
<sub>
<italic>M</italic>
</sub> for the target operand. The most straightforward method is to keep single-memristor resistances the same across the multiplier, but build 1TxM cells with different numbers (<italic>x</italic> values) of parallel memristors corresponding to their bit significances. For instance, we may use 1M for bit 0, 2M for bit 1, 4M for bit 2, 8M for bit 3, etc. In this way, the cells perform the required current amplification, removing the need for current mirrors. When applied to the crossbar architecture, both 1M and xM cells help reduce the energy cost and latency. Meanwhile, the space cost of multipliers based on these cells can also be lower (<xref ref-type="bibr" rid="B21">Li et al., 2018</xref>).</p>
</sec>
<sec id="s3">
<title>3 Multiply and accumulation unit</title>
<p>Our MAC unit consists of a memristor-transistor crossbar multiplier and a mixed-signal Flash analog-to-digital converter (ADC), as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. In this section, the main parts of this MAC unit will be introduced.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>MAC units structure.</p>
</caption>
<graphic xlink:href="felec-03-877629-g002.tif"/>
</fig>
<sec id="s3-1">
<title>3.1 Transistor-memristor crossbar multiplier</title>
<sec id="s3-1-1">
<title>3.1.1 Memristor</title>
<p>Decades after the inception of the memristor, in addition to the general mathematical model, analog behavioral models (ABMs) were also developed for deeper research on memristor characteristics in circuits. The linear ion drift model was first developed from the basic memristive definition of the memristor current-voltage relationship. This model uses the current-control method to adjust the doped region width for changing memristor resistance (<xref ref-type="bibr" rid="B33">Strukov et al., 2008</xref>). However, the ideal assumption that the doped region width changes linearly is unrealistic and especially undesirable for logic circuits.</p>
<p>As a result, with assistance of window function to regulate relation between physical device size and resistance variation, the nonlinear ion drift model attempts to represent the complexity of fabricated memristive device state drift (<xref ref-type="bibr" rid="B20">Lehtonen and Laiho, 2010</xref>). As early stage models, both the linear ion drift model and the nonlinear ion drift model offer low accuracy for the building of oxide region and doped oxide region like two series connected resistors. Aiming at building a more realistic model, a more accurate physical model is built by connecting an electron tunnel barrier with a resistor in series.</p>
<p>This is called the Simmons tunnel barrier model; it shows a relatively high level of accuracy for TiO<sub>2</sub> memristive devices at the same level of complexity (<xref ref-type="bibr" rid="B2">Berdan et al., 2014</xref>). To balance accuracy and complexity, Kvatinsky et al. simplified the physical behavior and the complexity of the mathematical functions in the Simmons tunnel barrier model, producing the threshold adaptive memristor (TEAM) model with a reasonable balance between accuracy and computational efficiency (<xref ref-type="bibr" rid="B18">Kvatinsky et al., 2013</xref>). Since memristive devices were found to exhibit a threshold voltage, Kvatinsky et al. updated the TEAM ABM to the voltage threshold adaptive memristor (VTEAM) model (<xref ref-type="bibr" rid="B19">Kvatinsky et al., 2015</xref>). As a threshold-based voltage-driven model, VTEAM combines the advantage of the TEAM model with multiple freely chosen current-voltage characteristics. This helps to precisely estimate all reported physical device behaviours, such as linear ion drift (<xref ref-type="bibr" rid="B33">Strukov et al., 2008</xref>), nonlinear ion drift (<xref ref-type="bibr" rid="B20">Lehtonen and Laiho, 2010</xref>) and the Simmons tunnel barrier (<xref ref-type="bibr" rid="B2">Berdan et al., 2014</xref>), yet it exhibits superior computation efficiency especially for memory and logic applications (<xref ref-type="bibr" rid="B19">Kvatinsky et al., 2015</xref>; <xref ref-type="bibr" rid="B32">Singh et al., 2016</xref>). This paper makes use of the VTEAM memristor model in design and analysis.</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Memristor-transistor multiplication cell</title>
<p>In <xref ref-type="fig" rid="F3">Figure 3</xref>, the single-bit multiplication cell is represented. The serial connection of multiple memristors (xM) and multiple transistors (yT) generates the basic multiplication cell in the proposed multiplier.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>yTxM multiplication cell structure.</p>
</caption>
<graphic xlink:href="felec-03-877629-g003.tif"/>
</fig>
<p>A memristor can be set in two interchangeable states: high conductance state (HCS) and low conductance state (LCS). These two states are used to represent the value of one of the two single-bit operands (inputs). When providing/preparing the value of this operand, the cell works in writing mode, with the input voltage used to write either HCS or LCS into the memristor. After this operand is set, the cell can work in reading mode, which is the multiplication operation. In reading mode, the input voltage takes the value of the other operand and is in either of the two states: high voltage state (HVS) and low voltage state (LVS). The cell current then forms the output (product) of the single-bit multiplication according to Ohm&#x2019;s law, and is also in Boolean format with high and low states. The transistors additionally serve the purpose of turning the cell off (not writing and not reading, but holding the operand encoded in the memristor conductance state). Representing both operands with memristor parameters, however, reduces the usefulness of the multiplier because of the writing cycle limitations of memristors (<xref ref-type="bibr" rid="B45">Khan et al., 2021</xref>). Millions of writing cycles are appropriate for a slow-changing operand such as a control system coefficient or a weight parameter in machine learning, but far from enough for the fast-changing values multiplied by them. A scheme with asymmetric non-volatility is therefore needed.</p>
<p>Therefore, the operation of the multiplication cell can be easily used to encode Boolean logic: HCS and HVS represent logic 1, LCS and LVS represent logic 0. Similarly, the output current also has high and low states which can encode logic 1 and logic 0. In this way, a memristor-transistor cell can perform single-bit multiplication (same as logic AND).</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Crossbar multiplier</title>
<p>Single-bit multiplication cells are then composed into a multi-bit multiplier using a crossbar structure, with KCL taking charge of the partial product addition step. A 4-bit case can be seen in <xref ref-type="fig" rid="F4">Figure 4</xref>. In this figure, all single-bit multiplication cells are included in the Ohm&#x2019;s law zone (marked in brown dashed lines). On the other hand, all wires and nodes through which currents flow belong to the KCL zone, marked in purple dashed lines. In the KCL zone, nodes &#x201c;Digit1&#x201d; to &#x201c;Digit7&#x201d; represent partial products while the current through the load resistor <italic>R</italic>
<sub>
<italic>out</italic>
</sub> is the final product. Note that, unlike the common long-multiplication algorithm, there is no attempt at finding horizontal partial products and no attempt at passing carries horizontally. All partial products are generated vertically. Carries can be avoided because the vertical partial products and the final product are encoded in currents whose upper limits are higher than the current that encodes a single logic 1. In other words, the currents at the Digit1 to Digit7 nodes and <italic>I</italic>
<sub>
<italic>out</italic>
</sub> can take values that are multiples of the high current state across a single memristor which encodes logic 1 at the lowest level of detail. For instance Digit2&#x2019;s current may be up to four times this single-memristor logic 1 and the maximum value of the partial product at Digit2 is therefore 4 (because each <italic>MC</italic>
<sub>2</sub> may generate twice the maximum current compared with <italic>MC</italic>
<sub>1</sub>), instead of 2 in the case of a typical digital multiplier at this bit position.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>4-bit crossbar multiplier structure. RL provides biasing for memristor, GL provides biasing for transistor, CL provides the current path for MC with different multiplier significance.</p>
</caption>
<graphic xlink:href="felec-03-877629-g004.tif"/>
</fig>
<p>Since the multiplication is performed by fixed voltage values for 0 and 1 from the voltage operand, the output currents of cells in each column corresponding to logic 1 at these cells need to be set according to the column&#x2019;s digit significance. Avoiding current-mirror amplifiers, this can be implemented by using <italic>x</italic> memristors in parallel with the appropriate <italic>x</italic> value. The relation between <italic>x</italic> and the digit significance <italic>N</italic> follows <xref ref-type="disp-formula" rid="e1">Eq. 1</xref>:<disp-formula id="e1">
<mml:math id="m2">
<mml:mi>x</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Let us use the 4-bit multiplier in <xref ref-type="fig" rid="F4">Figure 4</xref> as an example, assume cell transistors are ideal switches, <italic>V</italic>
<sub>
<italic>MH</italic>
</sub> and <italic>V</italic>
<sub>
<italic>ML</italic>
</sub> as high voltage and low voltage operand inputs, and <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> and <italic>R</italic>
<sub>
<italic>ML</italic>
</sub> as high and low cell resistance (memristor resistance) operand inputs. In each cell, the possible output current states can be found in <xref ref-type="fig" rid="F5">Figure 5</xref> as <italic>I</italic>
<sub>1</sub>, <italic>I</italic>
<sub>2</sub>, <italic>I</italic>
<sub>3</sub>, and <italic>I</italic>
<sub>4</sub>. Since the logic 1 state is defined by <italic>V</italic>
<sub>
<italic>MH</italic>
</sub> and <italic>R</italic>
<sub>
<italic>ML</italic>
</sub>, <italic>I</italic>
<sub>4</sub> is the output current representing logic 1, whilst the other three current states <italic>I</italic>
<sub>1</sub>, <italic>I</italic>
<sub>2</sub> and <italic>I</italic>
<sub>3</sub> all represent logic 0 because at least one of their input operands encodes 0. Given the cell structure, none of <italic>I</italic>
<sub>1</sub>, <italic>I</italic>
<sub>2</sub> and <italic>I</italic>
<sub>3</sub> can be true 0A. This is because <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> cannot be true infinity and to maintain the commutative property of multiplication, true 0&#xa0;V should not be used in the voltage input operand either. Because of KCL, potentially a large number of relatively small <italic>I</italic>
<sub>1</sub>, <italic>I</italic>
<sub>2</sub> and <italic>I</italic>
<sub>3</sub> values may be accumulated with the sum still required to represent a product value of 0. In other words, a single <italic>I</italic>
<sub>4</sub> needs to be greater in value than the sum of a large number of <italic>I</italic>
<sub>1</sub>, <italic>I</italic>
<sub>2</sub> and <italic>I</italic>
<sub>3</sub> values to differentiate 0 and 1&#xa0;at the final product.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Mapping of all multiplication output current.</p>
</caption>
<graphic xlink:href="felec-03-877629-g005.tif"/>
</fig>
<p>The final result <italic>I</italic>
<sub>
<italic>out</italic>
</sub> matrix shown in <xref ref-type="fig" rid="F5">Figure 5</xref> illustrates this issue in detail by enumerating all possible <italic>I</italic>
<sub>
<italic>out</italic>
</sub> values across all possible combinations of input operand values. This current map assumes that the operand encoded in voltage is called multiplier and the other operand encoded in memristor resistance is called multiplicand, without losing generality. Each operand is 4 bit wide and takes values from 0 to 15. When the multiplier increases from 0 to 15 we move from left to right along the <italic>i</italic> axis, 0 &#x2264; <italic>i</italic> &#x2264; 15, and when the multiplicand increases from 0 to 15 we move from top to bottom along the <italic>j</italic> axis, 0 &#x2264; <italic>j</italic> &#x2264; 15. At each position (<italic>i</italic>, <italic>j</italic>) in the matrix, <italic>I</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub> encodes the product of multiplying (multiplier &#x3d; <italic>i</italic>) by (multiplicand &#x3d; <italic>j</italic>). To simplify the presentation, we use four coefficients <italic>a</italic>, <italic>b</italic>, <italic>c</italic> and <italic>d</italic> to differentiate all the output currents and define <italic>I</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub> as <italic>I</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub> &#x3d; <italic>aI</italic>
<sub>1</sub> &#x2b; <italic>bI</italic>
<sub>2</sub> &#x2b; <italic>cI</italic>
<sub>3</sub> &#x2b; <italic>dI</italic>
<sub>4</sub>. This means that moving down in the matrix, <italic>a</italic> decreases and <italic>c</italic> increases, with <italic>b</italic> and <italic>d</italic> held constant, and moving right in the matrix, <italic>b</italic> decreases and <italic>d</italic> increases, with <italic>a</italic> and <italic>c</italic> held constant. Because 15 &#xd7; 15 &#x3d; 225, <italic>a</italic> &#x2b; <italic>b</italic> &#x2b; <italic>c</italic> &#x2b; <italic>d</italic> &#x3d; 225. The four corner cases of the matrix are therefore <italic>I</italic>
<sub>
<italic>out</italic>
</sub> &#x3d; 225<italic>I</italic>
<sub>1</sub>, <italic>I</italic>
<sub>
<italic>out</italic>
</sub> &#x3d; 225<italic>I</italic>
<sub>2</sub>, <italic>I</italic>
<sub>
<italic>out</italic>
</sub> &#x3d; 225<italic>I</italic>
<sub>3</sub>, indicating final product values of 0 &#x3d; 0 &#xd7; 0 &#x3d; 0 &#xd7; 15 &#x3d; 15 &#xd7; 0, and <italic>I</italic>
<sub>
<italic>out</italic>
</sub> &#x3d; 225<italic>I</italic>
<sub>4</sub> which indicates a final product value of 225 &#x3d; 15 &#xd7; 15.</p>
<p>For the 4-bit crossbar multiplier shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, the coefficients <italic>a</italic>, <italic>b</italic>, <italic>c</italic> and <italic>d</italic> are related to the operand values <italic>i</italic> and <italic>j</italic> according to <xref ref-type="disp-formula" rid="e2">Eqs 2&#x2013;5</xref>
<disp-formula id="e2">
<mml:math id="m3">
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(2)</label>
</disp-formula>
<disp-formula id="e3">
<mml:math id="m4">
<mml:mi>b</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m5">
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m6">
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>For a general <italic>N</italic> &#xd7; <italic>N</italic>-bit multiplier, the equations above are replaced by <xref ref-type="disp-formula" rid="e6">Eqs 6&#x2013;9</xref>, where 0 &#x2264; <italic>i</italic> &#x2264; (2<sup>
<italic>N</italic>
</sup> &#x2212; 1) and 0 &#x2264; <italic>j</italic> &#x2264; (2<sup>
<italic>N</italic>
</sup> &#x2212; 1).<disp-formula id="e6">
<mml:math id="m7">
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m8">
<mml:mi>b</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m9">
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m10">
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>From these, the output current for position (<italic>i</italic>, <italic>j</italic>) in the result current matrix can be found according to <xref ref-type="disp-formula" rid="e10">Eq. 10</xref>
<disp-formula id="e10">
<mml:math id="m11">
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mi>j</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>Assuming a base voltage <italic>V</italic>
<sub>0</sub> &#x2260; 0 and base resistance <italic>R</italic>
<sub>0</sub> &#x2260; 0, we can relate the high and low memristor voltages and resistances to these base values as in <xref ref-type="disp-formula" rid="e11">Eq. 11</xref>:<disp-formula id="e11">
<mml:math id="m12">
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em"/>
<mml:mspace width="0.3333em"/>
<mml:mspace width="0.3333em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>Then, the base current <italic>I</italic>
<sub>0</sub> &#x3d; <italic>V</italic>
<sub>0</sub>/<italic>R</italic>
<sub>0</sub> can be substituted into <italic>I</italic>
<sub>1</sub>&#x2013;<italic>I</italic>
<sub>4</sub>, resulting in <xref ref-type="disp-formula" rid="e12">Eqs 12</xref>&#x2013;<xref ref-type="disp-formula" rid="e15">15</xref>
<disp-formula id="e12">
<mml:math id="m13">
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m14">
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(13)</label>
</disp-formula>
<disp-formula id="e14">
<mml:math id="m15">
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(14)</label>
</disp-formula>
<disp-formula id="e15">
<mml:math id="m16">
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>Substituting <xref ref-type="disp-formula" rid="e12">Eqs 12</xref>&#x2013;<xref ref-type="disp-formula" rid="e15">15</xref> into <xref ref-type="disp-formula" rid="e10">Eq. 10</xref> and simplifying the result, we obtain <xref ref-type="disp-formula" rid="e16">Eq. 16</xref>
<disp-formula id="e16">
<mml:math id="m17">
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<p>It is evident that the multiplication is commutative iff <inline-formula id="inf2">
<mml:math id="m18">
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula>. In practice, this is ensured by adjusting the parameters of cell components to make the contributions of both operands symmetrical and linear.</p>
</sec>
<sec id="s3-1-4">
<title>3.1.4 Precision analysis</title>
<p>This type of digital-in/analog-out multiplier does not represent Boolean 0 in the operands with true 0 values of physical parameters&#x2013;the high resistance state (HRS) of a memristor cannot have a conductance of true 0 and the low resistance state (LRS) of a memristor cannot have a conductance of infinity. This means that <italic>I</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub> cannot be 0&#xa0;amps even when it represents a Boolean value of 0. Consequently, when multiple Boolean 0&#x2019;s are added together to produce an overall product <italic>p</italic> of 0, the actual value of <italic>I</italic>
<sub>
<italic>out</italic>
</sub> representing <italic>p</italic> &#x3d; 0 is not 0&#xa0;amps.</p>
<p>The maximal precision of such a multiplier is therefore limited by the ratio between <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> and <italic>R</italic>
<sub>
<italic>ML</italic>
</sub>, which is technology-dependent. This is because the value of <italic>I</italic>
<sub>
<italic>out</italic>
</sub> that represents <italic>p</italic> &#x3d; 0 must be lower than the value of <italic>I</italic>
<sub>
<italic>out</italic>
</sub> that represents <italic>p</italic> &#x3d; 1. Conservatively, this is true if <italic>I</italic>
<sub>
<italic>out</italic>
</sub> representing <italic>p</italic> &#x3d; 0 is lower than the current <italic>I</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub> representing a single bit value of 1. In other words, if the following inequality is true, the multiplier precision is not violated at a specific word length.<disp-formula id="e17">
<mml:math id="m19">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3e;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">maxN</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:math>
<label>(17)</label>
</disp-formula>where <italic>P</italic>
<sub>
<italic>maxN</italic>
</sub> is the maximal value of the product for an <italic>N</italic> &#xd7; <italic>N</italic>-bit multiplier. For instance, for a four-bit multiplier <italic>P</italic>
<sub>
<italic>max4</italic>
</sub> &#x3d; 225 and for a five-bit multiplier <italic>P</italic>
<sub>
<italic>max5</italic>
</sub> &#x3d; 961.</p>
<p>TiO<sub>2</sub> memristors have a memristance state ratio between <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> and <italic>R</italic>
<sub>
<italic>ML</italic>
</sub> around 300 (<xref ref-type="bibr" rid="B17">Kvatinsky et al., 2014</xref>), which is marginally satisfactory for a four-bit multiplier, and Cu:ZnO memristors have a memristance state ratio around 1,000 (<xref ref-type="bibr" rid="B35">Suresh et al., 2019</xref>), which is marginally satisfactory for a five-bit multiplier. In this work, we select Cu:ZnO memristors for a four-bit implementation for a better accuracy margin and because the application itself does not demand high precision. With future memristor and other resistive non-volatile memory technologies, higher precision realizations may be possible.</p>
<p>Another important issue that may cause precision degradations in this type of multiplier is the variability of the crucial memristor characteristics <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> and <italic>R</italic>
<sub>
<italic>ML</italic>
</sub>. The effect of this variability will be investigated at the stage of final neural network application case studies (<xref ref-type="sec" rid="s6-3">Section 6.3</xref>).</p>
<p>Comprehensive comparative studies of the crossbar part of the MAC including numerical correctness and non-functional metrics such as energy and speed can be found in (<xref ref-type="bibr" rid="B40">Yu et al., 2021</xref>).</p>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Flash ADC</title>
<p>After the analog output <italic>I</italic>
<sub>
<italic>i</italic>,<italic>j</italic>
</sub> is generated, its value needs to be represented as a 4-bit (or <italic>N</italic>-bit for the general case) digital value either in memristor resistance or voltage encoding for the entire MAC unit to function in a multi-MAC NN using copies of the same MAC hardware. Since the memristor resistance values are written in by digital voltage signals, we do not lose generality if a 4-bit MAC outputs a 4-bit voltage encoded product (4 Boolean voltage signals).</p>
<p>We implement this functionality by using a flash ADC, designed from components adapted from (<xref ref-type="bibr" rid="B4">Bui et al., 2010</xref>; <xref ref-type="bibr" rid="B37">Vinayaka et al., 2019</xref>). The choice of using thermometer code as an intermediate step comes from the desire to make this MAC approximate in the sense of generating a 4-bit product from input operands which themselves are also in 4 bit width. This ADC consists of a single-action multiple-current comparator, buffer array and a ROM (read only memory) encoder. This section describes this part of the system in detail.</p>
<sec id="s3-2-1">
<title>3.2.1 <italic>Thermometer</italic> code generating current comparator</title>
<p>In <xref ref-type="fig" rid="F6">Figure 6A</xref>, the current comparator is represented. Given that the digital output is expected to be in 4 bits, the comparator is set to 16-value thermometer code output. The input current is mirrored by a P-type current mirror, which generates a row of pull-up current sources; similarly, the reference current is mirrored by an N-type current mirror, which generates a row of pull-down current sinks. By adjusting the size of <italic>M</italic>
<sub>1</sub> to <italic>M</italic>
<sub>
<italic>N</italic>
</sub>, the reference current can be set to different levels. If a current source has a higher value than the corresponding current sink, the voltage at the junction point is pulled up to Vdd, otherwise, the junction point voltage is pulled down to ground. Therefore, the comparator will generate a thermometer code in the buffer array.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Structure of flash ADC. In <bold>(A)</bold>, current comparator thermometer code generator (<xref ref-type="bibr" rid="B37">Vinayaka et al., 2019</xref>) is presented, in <bold>(B)</bold>, ROM thermometer to binary encoder (<xref ref-type="bibr" rid="B4">Bui et al., 2010</xref>; <xref ref-type="bibr" rid="B37">Vinayaka et al., 2019</xref>) is presented.</p>
</caption>
<graphic xlink:href="felec-03-877629-g006.tif"/>
</fig>
<p>In order to make this design work for our 4-bit crossbar mixed-signal multiplier, the transistor sizes need to be tuned to fit the multiplier current output characteristics. The details of MOS transistor size choices are listed in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Thermometer code generator transistor size.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Component</th>
<th align="left">Size</th>
<th align="left">Component</th>
<th align="left">Size</th>
<th align="left">Component</th>
<th align="left">Size</th>
<th align="left">Component</th>
<th align="left">Size</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<italic>M</italic>
<sub>
<italic>in</italic>
</sub>
</td>
<td align="left">3.2&#xa0;<italic>&#x3bc;</italic>m</td>
<td align="left">
<italic>M</italic>
<sub>
<italic>ref</italic>
</sub>
</td>
<td align="left">1.6&#xa0;<italic>&#x3bc;</italic>m</td>
<td align="left">
<italic>P</italic>
<sub>1</sub>
</td>
<td align="left">100&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>11</sub>
</td>
<td align="left">80&#xa0;nm</td>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>
<italic>out</italic>
</sub>
</td>
<td align="left">1.6&#xa0;<italic>&#x3bc;</italic>m</td>
<td align="left"/>
<td align="left"/>
<td align="left">
<italic>P</italic>
<sub>2</sub>
</td>
<td align="left">140&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>12</sub>
</td>
<td align="left">100&#xa0;nm</td>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>1</sub>
</td>
<td align="left">100&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>9</sub>
</td>
<td align="left">715&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>3</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>13</sub>
</td>
<td align="left">100&#xa0;nm</td>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>2</sub>
</td>
<td align="left">110&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>10</sub>
</td>
<td align="left">785&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>4</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>14</sub>
</td>
<td align="left">100&#xa0;nm</td>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>3</sub>
</td>
<td align="left">310&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>11</sub>
</td>
<td align="left">850&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>5</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>15</sub>
</td>
<td align="left">100&#xa0;nm</td>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>4</sub>
</td>
<td align="left">365&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>12</sub>
</td>
<td align="left">965&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>6</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left">
<italic>P</italic>
<sub>16</sub>
</td>
<td align="left">100&#xa0;nm</td>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>5</sub>
</td>
<td align="left">440&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>13</sub>
</td>
<td align="left">1&#xa0;<italic>&#x3bc;</italic>m</td>
<td align="left">
<italic>P</italic>
<sub>7</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>6</sub>
</td>
<td align="left">510&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>14</sub>
</td>
<td align="left">1.11&#xa0;<italic>&#x3bc;</italic>m</td>
<td align="left">
<italic>P</italic>
<sub>8</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>7</sub>
</td>
<td align="left">580&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>15</sub>
</td>
<td align="left">1.19&#xa0;<italic>&#x3bc;</italic>m</td>
<td align="left">
<italic>P</italic>
<sub>9</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">
<italic>M</italic>
<sub>8</sub>
</td>
<td align="left">650&#xa0;nm</td>
<td align="left">
<italic>M</italic>
<sub>16</sub>
</td>
<td align="left">1.27&#xa0;<italic>&#x3bc;</italic>m</td>
<td align="left">
<italic>P</italic>
<sub>10</sub>
</td>
<td align="left">80&#xa0;nm</td>
<td align="left"/>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2-2">
<title>3.2.2 <italic>Thermometer</italic> code to binary encoder</title>
<p>The thermometer code is an intermediary format which, after serving the purpose of fast comparison and product precision adjustment, has to be converted into voltage binary code for MAC output. The structure of the thermometer to binary encoder is presented in <xref ref-type="fig" rid="F6">Figure 6B</xref>. As can be seen, this encoder consists of an AND gate array and a ROM encoder. For a 4-bit digital output, the 16-value thermometer code is first converted by the AND array to a 16-digit one hot code, which is then fed to the ROM encoder to generate a 4-bit binary output.</p>
<p>The complete MAC unit therefore accepts as inputs a multiplier in the form of 4-bit binary voltage signals and a multiplicand in the form of 4-bit binary memristor conductance values, and generates a product in the form of 4-bit binary voltage signals. This voltage-encoded 4-bit binary number can then be used directly as the multiplier for another MAC of the same configuration, or used to write the multiplicand for it. This means that the digital-to-digital MAC can be instantiated multiple times to form a NN or other machines that require a number of distinct MAC units of the same type working together.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<title>4 Investigation of performance and functional correctness</title>
<p>This section describes results from investigating a number of implementations of our MAC unit. All parts of the system are realized in UMC 65&#xa0;nm technology and studied in the Cadence Virtuoso environment through analog simulations.</p>
<sec id="s4-1">
<title>4.1 Multiplication cell design</title>
<p>The structure of our multiplication cell is presented in <xref ref-type="fig" rid="F3">Figure 3</xref>; the parallel-connected memristors and transistors are marked in brown to indicate that they operate under Ohm&#x2019;s law. Similarly, the cell output current path to the column line is marked in purple to indicate the KCL operation. Since the multiplication cell works as a conductive component on the crossbar, both the memristor and the transistor contribute to the cell conductance. Therefore, it is important to ensure that the memristor dominates the cell conductance because we use the transistors as (ideal) switches. In other words, the high value of memductance should be much larger than the ON state transistor conductance, making the contribution to current by the transistor negligible. Meanwhile, the OFF state transistor conductance should be small enough to isolate a selected cell from the rest of the crossbar so that it can be in holding mode whilst other cells are written. With the memristor count for each cell determined by the digit significance, the transistor count and size need adjustments to balance that. Therefore, cells with fixed ratios of memristor count and transistor count are studied on our 4-bit crossbar multiplier.</p>
<p>In <xref ref-type="fig" rid="F7">Figures 7A,B</xref>, comparisons between crossbars built with the respective yTxM cells are shown. As can be seen, the 4-bit crossbar multiplier generates the same levels of <italic>I</italic>
<sub>
<italic>out</italic>
</sub> with cells of different transistor and memristor counts, and the product values are symmetric between multiplicand and multiplier, indicating commutative multiplication. However, the 1T2M&#xa0;cell stands out in the error rate comparison. The maximum error rate of the 1T2M&#xa0;cell crossbar multiplier is 0.58%, while for the 7T16M&#xa0;cell it is 0.72% and for the 15T32M&#xa0;cell it is 0.86%. Therefore, apart from the least significant bit using a 1T1M&#xa0;cell, all the multiplication cells in this 4-bit multiplier follow the memristor-transistor ratio of 1T2M, i.e., two memristors for each transistor in a cell.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>yTxM MC performance mapping. In <bold>(A)</bold>, output current error rate in all 4 by 4 multiplications mapping is presented, in <bold>(B)</bold>, output current in all 4 by 4 multiplications mapping is presented.</p>
</caption>
<graphic xlink:href="felec-03-877629-g007.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>4.2 MAC unit design</title>
<p>The 4-bit crossbar multiplier shown in <xref ref-type="fig" rid="F4">Figure 4</xref> has two operations in each multiplication: writing (operand preparation) and reading (multiplying). When multiplication starts with a new multiplicand, all multiplication cells will be cleared to LCS by each row line (RL), then the multiplicand is written by each gate line (GL) column. Finally, the reading (multiplier) voltages are applied on all RLs while all cell transistors are switched on. The multiplication result can be obtained from the ADC out terminal (see <xref ref-type="fig" rid="F6">Figure 6B</xref>). When multiplying with an existing multiplicand, the writing step is omitted and the reading step starts directly. That is why this multiplier is well suited for asymmetrical multiplication applications, such as multiplying variables by coefficient/reference values as found in monitoring and control applications and in certain operations of NNs where one of the operands (e.g., the multiplicand) does not change too often.</p>
<p>ADC transistor design parameters are presented in <xref ref-type="table" rid="T1">Table 1</xref> and writing operation setting parameters are presented in <xref ref-type="table" rid="T2">Table 2</xref>. To reduce latency, the writing operations are parallelized on a per-row basis. To match the values of high and low memductance, the reading (multiplier) voltage has values of 0.42&#xa0;V as logic 0 and 0.7&#xa0;V as logic 1. The total delay of each multiplication is 2&#xa0;ns, which is almost entirely ADC delay. Three multiplications, 15 &#xd7; 15, 15 &#xd7; 0 and 9 &#xd7; 6, are tested on the 4-bit multiplier; <xref ref-type="fig" rid="F8">Figures 8A,B</xref> present the results.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Multiplier operation design details.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Area</th>
<th colspan="2" align="left">Time (ns)</th>
<th colspan="2" align="left">Voltage (V)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">Entire Crossbar</td>
<td align="left">write 1</td>
<td align="left">write 0</td>
<td align="left">write 1</td>
<td align="left">write 0</td>
</tr>
<tr>
<td align="left">0.43</td>
<td align="left">16.9</td>
<td align="left">1.8</td>
<td align="left">&#x2212;2</td>
</tr>
<tr>
<td rowspan="2" align="left">Single Row</td>
<td align="left">write 1</td>
<td align="left">write 0</td>
<td align="left">write 1</td>
<td align="left">write 0</td>
</tr>
<tr>
<td align="left">0.275</td>
<td align="left">0.43</td>
<td align="left">1.8</td>
<td align="left">&#x2212;2</td>
</tr>
<tr>
<td rowspan="2" align="left">Single Cell</td>
<td align="left">write 1</td>
<td align="left">write 0</td>
<td align="left">write 1</td>
<td align="left">write 0</td>
</tr>
<tr>
<td align="left">0.261</td>
<td align="left">&#x2014;</td>
<td align="left">1.8</td>
<td align="left">&#x2014;</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Multiplication results for three cases. In <bold>(A)</bold>, the three multiplication output currents are presented; the red dash steps are the thresholds for each digital output. In <bold>(B)</bold>, the respective digital outputs from B0 (LSB) to B3 (MSB) are presented. The interval 0&#x2013;2.97&#xa0;ns is 15 &#xd7; 15, 4.57&#x2013;7.13&#xa0;ns is 0 &#xd7; 0, and 10.77&#x2013;13.2&#xa0;ns is 9 &#xd7; 6.</p>
</caption>
<graphic xlink:href="felec-03-877629-g008.tif"/>
</fig>
<p>The red dash steps in <xref ref-type="fig" rid="F8">Figure 8A</xref> are the thresholds for the current comparator, which translates currents to a thermometer code. For instance, <italic>I</italic>
<sub>
<italic>out</italic>
</sub> &#x3d; 100&#xa0;<italic>&#x3bc;</italic>A translates to the thermometer code value of 8, and 9 &#xd7; 6 results in <italic>I</italic>
<sub>
<italic>out</italic>
</sub> &#x2248; 90&#xa0;<italic>&#x3bc;</italic>A which translates to the thermometer code of 7. The output bit voltages are recorded in <xref ref-type="fig" rid="F8">Figure 8B</xref>. Here B3 is the MSB and B0 the LSB. It can be seen that the ADC delay is data-dependent: the more bits are 1, the longer the delay. This is because the less significant bits settle after the more significant bits, and may swing before they settle. The output value of 1111, corresponding to 15 &#xd7; 15, takes just less than 2&#xa0;ns to become stable, which is the worst-case delay of the MAC. In comparison, 0 &#xd7; 0 incurs almost no delay.</p>
<p>Value-wise, 15 &#xd7; 15 results in 1111 (the largest number possible out of 4 bits). 15 &#xd7; 0 results in 0000 and 9 &#xd7; 6 results in 0111. These values work well for a 4-bit digital in and 4-bit digital out MAC unit.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Neural network case study</title>
<p>This section presents a case study to validate the proposed MAC unit. In this section, an MLA NN is created using copies of our MAC unit serving as perceptrons. The machine learning problem solved with this NN is the classification of the MNIST dataset.</p>
<p>As our MAC unit supports only 4-bit inputs (integers), we need to apply a quantization technique to preserve the high accuracy while using such low-precision numbers. Two state of the art techniques exist for this, namely post-training quantization (PTQ) and quantization-aware training (QAT). Regarding PTQ, the weights will be quantized to the target bitwidth after the floating-point based training. This is a simple technique yet not suitable for <inline-formula id="inf3">
<mml:math id="m20">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula>8-bit resolution applications because of the increasing quantization error (<xref ref-type="bibr" rid="B23">Nagel et al., 2021</xref>). Alternatively, the QAT technique injects the quantization error during training. This allows the lower-resolution NN to learn and improve its weights appropriately. Previously, 98% accuracy of MNIST classification using 4-bit NN with QAT technique has been shown in (<xref ref-type="bibr" rid="B5">Chahal, 2019</xref>). Therefore, this technique will be applied in our NN training.</p>
<p>The most challenging issue in our NN training is that the output of our MAC unit contains variations due to its analog nature. To overcome this issue, we will use the same idea as QAT; the variations will be included in our training so that the NN can learn these variations and adjust its accuracy accordingly. In summary, this section contributes the QAT technique analysis to inject the MAC unit variations, the demonstration of NN training for MNIST classification and the accuracy comparison between the NN trained using our MAC unit and the basic 4-bit QAT NN. Note that, for the ease of computation analysis, our NN consists of fully-connected layers only. Extra software library development to include the proposed MAC unit in the convolution layers is considered as our future work.</p>
<sec id="s5-1">
<title>5.1 QAT analysis</title>
<p>Fundamentally, fully-connected NN computation contains dot-product operations between weight matrices and input vectors. <xref ref-type="disp-formula" rid="e18">Eq. 18</xref> means that the resulting matrix element <italic>r</italic>
<sub>3</sub> at row <italic>i</italic> and column <italic>k</italic> is obtained from the sum of products between the pairs of the weight matrix elements <italic>r</italic>
<sub>1</sub> at row <italic>i</italic> and the input vector elements <italic>r</italic>
<sub>2</sub> at column <italic>k</italic>. In general, these variables are presented precisely in floating-point format.<disp-formula id="e18">
<mml:math id="m21">
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>To compute the above equation using integer-arithmetic hardware, we need to quantize these real numbers. Following (<xref ref-type="bibr" rid="B15">Jacob et al., 2018</xref>), any real number <italic>r</italic> can be represented by a non-negative integer quantized value <italic>q</italic>, from which the zero-point <italic>Z</italic> is subtracted and the result is scaled by the scale factor <italic>S</italic>, as shown in (<xref ref-type="disp-formula" rid="e19">Eq. 19</xref>). In addition, the range of <italic>q</italic> is between 0 and 2<sup>
<italic>n</italic>
</sup>&#xa0;&#x2212;&#xa0;1, where <italic>n</italic> is the number of bits. Therefore, <italic>q</italic> in this work is in the [0, 15] range (4-bit unsigned integer).<disp-formula id="e19">
<mml:math id="m22">
<mml:mi>r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>Z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>Replacing the weights <italic>r</italic>
<sub>1</sub> and inputs <italic>r</italic>
<sub>2</sub> in (<xref ref-type="disp-formula" rid="e18">Eq. 18</xref>) by <xref ref-type="disp-formula" rid="e19">Eq. 19</xref> yields <xref ref-type="disp-formula" rid="e20">Eq. 20</xref> which can be re-written as <xref ref-type="disp-formula" rid="e21">Eq. 21</xref>:<disp-formula id="e20">
<mml:math id="m23">
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(20)</label>
</disp-formula>
<disp-formula id="e21">
<mml:math id="m24">
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>In <xref ref-type="disp-formula" rid="e21">Eq. 21</xref> there is no dot-product operation on floating-point numbers; it happens only in the term <inline-formula id="inf4">
<mml:math id="m25">
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> where both operands are integers and therefore our multiplier is applicable to this operation.</p>
<p>Another issue is that our MAC unit is centered around an analog product. It therefore exhibits a non-ideal effect whereby its multiplication results deviate from the expected values, as shown in <xref ref-type="table" rid="T3">Table 3</xref>, which is obtained from analog simulations of a single MAC unit. Note that this input-output correspondence error map shows that the actual full-MAC implementation has more errors than the crossbar itself, as given in <xref ref-type="sec" rid="s3-1-4">Section 3.1.4</xref>. This is because the ADC part introduces more errors. However, the maximal error value of &#x2212;5 shows that the MAC is still a four-bit unit with a higher resolution than a three-bit device (the maximal possible output value of this MAC is 15). In <xref ref-type="disp-formula" rid="e22">Eq. 22</xref>, we add <inline-formula id="inf5">
<mml:math id="m26">
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> to sum up the variation from every multiplication. The value of <italic>C</italic> can be found at column <inline-formula id="inf6">
<mml:math id="m27">
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> and row <inline-formula id="inf7">
<mml:math id="m28">
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> of <xref ref-type="table" rid="T3">Table 3</xref>. This allows the NN to learn and adjust its weights according to our multiplier&#x2019;s numerical characteristics.<disp-formula id="e22">
<mml:math id="m29">
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Multiplication errors of the proposed MAC unit.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Result</th>
<th align="left"/>
<th colspan="16" align="left">Multiplier</th>
</tr>
<tr>
<th align="left"/>
<th align="left">0</th>
<th align="left">1</th>
<th align="left">2</th>
<th align="left">3</th>
<th align="left">4</th>
<th align="left">5</th>
<th align="left">6</th>
<th align="left">7</th>
<th align="left">8</th>
<th align="left">9</th>
<th align="left">10</th>
<th align="left">11</th>
<th align="left">12</th>
<th align="left">13</th>
<th align="left">14</th>
<th align="left">15</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="16" align="left">Multiplicand</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
</tr>
<tr>
<td align="char" char=".">1</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
</tr>
<tr>
<td align="char" char=".">2</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
</tr>
<tr>
<td align="char" char=".">3</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
</tr>
<tr>
<td align="char" char=".">4</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;5</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
</tr>
<tr>
<td align="char" char=".">5</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;5</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
</tr>
<tr>
<td align="char" char=".">6</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
</tr>
<tr>
<td align="char" char=".">7</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
</tr>
<tr>
<td align="char" char=".">8</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;4</td>
</tr>
<tr>
<td align="char" char=".">9</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;4</td>
<td align="char" char=".">&#x2212;3</td>
</tr>
<tr>
<td align="char" char=".">10</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
</tr>
<tr>
<td align="char" char=".">11</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
</tr>
<tr>
<td align="char" char=".">12</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
</tr>
<tr>
<td align="char" char=".">13</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;3</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
</tr>
<tr>
<td align="char" char=".">14</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">0</td>
</tr>
<tr>
<td align="char" char=".">15</td>
<td align="char" char=".">0</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;2</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">&#x2212;1</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
<td align="char" char=".">0</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From <xref ref-type="disp-formula" rid="e22">Eq. 22</xref>, we can separate the loss term from the main bracket by multiplying the scale factors <italic>S</italic>
<sub>1</sub> and <italic>S</italic>
<sub>2</sub> as expressed in (<xref ref-type="disp-formula" rid="e23">Eq. 23</xref>). It can be seen that the large term remains the same as (<xref ref-type="disp-formula" rid="e21">Eq. 21</xref>). Therefore, we can conclude that the variation in our MAC unit can be simulated by subtracting the product of both scale factors and the sum of the MAC unit&#x2019;s errors from the basic dot-product&#x2019;s result. <xref ref-type="disp-formula" rid="e24">Eq. 24</xref> will be added to our training graph as explained in the next section.<disp-formula id="e23">
<mml:math id="m30">
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(23)</label>
</disp-formula>
<disp-formula id="e24">
<mml:math id="m31">
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(24)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s6">
<title>6 Results and discussions</title>
<sec id="s6-1">
<title>6.1 MAC units</title>
<p>Our study is mainly based on worst-case delay assumptions. The worst-case multiplication cycle includes 4 row writing 0 (reset) operations with 1.72&#xa0;ns delay, 4 row writing 1 (set) operations with 1.1&#xa0;ns delay, and one entire crossbar reading (multiply &#x2b; ADC) operation with 2&#xa0;ns delay, which represents an order of magnitude speedup over existing memristor-based multipliers reported in (<xref ref-type="bibr" rid="B11">Guckert and Swartzlander, 2017</xref>). The average power is 290<italic>&#xa0;&#x3bc;</italic>W, also lower than the competition. The average energy consumption per multiplication cycle of the 4-bit 1T2M crossbar multiplier is 1.39&#xa0;pJ over a 4.82&#xa0;ns period.</p>
<p>The writing delay and energy costs are reduced from (<xref ref-type="bibr" rid="B11">Guckert and Swartzlander, 2017</xref>) because in the latter both operands are represented by memristor conductance values with the high memristor writing costs incurred twice. The structures of the multipliers featured in (<xref ref-type="bibr" rid="B11">Guckert and Swartzlander, 2017</xref>) also incur more delays compared with our crossbar mixed-signal approach because the latter takes advantage of resistive Ohm&#x2019;s Law and KCL. In addition, these are worst-case comparisons where both operands need to be written completely with the greatest writing delay considered. For the case where only one of the operands needs updating (and it happens to be the one with the more frequent updating requirement) our MAC will fare much better as there is no memristor writing. For applications where the writing frequency requirements for the two operands are asymmetric, our solution would show much greater improvements.</p>
<p>The energy per multiplication cycle worst case happens with 15 &#xd7; 15 because of its longest delay and highest <italic>I</italic>
<sub>
<italic>out</italic>
</sub> value (187.3&#xa0;<italic>&#x3bc;</italic>A) among all multiplication cases. This worst-case cycle has an energy consumption of 3.91&#xa0;pJ. The best case happens with 0 &#xd7; 0 which requires only 0.00524&#xa0;pJ of energy to complete, primarily because parameter setting, crossbar and ADC all take negligible time and in addition the currents and voltages also take low values.</p>
<p>The energy consumption of our MAC unit is compared with state-of-the-art memristor multipliers in <xref ref-type="table" rid="T4">Table 4</xref>. Generally, the proposed work saves 83.7% and 74.1% of the energy per multiplication cycle compared with the MAD shift-and-add multiplier and the optimized MAD shift-and-add multiplier in their respective worst cases. In the best case, the comparative energy saving can reach almost 99%. Even the average energy consumption of the proposed MAC unit, at 1.39&#xa0;pJ, is significantly lower than the best case figures achieved by the competition.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Energy consumption per multiplication comparison with memristor-based MAD shift-and-add multiplier and optimized MAD shift-and-add multiplier (<xref ref-type="bibr" rid="B11">Guckert and Swartzlander, 2017</xref>).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="left">MAD S-and-A</th>
<th align="left">Opt MAD S-and-A</th>
<th align="left">1TxM Cu:ZnO (this work)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Best Case</td>
<td align="left">13.5&#xa0;pJ</td>
<td align="left">8.37&#xa0;pJ</td>
<td align="left">5.24&#xa0;fJ</td>
</tr>
<tr>
<td align="left">Worst Case</td>
<td align="left">24.1&#xa0;pJ</td>
<td align="left">15.1&#xa0;pJ</td>
<td align="left">3.91&#xa0;pJ</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>These very low energy consumption figures are based on the assumption that the crossbar part of the MAC is shut down after a cycle of operation ends before the next cycle starts. This crossbar structure should not be used to hold a constant output as that entails a continuous <italic>I</italic>
<sub>
<italic>out</italic>
</sub> needing to be maintained. In practice this can be solved by holding the output of the MAC in a register and powering down the crossbar when not needed. With one of the operands in non-volatile memory and the other the responsibility of the supplier of the voltage input, this regime does not introduce operational difficulties.</p>
</sec>
<sec id="s6-2">
<title>6.2 NN training and results</title>
<p>To demonstrate the application of the proposed MAC unit in our NN training, we constructed a perceptron with three fully-connected layers for MNIST classification as illustrated in <xref ref-type="fig" rid="F9">Figure 9A</xref>. MNIST is chosen because it is commonly used for proving and benchmarking the NN hardware design concepts in the literature <xref ref-type="bibr" rid="B22">Mileiko et al. (2020)</xref>; <xref ref-type="bibr" rid="B1">Amirsoleimani et al. (2020)</xref>; <xref ref-type="bibr" rid="B38">Wang et al. (2020)</xref>; <xref ref-type="bibr" rid="B16">Krestinskaya et al. (2020)</xref>, especially for low-power edge applications and suitable as a proof of concept in this paper. We explore our 4-bit MAC with MNIST to demonstrate its validity in NN applications. This is the same approach taken by the authors of (<xref ref-type="bibr" rid="B36">Trusov et al., 2021</xref>), who targeted a mobile-relevant dataset with their 4-bit low-power NN. It is not our intention to confirm the NN application scalability of 4-bit MACs, given that the case has been proven in the state of the art (<xref ref-type="bibr" rid="B34">Sun et al., 2020</xref>).</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>
<bold>(A)</bold> Neural network structure to demonstrate MNIST classification using the proposed MAC unit. It consists of three fully-connected layers, each of which (input/hidden/output) contains 800/500/10 neurons. The traditional MAC unit will be replaced by the proposed one. <bold>(B)</bold> The training graph of the neural network in <bold>(A)</bold>. We added the MAC block (highlighted in blue) where the output of the dot-product will be subtracted by the non-ideal effect of our MAC unit following <xref ref-type="disp-formula" rid="e24">Eq. 24</xref> and the multiplication errors in <xref ref-type="table" rid="T3">Table 3</xref>. This allows the neural network to learn the loss regarding the proposed MAC unit.</p>
</caption>
<graphic xlink:href="felec-03-877629-g009.tif"/>
</fig>
<p>In addition, this demonstration aims to show the NN-type applications&#x2019; ability to absorb the output variations of our MAC unit. In other words, we need to validate the MAC design through demonstrating its usefulness for NN applications even under worst-case MAC variation scenarios including worst-case memristor <italic>R</italic>
<sub>
<italic>ML</italic>
</sub> and <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> value combinations.</p>
<p>The numbers of neurons in the input/hidden/output layers are 800/500/10. The forward-pass calculation of each layer follows the graph in <xref ref-type="fig" rid="F9">Figure 9B</xref>. Regarding the QAT concept, the inputs and weights of each layer are quantized and de-quantized based on <xref ref-type="disp-formula" rid="e19">Eq. 19</xref> to simulate the quantization error. Note that this procedure is known as fake quantization in the literature (<xref ref-type="bibr" rid="B5">Chahal, 2019</xref>). In addition, the resolution of <italic>q</italic> is set to 4-bit, which is consistent with the input resolution of our MAC unit.</p>
<p>Then, the dot-product of the inputs and weights is performed and the biases are added. Next, we insert a MAC block to subtract our MAC&#x2019;s output variations from the dot-product results as explained in <xref ref-type="sec" rid="s5-1">Section 5.1</xref>. After this step, the MAC block&#x2019;s results pass the ReLU activation function and another fake quantization of the activation is executed. Finally, the layer&#x2019;s output will be the input of the next layer.</p>
<p>Three NN configurations as listed in <xref ref-type="table" rid="T5">Table 5</xref> have been implemented using the PyTorch library (<xref ref-type="bibr" rid="B25">Paszke et al., 2019</xref>). The first one, which is our baseline, is the 4-bit QAT NN obtained from (<xref ref-type="bibr" rid="B5">Chahal, 2019</xref>) without the convolution layers. The backward pass is implemented using stochastic gradient descent while the straight through estimator is applied for the fake quantization blocks. The related parameters are as follows: batch size &#x3d; 64, learning rate &#x3d; 0.01 and momentum &#x3d; 0.5. To inspect the effect of the MAC&#x2019;s output variations, the second NN is trained using the above procedure while the variations are injected only in the testing phase. Lastly, the variations are included in both training and testing phases to evaluate the accuracy improvement.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>MNIST classification accuracy comparison.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">NN configuration</th>
<th align="left">Training acc. (%)</th>
<th align="left">Testing acc. (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">4-bit QAT NN (baseline)</td>
<td align="left">97</td>
<td align="left">94</td>
</tr>
<tr>
<td align="left">4-bit QAT NN w/o MAC variation training</td>
<td align="left">97</td>
<td align="left">30</td>
</tr>
<tr>
<td align="left">4-bit QAT NN with MAC variation training</td>
<td align="left">93</td>
<td align="left">93</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="table" rid="T5">Table 5</xref> shows our baseline yields a testing accuracy of 94%, which is only a 4% accuracy drop from the convolutional NN implementation by (<xref ref-type="bibr" rid="B5">Chahal, 2019</xref>). This means the implementation with purely fully-connected layers is acceptable for MNIST classification. Without simulating the impact of our MAC unit in the NN training, however, the accuracy substantially drops to 30%. This confirms the MAC unit simulation is highly required in the training phase. Finally, the accuracy is back up to 93% when training the NN with the MAC&#x2019;s output variations. This implies the proposed MAC unit is applicable for NN applications and that variation injection is required during the NN training to maintain the accuracy.</p>
</sec>
<sec id="s6-3">
<title>6.3 Effects of technology parametric variations</title>
<p>However, device parametric variation in the multiplication cell may lead to additional and substantial analog output error. Devices may have different properties or technology parametric variations. For our MAC, we consider faster/slower operating speeds of transistors and higher/lower <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> and <italic>R</italic>
<sub>
<italic>ML</italic>
</sub> values of memristors. Therefore, the multiple-component cell design in this work risks large accuracy drops resulting from such variations. Both the transistor variation and memristor variation have been investigated to show the relation between variation and NN accuracy of MNIST classification.</p>
<p>The transistor variability models are investigated first. The fabricated transistor&#x2019;s performance can be modeled as Fast-Fast (FF), Typical-Typical (TT), and Slow-Slow (SS) corners. Analog simulations of the MAC corresponding with these corners are used to generate modified MAC input to output error maps in the same style as <xref ref-type="table" rid="T3">Table 3</xref>. Then, the respective NN simulations using the method given in <xref ref-type="sec" rid="s6-2">Section 6.2</xref> generate the accuracy results reported in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>QAT NN with MAC component variation training.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Transistor</th>
<th colspan="2" align="left">Training acc. (%)</th>
<th colspan="2" align="left">Testing acc. (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Slow-Slow</td>
<td colspan="2" align="left">96</td>
<td colspan="2" align="left">96</td>
</tr>
<tr>
<td align="left">Typical-Typical</td>
<td colspan="2" align="left">96</td>
<td colspan="2" align="left">96</td>
</tr>
<tr>
<td align="left">Fast-Fast</td>
<td colspan="2" align="left">90</td>
<td colspan="2" align="left">85</td>
</tr>
<tr>
<td rowspan="2" align="left">Memristor</td>
<td colspan="2" align="left">Training acc. (%)</td>
<td colspan="2" align="left">Testing acc. (%)</td>
</tr>
<tr>
<td align="left">Average</td>
<td align="left">Worst</td>
<td align="left">Average</td>
<td align="left">Worst</td>
</tr>
<tr>
<td align="left">DD</td>
<td align="left">95</td>
<td align="left">86</td>
<td align="left">94</td>
<td align="left">79</td>
</tr>
<tr>
<td align="left">CC</td>
<td align="left">95</td>
<td align="left">95</td>
<td align="left">95</td>
<td align="left">94</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Then we investigate the effects of memristor resistance variability. As shown in (<xref ref-type="bibr" rid="B31">Siddik et al., 2020</xref>), for the technology of our choice (Cu:ZnO), the device-to-device (DD) variability is 59% for the high resistance state (HRS) and 36% for the low resistance state (LRS), while the cycle-to-cycle (CC) variability is 89% for the HRS and 51% for the LRS. Note that although the CC variability is especially large, it is not possible for <italic>R</italic>
<sub>
<italic>ML</italic>
</sub> to become higher than <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> given that the baseline ratio between these two parameters is 1,000 for the Cu:ZnO technology.</p>
<p>Similar to the case of transistor variation investigations, our simulation investigations include analog simulations of one MAC unit with all possible corner cases of expected variability in the memristors. The results of these simulations are put into digital models in the form of input-value-to-output-value correspondence error maps in the same style as <xref ref-type="table" rid="T3">Table 3</xref>. These corner case models are then used in NN training exercises on the MNIST dataset, using exactly the same method described in <xref ref-type="sec" rid="s6-2">Section 6.2</xref>. The accuracy results are reported in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<p>In presenting these results we focus on investigating how the worst-case scenarios of memristor variability may affect the NN application and compare with the average case. The worst case happens when <italic>R</italic>
<sub>
<italic>MH</italic>
</sub> takes the lowest possible value coinciding with <italic>R</italic>
<sub>
<italic>ML</italic>
</sub> taking the highest possible value. This maximally reduces the margin between these two values and hence reduces the precision of the multiplier part of the MAC, as discussed in <xref ref-type="sec" rid="s3-1-4">Section 3.1.4</xref>.</p>
<p>The reported average case results are the average values obtained from all different corner cases and do not correspond with any one particular set of parameter values. It is noteworthy that some of the accuracy numbers reported in <xref ref-type="table" rid="T6">Table 6</xref> are actually better than those reported in the last row of <xref ref-type="table" rid="T5">Table 5</xref>. This is because in many cases, the technology parametric variation corner cases have smaller errors in their input-output relation error maps than the non-variation case of <xref ref-type="table" rid="T6">Table 6</xref>. This is a result of effective cancellations between the two kinds of errors. The true global worst case results, however, do happen with worst-case memristor parametric variation combinations.</p>
<p>As can be seen from the results, in all experiments both training and testing always successfully complete, but in the highlighted cases the accuracy does not achieve better than 90%. However, even the global worst case of 79% accuracy should be tolerable for low-power edge AI applications. It is also noteworthy that NN operations seem to be especially resistant to the CC type of parametric variability. This is likely because NN operations usually include a substantial number of cycles during which CC variability in the MACs is moderated by a kind of low-pass filtering process.</p>
</sec>
</sec>
<sec id="s7">
<title>7 Conclusion</title>
<p>In this paper, a MAC unit based on a crossbar multiplier is presented. By using memristor-transistor single-bit multiplication cells with mixed-signal design, this crossbar multiplier removes the need for carry propagation. Multiplying by passive current generation across resistive elements only, the multiplication step itself can be regarded as instantaneous according to Ohm&#x2019;s law and KCL. By using a mixed-mode, flash ADC conversion step, latency is kept under control for the ultimate digital-in/digital-out unit through single-action thermometer code generation. This means that the worst case delay depends only on writing memristor values and converting thermometer code to binary code. This latency management means that the MAC unit has a relatively low worst-case latency. At the same time, the energy efficiency is also improved over conventional digital multipliers using memristors by eliminating the need for costly carry-to-the-left operations.</p>
<p>The proposed MAC unit also has the same precision for both input and output, which means that it can be used to compose multi-MAC structures such as NNs without worrying about bit-conversion when fitting outputs of one layer to the inputs of another layer. The approximation happens in the thermometer code generation step where it leads to circuit size and complexity reductions in subsequent circuitry without sacrificing precision unnecessarily.</p>
<p>To validate this MAC unit, it is used as the basic perceptron in the creation of an NN of multiple neurons and layers, and the resulting NN is used to classify the MNIST dataset. The low precision and multiplication errors attributed to the analog product from the crossbar multiplier are shown to be compensatable through an extended use of QAT. With such compensation techniques, our case study NN achieves comparable learning accuracy to the same NN based on fully-digital QAT MAC units of the same bit width. In doing this, we additionally demonstrate the potential for extending QAT to compensate for any characterizable imprecision beyond quantization effects in the perceptron unit. The effects of parametric variability for both transistors and memristors are also investigated, demonstrating the usability of this type of MAC unit.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s8">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s9">
<title>Ethics statement</title>
<p>Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec id="s10">
<title>Author contributions</title>
<p>SY and TB contributed to conception and design of the study. SY organized the database. SY and TB performed the statistical analysis. SY wrote the first draft of the manuscript. TB and FX wrote sections of the manuscript. RS and AY designed the original hypothesis and contributed to optimizing the circuit designs. They also reviewed and revised the manuscript. All authors contributed to manuscript revision, read, and approved the submitted version.</p>
</sec>
<sec id="s11">
<title>Funding</title>
<p>The authors gratefully acknowledge funding support from EPSRC IAA project Whisperable AI Power Management (ref: NU-007755, Newcastle University, United Kingdom) and Northern Accelerator grant on Low-Power AI Circuits (NU-009397, Newcastle University).</p>
</sec>
<sec sec-type="COI-statement" id="s12">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amirsoleimani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Alibart</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yon</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pazhouhandeh</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Ecoffey</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>In-memory vector-matrix multiplication in monolithic complementary metal&#x2013;oxide&#x2013;semiconductor-memristor integrated circuits: Design choices, challenges, and perspectives</article-title>. <source>Adv. Intell. Syst.</source> <volume>2</volume>, <fpage>2000115</fpage>. </citation>
</ref>
<ref id="B2">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Berdan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Khiat</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Papavassiliou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Prodromakis</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Qualitative SPICE modeling accounting for volatile dynamics of TiO<sub>2</sub> memristors</article-title>,&#x201d; in <conf-name>2014 IEEE Int. Sym. Circuits &#x26; Systems (ISCAS)</conf-name>, <fpage>2033</fpage>&#x2013;<lpage>2036</lpage>. <pub-id pub-id-type="doi">10.1109/ISCAS.2014.6865564</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Borghetti</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Snider</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Kuekes</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Stewart</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>R. S.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>&#x2018;memristive&#x2019; switches enable &#x2018;stateful&#x2019; logic operations via material implication</article-title>. <source>Nature</source> <volume>464</volume>, <fpage>873</fpage>&#x2013;<lpage>876</lpage>. <pub-id pub-id-type="doi">10.1038/nature08940</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bui</surname>
<given-names>V. H.</given-names>
</name>
<name>
<surname>Beak</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Seon</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jeong</surname>
<given-names>T. T.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Thermometer-to-binary encoder with bubble error correction (BEC) circuit for flash analog-to-digital converter (FADC)</article-title>,&#x201d; in <conf-name>International Conference on Communications and Electronics 2010</conf-name> (<publisher-loc>Nha Trang, Vietnam</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>102</fpage>&#x2013;<lpage>106</lpage>. </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Dou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K. X.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>W. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P. Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J. H.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>CMOS-integrated memristive non-volatile computing-in-memory for AI edge processors</article-title>. <source>Nat. Electron.</source> <volume>2</volume>, <fpage>420</fpage>&#x2013;<lpage>428</lpage>. <pub-id pub-id-type="doi">10.1038/s41928-019-0288-0</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chua</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>1971</year>). <article-title>Memristor-the missing circuit element</article-title>. <source>IEEE Trans. Circuit Theory</source> <volume>18</volume>, <fpage>507</fpage>&#x2013;<lpage>519</lpage>. <pub-id pub-id-type="doi">10.1109/tct.1971.1083337</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cilardo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>De Caro</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Petra</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Caserta</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Mazzocca</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Napoli</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>High speed speculative multipliers based on speculative carry-save tree</article-title>. <source>IEEE Trans. Circuits Syst. I.</source> <volume>61</volume>, <fpage>3426</fpage>&#x2013;<lpage>3435</lpage>. <pub-id pub-id-type="doi">10.1109/tcsi.2014.2337231</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Constantoudis</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Papavieros</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Karakolis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Khiat</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Prodromakis</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dimitrakis</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Impact of line edge roughness on ReRAM uniformity and scaling</article-title>. <source>Materials</source> <volume>12</volume>, <fpage>3972</fpage>. <pub-id pub-id-type="doi">10.3390/ma12233972</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<comment>[Dataset]</comment> <person-group person-group-type="author">
<name>
<surname>Chahal</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Aggressive quantization: How to run MNIST on a 4 bit neural net using pytorch</source>. </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fujiki</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Subramaniyan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>In-/near-memory computing</article-title>. <source>Synthesis Lect. Comput. Archit.</source> <volume>16</volume>, <fpage>1</fpage>&#x2013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-01772-8</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guckert</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Swartzlander</surname>
<given-names>E. E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Optimized memristor-based multipliers</article-title>. <source>IEEE Trans. Circuits Syst. I.</source> <volume>64</volume>, <fpage>373</fpage>&#x2013;<lpage>385</lpage>. <pub-id pub-id-type="doi">10.1109/tcsi.2016.2606433</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gupta</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Serb</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Khiat</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zeitler</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vassanelli</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Prodromakis</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Real-time encoding and compression of neuronal spikes by metal-oxide memristors</article-title>. <source>Nat. Commun.</source> <volume>7</volume>, <fpage>12805</fpage>&#x2013;<lpage>12809</lpage>. <pub-id pub-id-type="doi">10.1038/ncomms12805</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ho</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>Nonvolatile memristor memory: Device characteristics and design implications</article-title>,&#x201d; in <conf-name>2009 Int. Conf. Computer-Aided Design (ICCAD&#x2019;09)</conf-name>, <conf-loc>San Jose, CA, USA</conf-loc>, <conf-date>November 2-5, 2009</conf-date>, <fpage>485</fpage>&#x2013;<lpage>490</lpage>. </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hung</surname>
<given-names>J.-M.</given-names>
</name>
<name>
<surname>Jhang</surname>
<given-names>C.-J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>P.-C.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>M.-F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Challenges and trends of nonvolatile in-memory-computation circuits for AI edge devices</article-title>. <source>IEEE Open J. Solid. State. Circuits Soc.</source> <volume>1</volume>, <fpage>171</fpage>&#x2013;<lpage>183</lpage>. <pub-id pub-id-type="doi">10.1109/ojsscs.2021.3123287</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jacob</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kligys</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Howard</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). &#x201c;<article-title>Quantization and training of neural networks for efficient integer-arithmetic-only inference</article-title>,&#x201d; in <conf-name>IEEE/CVF Conf. Computer Vision and Pattern Recognition</conf-name>, <fpage>2704</fpage>&#x2013;<lpage>2713</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2018.00286</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ilyas</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Shamim</surname>
<given-names>M. Z. M.</given-names>
</name>
<name>
<surname>Ilyas Khan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sohail</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rahman</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Oxide-based resistive switching-based devices: Fabrication, influence parameters and applications</article-title>. <source>J. Mater. Chem. C</source> <volume>9</volume>, <fpage>15755</fpage>&#x2013;<lpage>15788</lpage>. <pub-id pub-id-type="doi">10.1039/D1TC03420K</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krestinskaya</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Choubey</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>James</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Memristive GAN in analog</article-title>. <source>Sci. Rep.</source> <volume>10</volume>, <fpage>5838</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-62676-7</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kvatinsky</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Belousov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Liman</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Satat</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wald</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Friedman</surname>
<given-names>E. G.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>MAGIC&#x2014;memristor-aided logic</article-title>. <source>IEEE Trans. Circuits Syst. Ii.</source> <volume>61</volume>, <fpage>895</fpage>&#x2013;<lpage>899</lpage>. <pub-id pub-id-type="doi">10.1109/TCSII.2014.2357292</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kvatinsky</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Friedman</surname>
<given-names>E. G.</given-names>
</name>
<name>
<surname>Kolodny</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Weiser</surname>
<given-names>U. C.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>TEAM: ThrEshold adaptive memristor model</article-title>. <source>IEEE Trans. Circuits Syst. I.</source> <volume>60</volume>, <fpage>211</fpage>&#x2013;<lpage>221</lpage>. <pub-id pub-id-type="doi">10.1109/TCSI.2012.2215714</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kvatinsky</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ramadan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Friedman</surname>
<given-names>E. G.</given-names>
</name>
<name>
<surname>Kolodny</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>VTEAM: A general model for voltage-controlled memristors</article-title>. <source>IEEE Trans. Circuits Syst. Ii.</source> <volume>62</volume>, <fpage>786</fpage>&#x2013;<lpage>790</lpage>. <pub-id pub-id-type="doi">10.1109/TCSII.2015.2433536</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lehtonen</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Laiho</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>CNN using memristors for neighborhood connections</article-title>,&#x201d; in <conf-name>2010 12th Int. Workshop on Cellular Nanoscale Networks and their Applications (CNNA 2010)</conf-name>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.1109/CNNA.2010.5430304</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ge</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Montgomery</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Analogue signal and image processing with large memristor crossbars</article-title>. <source>Nat. Electron.</source> <volume>1</volume>, <fpage>52</fpage>&#x2013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1038/s41928-017-0002-z</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mileiko</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bunnam</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Shafik</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yakovlev</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Neural network design for energy-autonomous artificial intelligence applications using temporal encoding</article-title>. <source>Phil. Trans. R. Soc. A</source> <volume>378</volume>, <fpage>20190166</fpage>. <pub-id pub-id-type="doi">10.1098/rsta.2019.0166</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Nagel</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fournarakis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Amjad</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Bondarenko</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>van Baalen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Blankevoort</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <source>A white paper on neural network quantization</source>. <comment>
<italic>arXiv e-prints</italic>
</comment>. </citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Park</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yoo</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Energy-efficient neural network accelerator based on outlier-aware low-precision computation</article-title>,&#x201d; in <conf-name>2018 ACM/IEEE 45th Annual International Symposium on Computer Architecture (ISCA)</conf-name> (<publisher-loc>Los Angeles, CA, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>688</fpage>&#x2013;<lpage>698</lpage>. </citation>
</ref>
<ref id="B25">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Paszke</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gross</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Massa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lerer</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bradbury</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chanan</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). &#x201c;<article-title>Pytorch: An imperative style, high-performance deep learning library</article-title>,&#x201d; in <source>Advances in neural information processing systems 32</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Wallach</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Larochelle</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Beygelzimer</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>d&#x27;Alch&#xe9;-Buc</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Fox</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Garnett</surname>
<given-names>R.</given-names>
</name>
</person-group> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Curran Associates, Inc.</publisher-name>), <fpage>8024</fpage>&#x2013;<lpage>8035</lpage>. </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qiqieh</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Shafik</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tarawneh</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sokolov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yakovlev</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Significance-driven logic compression for energy-efficient multiplier design</article-title>. <source>IEEE J. Emerg. Sel. Top. Circuits Syst.</source> <volume>8</volume>, <fpage>417</fpage>&#x2013;<lpage>430</lpage>. <pub-id pub-id-type="doi">10.1109/JETCAS.2018.2846410</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Radwan</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Zidan</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Salama</surname>
<given-names>K. N.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>HP Memristor mathematical model for periodic signals and DC</article-title>,&#x201d; in <conf-name>IEEE Int. Midwest Sym. on Circuits &#x26; Systems</conf-name> (<publisher-loc>Seattle, WA, USA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>861</fpage>&#x2013;<lpage>864</lpage>. </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reid</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Memristor multiplication</article-title>. <source>Nat. Nanotechnol.</source> <volume>2009</volume>, <fpage>295</fpage>. <pub-id pub-id-type="doi">10.1038/nnano.2009.295</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shafik</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Maeda-Nunez</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Merrett</surname>
<given-names>G. V.</given-names>
</name>
<name>
<surname>Al-Hashimi</surname>
<given-names>B. M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Learning transfer-based adaptive energy minimization in embedded systems</article-title>. <source>IEEE Trans. Comput. -Aided. Des. Integr. Circuits Syst.</source> <volume>35</volume>, <fpage>877</fpage>&#x2013;<lpage>890</lpage>. <pub-id pub-id-type="doi">10.1109/tcad.2015.2481867</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shafik</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yakovlev</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Real-power computing</article-title>. <source>IEEE Trans. Comput.</source> <volume>67</volume>, <fpage>1445</fpage>&#x2013;<lpage>1461</lpage>. <pub-id pub-id-type="doi">10.1109/TC.2018.2822697</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Siddik</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Haldar</surname>
<given-names>P. K.</given-names>
</name>
<name>
<surname>Garu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bhattacharjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Barman</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Enhancement of data storage capability in a bilayer oxide-based memristor for wearable electronic applications</article-title>. <source>J. Phys. D. Appl. Phys.</source> <volume>53</volume>, <fpage>295103</fpage>. <pub-id pub-id-type="doi">10.1088/1361-6463/ab81d3</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Singh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Prasad</surname>
<given-names>P. W. C.</given-names>
</name>
<name>
<surname>Alsadoon</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Beg</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pham</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Elchouemi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Survey on memrister models</article-title>,&#x201d; in <conf-name>2016 Int. Conf. Electronics, Information, and Communications (ICEIC)</conf-name>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1109/ELINFOCOM.2016.7563017</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Strukov</surname>
<given-names>D. B.</given-names>
</name>
<name>
<surname>Snider</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Stewart</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>R. S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>The missing memristor found</article-title>. <source>Nature</source> <volume>453</volume>, <fpage>80</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1038/nature06932</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>Ni</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). &#x201c;<article-title>Ultra-low precision 4-bit training of deep neural networks</article-title>,&#x201d; in <source>NeurIPS</source>. </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Suresh</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Boppidi</surname>
<given-names>P. K. R.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Banerjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kundu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Realizing spike-timing dependent plasticity learning rule in Pt/Cu:ZnO/Nb:STO memristors for implementing single spike based denoising autoencoder</article-title>. <source>J. Micromech. Microeng.</source> <volume>29</volume>, <fpage>085006</fpage>. <pub-id pub-id-type="doi">10.1088/1361-6439/ab235f</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Trusov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Limonova</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Slugin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nikolaev</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Arlazarov</surname>
<given-names>V. V.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Fast implementation of 4-bit convolutional neural networks for mobile devices</article-title>,&#x201d; in <conf-name>2020 25th International Conference on Pattern Recognition (ICPR)</conf-name>, <fpage>9897</fpage>&#x2013;<lpage>9903</lpage>. <pub-id pub-id-type="doi">10.1109/ICPR48806.2021.9412841</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Vinayaka</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Namboodiri</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Abdalla</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kerstetter</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Mata-Carlos</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Senda</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). &#x201c;<article-title>Monolithic 8x8 SiPM with 4-bit current-mode flash ADC with tunable dynamic range</article-title>,&#x201d; in <conf-name>Proceedings of the 2019 on Great Lakes Symposium on VLSI</conf-name>, <fpage>57</fpage>&#x2013;<lpage>62</lpage>. </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J. Q.</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z. P.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Recent advances of volatile memristors: Devices, mechanisms, and applications</article-title>. <source>Adv. Intell. Syst.</source> <volume>2</volume>, <fpage>2000055</fpage>. <pub-id pub-id-type="doi">10.1002/aisy.202000055</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Yakovlev</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Enabling survival instincts in electronic systems: An energy perspective</article-title>,&#x201d; in <source>Transforming reconfigurable systems</source> (<publisher-loc>London, UK</publisher-loc>: <publisher-name>Imperial College Press</publisher-name>), <fpage>237</fpage>&#x2013;<lpage>263</lpage>. <pub-id pub-id-type="doi">10.1142/9781783266975_0013</pub-id> </citation>
</ref>
<ref id="B40">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shafik</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bunnam</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yakovlev</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Optimized multi-memristor model based low energy and resilient current-mode multiplier design</article-title>,&#x201d; in <conf-name>2021 Design, Automation &#x26; Test in Europe Conference &#x26; Exhibition (DATE)</conf-name>, <fpage>1230</fpage>&#x2013;<lpage>1233</lpage>. <pub-id pub-id-type="doi">10.23919/DATE51398.2021.9473926</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shafik</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bunnam</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yakovlev</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020a</year>). &#x201c;<article-title>Self-amplifying current-mode multiplier design using a multi-memristor crossbar cell structure</article-title>,&#x201d; in <conf-name>2020 27th IEEE Int. Conf. Electronics, Circuits and Systems (ICECS)</conf-name>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>. <pub-id pub-id-type="doi">10.1109/ICECS49266.2020.9294797</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Soltan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shafik</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bunnam</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Balsamo</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2020b</year>). &#x201c;<article-title>Current-mode carry-free multiplier design using a memristor-transistor crossbar architecture</article-title>,&#x201d; in <conf-name>2020 Design, Automation &#x26; Test in Europe Conf. &#x26; Exhibition (DATE)</conf-name>, <fpage>638</fpage>&#x2013;<lpage>641</lpage>. <pub-id pub-id-type="doi">10.23919/DATE48585.2020.9116417</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Low-voltage CMOS current-mode circuits: Topology and characteristics</article-title>. <source>IEE Proc. Circuits Devices Syst.</source> <volume>153</volume>, <fpage>219</fpage>&#x2013;<lpage>230</lpage>. <pub-id pub-id-type="doi">10.1049/ip-cds:20045058</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mazumder</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Learning in energy-efficient neuromorphic computing: Algorithm and architecture co-design</source>. <publisher-loc>Hoboken, NJ, USA</publisher-loc>: <publisher-name>John Wiley &#x26; Sons</publisher-name>. </citation>
</ref>
</ref-list>
</back>
</article>