%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % COMPLETE.BIB % % Brigham Young University Reconfigurable Logic Lab Bibliography % % This bibliography, in BibTeX format, represents the current research % in the field of 'Reconfigurable Computing'. The bibliography is % continually expanding and additions or modifications are welcome. % Please direct all correspondence regarding this bibliography to % wirthlim(at)fpga.ee.byu.edu % % Last Modified: $Date: 1999/08/17 16:22:15 $ % Last Edited By: $Author: grahamp $ % % The 'key' field is used with each entry in the bibliography to help % sort through the entries. The keywords used to date are the % following: % % system: includes articles that describe a reconfigurable computing % system or discuss architectural features of reconfigurable % computing machines, % tool: includes articles that discuss any aspect of reconfigurable % computing tools such as high-level programming environments, % hardware synthesis tools, and novel design approaches, % app: includes articles that discuss an application proposed, tested % or implemented on a reconfigurable computing platform, % device: includes articles that discuss devices used in % reconfigurable computing (FPGAs and other programmable logic). % rtr: includes articles that describe a reconfigurable computing % system, tool, device or application that uses the feature of % run-time reconfiguration. % % Each entry is given an index number, which is used as a means of % reference in BYU's reconfigurable logic laboratory library % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Index Number: 1 @Article{athanas-silverman:93, author = "P. M. Athanas and H. F. 
Silverman", title = "Processor Reconfiguration through Instruction-set Metamorphosis", key = "system,tool", journal = "IEEE Computer", volume = "26", number = "3", pages = "11--18", year = "1993", month = mar, abstract = "The processor reconfiguration through instruction-set metamorphosis (PRISM) general-purpose architecture, which speeds up computationally intensive tasks by augmenting the core processor's functionality with new operations, is described. The PRISM approach adapts the configuration and fundamental operations of a core processing system to the computationally intensive portions of a targeted application. PRISM-1, an initial prototype system, is described, and experimental results that demonstrate the benefits of the PRISM concept are presented.", } % Index Number: 2 @InProceedings{ebeling-boriello:91, author = "C. Ebeling and G. Borriello and S. A. Hauck and D. Song and E. A. Walkup", title = "{TRIPTYCH}: a New {FPGA} Architecture", key = "device", booktitle = "{FPGAs}. International Workshop on Field Programmable Logic and Applications", year = "1991", month = sep, pages = "75--90", abstract = "Existing FPGA architectures can be classified along two dimensions: reprogrammable vs. one-time programmable and general-purpose vs. domain specific. The most challenging class of FPGA architectures to design is the reprogrammable, general-purpose FPGA, of which Xilinx is the most well-known example. In this paper we describe Triptych, a new FPGA architecture that addresses two problems of current reprogrammable FPGAs: the large delays incurred in composing large functions and the strict division between routing and logic resources. Our studies indicate that Triptych is more area-efficient than current architectures and has comparable delay characteristics for a large range of circuits that include both data-path elements and control logic.", } % Index Number: 3 @Article{monaghan-noakes:92, author = "S. Monaghan and P. D. 
Noakes", title = "Reconfigurable Special Purpose Hardware for Scientific Computation and Simulation", key = "app", journal = "Computing \& Control Engineering Journal", year = "1992", month = sep, pages = "225--234", abstract = "Xilinx Field Programmable Gate Arrays (FPGAs) are used to implement reconfigurable special purpose computing hardware for computationally intensive many-body problems in physics and mathematics. The inexpensive PC-based design environment used for this work is described, and the performance for several different problems of the resulting reconfigurable hardware is compared with that of some general purpose computers. The merits of using FPGAs in special purpose computational hardware are outlined.", } % Index Number: 4 @Article{kean-gray:90, author = "T. Kean and J. Gray", title = "Configurable Hardware: Two Case Studies of Micro-Grain Computation", key = "device, app", journal = "Journal of VLSI Signal Processing", volume = "2", number = "1", year = "1990", month = sep, pages = "9--16", abstract = "This paper describes a new VLSI architecture-Configurable Array Logic (CAL) which, at its lowest level, can be programmed electrically to implement any circuit composed of logic gates. At higher levels the technology provides a medium for the direct implementation of algorithms. It particularly addresses systolic and cellular automaton algorithms where the basic computational elements perform computations unsuited to conventional processors.", } % Index Number: 5 @InProceedings{chow-rose:93, author = "P. Chow and S. O. Seo and K. Chung and G. Paez and J. Rose", title = "A High-Speed {FPGA} Using Programmable Mini-tiles", key = "device", booktitle = "Research on Integrated Systems: Proceedings of the 1993 Symposium", editor = "G. Borriello and C. 
Ebeling", year = "1993", pages = "103--122", abstract = "Field-Programmable Gate Arrays (FPGAs) are now a recognized technology for the implementation of digital systems, but they suffer from reduced speed and logic density compared to Mask-Programmed Gate Arrays. Many studies have been performed concerning the effect of an FPGA's architecture on its speed and density. In this paper we describe how these studies are used in an actual implementation of a high-performance FPGA. The architecture of the FPGA logic block was determined through an experimental process using custom-built CAD tools. The result is a logic block that is an asymmetric tree of four-input lookup tables that are hardwired together. A segmented routing architecture, also tuned using experiments, is employed to improve the speed of the interconnect. To address the problems of full-custom design, a novel layout style for FPGAs is proposed. It can be likened to the technique used in PLAs, in which a 'mini-tile' contains a portion of most components in the logic tile. The mini-tile is optimized for layout density and speed, and placed into a 4x4 array, where it is then customized by adding vias to obtain the desired hardwired connections. As well as providing ease of layout, this technique gives the capability to easily change the hardwired connections in the logic block architecture, and the segmentation length distribution in the routing architecture.", } % Index Number: 6 @InProceedings{bertin-roncin:93, author = "P. Bertin and D. Roncin and J. Vuillemin", title = "Programmable Active Memories: a Performance Assessment", key = "system, app", booktitle = "Research on Integrated Systems: Proceedings of the 1993 Symposium", editor = "G. Borriello and C. Ebeling", year = "1993", pages = "88--102", abstract = "We present some quantitative performance measurements for the computing power of Programmable Active Memories (PAM), as introduced by [2]. 
Based on Field Programmable Gate Array (FPGA) technology, the PAM is a universal hardware co-processor closely coupled to a standard host computer. The PAM can speed up many critical software applications running on the host, by executing part of the computations through a specific hardware design. The performance measurements presented are based on two PAM architectures and ten specific applications, drawn from arithmetics, algebra, geometry, physics, biology, audio and video. Each of these PAM designs proves as fast as any reported hardware or super-computer for the corresponding application. In cases where we could bring some genuine algorithmic innovation into the design process, the PAM has proved an order of magnitude faster than any previously existing system (see [19] and [18]).", } % Index Number: 7 @InProceedings{linde-nordstrom:92, author = "A. Linde and T. Nordstrom and M. Taveniku", title = "Using {FPGA}s to Implement a Reconfigurable Highly Parallel Computer", key = "system", booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications", year = "1992", address = "Vienna, Austria", month = aug, pages = "199--210", abstract = "With the arrival of large Field Programmable Gate Arrays (FPGAs) it is possible to build an entire computer using only FPGA and memory. In this paper we share some experience from building a highly parallel computer using this concept. Even if today's FPGAs are of considerable size, each processor must be relatively simple if a highly parallel computer is to be constructed from them. Based on our experience of other parallel computers and thorough studies of the intended applications, we think it is possible to build very powerful and efficient computers using bit-serial processing elements with SIMD (Single Instruction stream, Multiple Data streams) control. 
A major benefit of using FPGAs is the fact that different architectural variations can easily be tested and evaluated on real applications. In the primary application area, which is artificial neural networks, the gains of extensions like bit-serial multipliers or counters can quickly be found. A concrete implementation of a processor array, using Xilinx FPGAs is described in this paper. To get efficient usage and high performance with the FPGA circuits signal flow plays an important role. As the current implementation of the Xilinx EDA software does not support that design issue, the signal flow design has to be made by hand. The processing elements are simple and regular which makes it easy to implement them with the XACT Editor. This gives high performance, up to 40-50 MHz.", } %above is the year of the conference; year of publication:1993 %above is the place of the conf.; place of publication:Berlin, Germany %date of conference: 31 Aug.-2 Sept. 1992 % Index Number: 8 @InProceedings{furtek:93, author = "F. Furtek", title = "A Field-Programmable Gate Array for Systolic Computing", key = "device, app", booktitle = "Research on Integrated Systems: Proceedings of the 1993 Symposium", editor = "G. Borriello and C. Ebeling", year = "1993", pages = "183--199", abstract = "There is a growing awareness that reconfigurable logic, in the form of SRAM-based field-programmable gate arrays (FPGA's), is an ideal vehicle for implementing a wide range of compute-intensive algorithms. The CLi6000 series of SRAM FPGA's from Concurrent Logic is especially well suited to the special needs of this area. We describe the architecture of the CLi6000 series of SRAM-based FPGA's, emphasizing those features that support efficient implementation of pipelined arithmetic circuits. 
These features are illustrated through a massively parallel, highly pipelined algorithm for motion estimation, an especially compute-intensive algorithm used in digital video compression.", } % Index Number: 9 @InProceedings{gray-kean:89, author = "J. P. Gray and T. A. Kean", title = "Configurable Hardware: {A} New Paradigm for Computation", key = "device, app", booktitle = "Decennial CalTech Conference on VLSI", year = "1989", month = mar, pages = "277--293", address = "Pasadena, CA", abstract = "At present there are two main methods of implementing algorithms: interpretation of a data stream representing a program by an active processing unit (software) and interconnection of active logic elements (hardware). In one case the computation performed is dependent on data stored in memory and in the other on the interconnection between a set of physical devices (transistors). Both paradigms can be shown, given reasonable definitions, to be essentially equivalent in terms of the functions they can compute (see, for example, [Savage76]). In this paper we will make the case for a third paradigm: Configurable Hardware in which the interconnection between active logic elements, and hence the function computed, is dependent on a control store.", } % Index Number: 10 @InProceedings{arnold:93, author = "J. M. Arnold", title = "The {Splash} 2 Software Environment", key = "system, app", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, pages = "88--93", address = "Napa, CA", abstract = "Splash 2 is an attached special purpose parallel processor in which the computing elements are user programmable FPGA devices. The architecture of Splash 2 is designed to accelerate the solution of problems which exhibit at least modest amounts of temporal or data parallelism. 
Applications are developed by writing behavioral descriptions of algorithms in VHDL, which are then iteratively refined and debugged within the Splash 2 simulator. Once an application is determined to be functionally correct in simulation, it is compiled to a gate list and optimized by logic synthesis. The gate list is then mapped onto the FPGA architecture by automatic placement and routing tools to form a loadable FPGA object module. A C language library and a symbolic debugger comprise the execution environment. The Splash 2 system has been shown to be effective on a variety of applications, including text searching, sequence analysis, and image processing.", } % Index Number: 11 @InProceedings{pryor-thistle:93, author = "D. V. Pryor and M. R. Thistle and N. Shirazi", title = "Text Searching On {Splash} 2", key = "app", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "172--177", abstract = "This paper proposes a flexible, reprogrammable hardware solution to the acceleration of text-based keyword search problems. In these problems, a stream of input text is checked against a known list of keywords (a dictionary) for occurrences of those keywords in the text. Our solution employs an attached processor called Splash 2, which exploits the speed and reconfigurability of Field Programmable Gate Array technology. The Splash 2 system was designed and built at the SRC for a wide variety of applications. A Splash 2 system is comprised of an interface board to a Sun Sparc-2 host and up to 16 Splash boards, each of which contains 16 Xilinx 4010 FPGAs interconnected in a linear array and also through a 16-way full crossbar switch. Each Xilinx chip is coupled with a 4 Mbit static RAM through a dedicated interface. 
The text searching program implemented on a one-board Splash 2 system is capable of processing text at an estimated rate of 50 million characters per second.", } % Index Number: 12 @InProceedings{arnold-buell:92, author = "J. M. Arnold and D. A. Buell and E. G. Davis", title = "Splash 2", key = "system", booktitle = "Proceedings of the 4th Annual ACM Symposium on Parallel Algorithms and Architectures", year = "1992", month = jun, pages = "316--324", abstract = "The Splash attached processor board (referred to as Splash 1) was designed and built at the SRC to provide very high performance on a range of bit-processing problems. It proved to be highly successful; notwithstanding the known dangers of Second System Syndrome, a follow-on system, Splash 2, is being designed and built. This paper describes Splash 2, compares it with Splash 1 and discusses both its programming and two algorithmic applications.", } % Index Number: 13 @InProceedings{arnold-buell:93, author = "J. M. Arnold and D. A. Buell and E. G. Davis", title = "{VHDL} Programming on {Splash} 2", key = "system", booktitle = "More {FPGAs}: Proceedings of the 1993 International Workshop on Field-Programmable Logic and Applications", year = "1993", month = sep, pages = "182--191", address = "Oxford, England", abstract = "Splash 2 is an attached special purpose parallel processor in which the computing elements are user programmable FPGA devices. The programming environment for Splash 2 is based upon the VHSIC Hardware Description Language (VHDL), simulation and logic synthesis. Application programs for Splash 2 are developed by writing behavioral descriptions of algorithms in VHDL which are then iteratively refined and debugged within the Splash 2 simulator. Logic synthesis and automatic placement and routing techniques are used to compile the VHDL applications into loadable FPGA object modules.", } % Index Number: 14 @Article{hartenstein-hirschbiel:91, author = "R. W. Hartenstein and A. G. 
Hirschbiel and M. Riedmuller and K. Schmidt and M. Weber", title = "A Novel {ASIC} Design Approach Based on a New Machine Paradigm", key = "system", journal = "IEEE Journal of Solid-State Circuits", volume = "26", number = "7", year = "1991", month = jul, pages = "975--989", abstract = "This paper introduces a new design methodology for rapid implementation of cheap high-performance ASIC's. The method described here derives from high-level algorithm specifications or from high-level source programs not only the target hardware, but (in contrast to silicon compilers) at the same time also the machine code to run it. The new method is based on a novel sequential machine paradigm where execution is used (being orders of magnitude more efficient) instead of simulation and where programmers may do the design job, rather than real hardware designers. The paper illustrates that for a very large class of commercially important algorithms (DSP, graphics, image processing and many others) this paradigm is orders of magnitude more efficient than the von Neumann paradigm. Compared to von-Neumann-based implementations, acceleration factors of up to more than 2000 have been obtained experimentally. The performance of ASIC's obtained by this new methodology is mostly competitive with ASIC designs obtained in the much slower and much more expensive {"}traditional{"} way. As a byproduct the new methodology also supports the automatic generation of universal accelerators for coprocessor use in workstations, etc., such as, e.g., to accelerate EDA tools. It is the goal of this paper to explain the highly efficient application of the xputer paradigm, rather than to introduce its hardware implementation. It is the goal of this paper to illustrate the innovative power of this paradigm, and its potential as a major step in progress toward systematically deriving ASIC designs from algorithm specifications.", } % Index Number: 15 @InProceedings{raimbault-lavenier:93, author = "F. 
Raimbault and D. Lavenier and S. Rubini and B. Pottier", title = "Fine Grain Parallelism on a {MIMD} Machine Using {FPGA}s", key = "system", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "2--8", abstract = "Current MIMD machines are used for coarse grain-parallelism and also offer message passing mechanisms to deal with inter-processor communications. But these mechanisms lack efficiency in fine-grain parallel applications such as systolic computation. This article presents the use of an FPGA chip to set up a fast systolic communication agent on a linear asynchronous network of TRANSPUTER processors; the machine is called ARMEN.", } % Index Number: 16 @InProceedings{iseli-sanchez:93, author = "C. Iseli and E. Sanchez", title = "Spyder: {A} Reconfigurable {VLIW} Processor using {FPGA}s", key = "system", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "17--24", abstract = "A processor with multiple reconfigurable execution units has been designed and implemented. The reconfigurable execution units are implemented using reprogrammable field programmable gate array (FPGA) chips. The architecture and implementation of this processor are described in detail in this paper. An example shows that this reconfigurable processor is able to compute the new state of 100'000'000 cells of Conway's game of life per second with a clock speed of 6.25 MHz.", } % Index Number: 17 @InProceedings{milne-cockshott:93, author = "G. Milne and P. Cockshott and G. McCaskill and P. Barrie", title = "Realising Massively Concurrent Systems on the {SPACE} Machine", key = "system", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. 
Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "26--32", abstract = "Highly concurrent systems occur frequently in the physical world. They include weather systems, traffic systems, electrocardiac systems and integrated circuits. To better understand such systems requires that they be rigorously described and then simulated. How do we best perform this description? Since such systems are inherently concurrent and do not fit well onto sequential von Neumann architectures, what type of machine should be used to simulate them? This paper focuses on a class of systems characterised as being highly concurrent and which are composed out of many simple parts which interact with other parts in their locality. It discusses how to describe these systems and introduces a cellular automata type of architecture which is used to simulate these systems directly in hardware, with physical concurrency being realised by true hardware concurrency. The architecture of the SPACE machine (Scalable Parallel Architecture for Concurrency Experiments), which is constructed from reconfigurable FPGA logic is introduced and it is demonstrated how to simulate road traffic systems using it.", } % Index Number: 18 @InProceedings{ling-amano:93, author = "X. P. Ling and H. Amano", title = "{WASMII}: a Data Driven Computer on a Virtual Hardware", key = "system,app,rtr", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "33--42", abstract = "Virtual hardware is a technique to realize a large digital circuit with a small real hardware by using an extended Field Programmable Gate Array (FPGA) technology. Several configuration RAM modules are provided inside the FPGA chip, and the configuration of the gate array can be rapidly changed by replacing the active module. 
Data for configuration are transferred from an off-chip backup RAM to an un-used configuration RAM module. A novel computation mechanism called the WASMII, which executes a target dataflow graph directly, is proposed on the basis of the virtual hardware. A WASMII chip consists of the FPGA for virtual hardware and the additional mechanism to replace configuration RAM modules in the data driven manner. Configuration data are preloaded by the order which is assigned in advance with a static scheduling preprocessor. By connecting a number of WASMII chips, a highly parallel system can be easily constructed.", } % Index Number: 19 @InProceedings{casselman:93, author = "S. Casselman", title = "Virtual Computing and The Virtual Computer", key = "system", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "43--48", abstract = "Virtual Computing is an entirely new form of supercomputing that allows an algorithm to be implemented in hardware. Based on the Xilinx FPGA[1] and ICubes FPID[2] the Virtual Computer is completely reconfigurable in every respect. Computing machines based on reconfigurable logic are hyper-scalable meaning they scale up better than 1-1.", } % Index Number: 20 @InProceedings{french-taylor:93, author = "P. C. French and R. W. Taylor", title = "A Self-Reconfiguring Processor", key = "system", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "50--59", abstract = "Recent developments in the design and fabrication of field programmable logic devices (FPGA's) may well change the way in which we design and fabricate conventional microprocessors. 
The use of uncommitted logic whose function may be modified at run time makes the prospect of dynamic application specific integrated circuits closer to reality than ever before. Much of the work to date on reconfigurable logic has focussed on its application in co-processor and {"}glue{"} roles. This paper discusses how complete processors might be fabricated with a minimum of {"}fixed{"} or static logic. It is shown that in order to exploit FPGAs, a processor that is radically different from conventional architectures is required. The paper concludes by considering what evolutions of current logic families would favour this type of application.", } % Index Number: 21 @InProceedings{lewis-vanierssel:93, author = "D. M. Lewis and M. H. van Ierssel and D. H. Wong", title = "A Field Programmable Accelerator for Compiled-Code Applications", key = "system", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "60--67", abstract = "This paper describes a special purpose application accelerator using field programmable gate arrays to accelerate a range of applications. The accelerator is designed to support applications by allowing the user to implement a processor with an instruction set designed for the specific application being accelerated, using specialized instructions to implement critical fragments of the application. A compiled-code software organization is used to reduce overhead operations. A prototype has been built, and the first application to be ported to it, logic simulation, is underway.", } % Index Number: 22 @InProceedings{guccione-gonzales:93, author = "S. A. Guccione and M. J. Gonzalez", title = "A Data-Parallel Programming Model for Reconfigurable Architectures", key = "tool", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. 
Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "79--87", abstract = "Recently, several machines have been built using Field Programmable Gate Array (FPGA) technology. These reconfigurable architectures have demonstrated very high performance for a variety of problems. The configuration of these machines typically rely on some form of hardware specification. In this paper we demonstrate that a more traditional software approach may be used. A vector based data-parallel model and its mapping to a reconfigurable architecture are introduced. Included in the model are parallel prefix or scan operators. The language supporting this model is a subset of the C programming language.", } % Index Number: 23 @InProceedings{monaghan-cowen:93, author = "S. Monaghan and C. P. Cowen", title = "Reconfigurable Multi-Bit Processor for {DSP} Applications in Statistical Physics", key = "system, app", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "103--110", abstract = "A PC-AT hosted DSP processor architecture implemented in SRAM-based field programmable gate arrays (FPGA) and static memories is described. Despite its simplicity, the processor circuits can be reconfigured under software control to tackle a class of multi-bit 'pixel' processing problems of current interest in the statistical physics of disordered materials, thereby offering some of the problem flexibility of a general purpose processor and the performance of custom hardware. The flexibility offered by the FPGA implementation is discussed in detail as is a particular application of the processor (to disordered superconductors). The performance of the processor is shown to compare well with similarly costing commercial DSP hardware. 
The low cost of the processor means it can be replicated to obtain dedicated supercomputer performance.", } % Index Number: 24 @InProceedings{cuccaro-reese:93, author = "S. A. Cuccaro and C. F. Reese", title = "The {CM-2X}: {A} Hybrid {CM-2/Xilinx} Prototype", key = "system, app", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "121--130", abstract = "This paper describes the CM-2X prototype. This one-of-a-kind machine is the result of a Supercomputing Research Center/Thinking Machines Corporation joint effort to examine the suitability of a hybrid combination of CM-2 architecture and Xilinx programmable gate array technology. In addition to a description of the CM-2X and Xilinx architecture, a simple applications example is provided that illustrates many of the issues involved in programming the machine.", } % Index Number: 25 @InProceedings{wood:93, author = "L. F. Wood", title = "High Performance Analysis and Control of Complex Systems Using Dynamically Reconfigurable Silicon and Optical Fiber Memory", key = "system", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "132--141", abstract = "M is a highly parallel asynchronous computer for the analysis and control of complex systems. A complex system is a system with many interacting components. Examples of complex systems include applications in molecular biology, economics, and signal processing. M asynchronous computations reproduce the structural dynamics of a system using high fidelity behavioral modeling. Programs are composed of an application model, an environment model, and a distributed subsumption operation system. Processes are implemented using position independent instructions that operate in parallel on strings of binary data. 
All M FPGA fine grained parallel processing nodes are double buffered, asynchronous, and highly pipelined. The fiber system memory is optically multiplexed, and asynchronous. The technology will extend new gigabit ATM optical networks with integrated high performance computing services.", } % Index Number: 26 @InProceedings{babb-tessier:93, author = "J. Babb and R. Tessier and A. Agarwal", title = "Virtual Wires: Overcoming Pin Limitations in {FPGA}-based Logic Emulators", key = "tool", booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines", editor = "D. A. Buell and K. L. Pocek", year = "1993", month = apr, address = "Napa, CA", pages = "142--151", abstract = "Existing FPGA-based logic emulators suffer from limited inter-chip communication bandwidth, resulting in low gate utilization (10-20 percent). This resource imbalance increases the number of chips needed to emulate a particular logic design and thereby decreases emulation speed, since signals must cross more chip boundaries. Current emulators only use a fraction of potential communication bandwidth because they dedicate each FPGA pin (physical wire) to a single emulated signal (logical wire). These logical wires are not active simultaneously and are only switched at emulation clock speeds. Virtual wires overcome pin limitations by intelligently multiplexing each physical wire among multiple logical wires and pipelining these connections at the maximum clocking frequency of the FPGA. A virtual wire represents a connection from a logical output on one FPGA to a logical input on another FPGA. Virtual wires not only increase usable bandwidth, but also relax the absolute limits imposed on gate utilization. The resulting improvement in bandwidth reduces the need for global interconnect, allowing effective use of low dimension inter-chip topologies, coupled with the ability of virtual wires to overlap communication with computation, can even improve emulation speeds. 
We present the concept of virtual wires and describe our first implementation, a {"}softwire{"} compiler which utilizes static routing and relies on minimal hardware support. Results from compiling netlists for the 18K gate Sparcle microprocessor and the 86K gate Alewife Communications and Cache Controller indicate that virtual wires can increase FPGA gate utilization beyond 80 percent without a significant slowdown in emulation speed.",
}

% Index Number: 27
@InProceedings{foulk:93,
  author = "P. W. Foulk",
  title = "Data-folding in {SRAM} configurable {FPGA}s",
  key = "tool, app",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1993",
  month = apr,
  address = "Napa, CA",
  pages = "163--171",
  abstract = "FPGAs which are configured by static RAM can be rapidly changed from one logic configuration to another. This raises the possibility of configuring the logic to implement a function for a specific set of values, i.e. folding the inputs into the logic design. The paper discusses data folding with respect to Algotronix FPGAs, presenting a text searching circuit as an example. This folded circuit saves at least half the logic over a conventional circuit, and very much more if data folding is taken as far as possible. It also presents performance figures for the folded circuit, and discusses other applications, and suggests features which are desirable if data folding is to be practicable, most of which are possessed by the Algotronix CAL array.",
}

% Index Number: 28
@InProceedings{hoang:93,
  author = "D. T. Hoang",
  title = "Searching Genetic Databases on {Splash} 2",
  key = "app",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L.
Pocek",
  year = "1993",
  month = apr,
  address = "Napa, CA",
  pages = "185--191",
  abstract = "In this paper, we describe two systolic arrays for computing the edit distance between two genetic sequences using a well-known dynamic programming algorithm. The systolic arrays have been implemented for the Splash 2 programmable logic array, and are intended to be used for database searching. Simulations indicate that the faster Splash 2 implementation can search a database at a rate of 12 million characters per second, several orders of magnitude faster than implementations of the dynamic programming algorithm on conventional computers.",
}

% Index Number: 29
@InProceedings{luk-lok:93,
  author = "W. Luk and V. Lok and I. Page",
  title = "Hardware Acceleration of Divide-and-Conquer Paradigms: a Case Study",
  key = "tool, app",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1993",
  month = apr,
  address = "Napa, CA",
  pages = "192--201",
  abstract = "We describe a method for speeding up divide-and-conquer algorithms with a hardware coprocessor, using sorting as an example. The method employs a conventional processor for the {"}divide{"} and {"}merge{"} phases, while the {"}conquer{"} phase is handled by a purpose-built coprocessor. It is shown how transformation techniques from the Ruby language can be adopted in developing a family of systolic sorters, and how one of the resulting designs is prototyped in eight FPGAs on a PC coprocessor board known as CHS2x4 from Algotronix. The execution of the hardware unit is embedded in a sorting program, with the PC host merging the sorted sequences from the hardware sorter. The performance of this implementation is compared against various sorting algorithms on a number of PC systems.",
}

% Index Number: 30
@InProceedings{daalen-jeavons:93,
  author = "M. van Daalen and P. Jeavons and J.
Shawe-Taylor",
  title = "A Stochastic Neural Architecture that Exploits Dynamically Reconfigurable {FPGA}s",
  key = "app",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1993",
  month = apr,
  address = "Napa, CA",
  pages = "202--211",
  abstract = "In this paper we present an expandable digital architecture that provides an efficient real time implementation platform for large neural networks. The architecture makes heavy use of the techniques of bit serial stochastic computing to carry out the large number of required parallel synaptic calculations. In this design all real valued quantities are encoded on to stochastic bit streams in which the '1' density is proportional to the given quantity. The actual digital circuitry is simple and highly regular thus allowing very efficient space usage of fine grained FPGAs. Another feature of the design is that the large number of weights required by a neural network are generated by circuitry tailored to each of their specific values, thus saving valuable cells. Whenever one of these values is required to change, the appropriate circuitry must be dynamically reconfigured. This may always be achieved in a fixed and minimum number of cells for a given bit stream resolution.",
}

% Index Number: 31
@InProceedings{chan-schlag:93,
  author = "P. K. Chan and M. D. F. Schlag",
  title = "Architectural Tradeoffs in Field-Programmable-Device-Based Computing Systems",
  key = "tool",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1993",
  month = apr,
  address = "Napa, CA",
  pages = "152--161",
  abstract = "Reprogrammable Field-Programmable Gate Arrays (FPGAs) have enabled the realization of high performance and affordable reconfigurable computing engines.
We examine the architectural tradeoffs involved in designing general purpose FPGA-based computing systems with field-programmable gate arrays and field-programmable interconnects. The fact that FPGAs provide both programmable logic and programmable interconnects raises numerous design issues that need to be considered with care. Factors that influence the tradeoffs are routability, rearrangeability, and speed.",
}

% Index Number: 32
@InProceedings{page-luk:91,
  author = "I. Page and W. Luk",
  title = "Compiling occam into {FPGA}s",
  key = "tool, app",
  booktitle = "{FPGAs}. International Workshop on Field Programmable Logic and Applications",
  year = "1991",
  month = sep,
  address = "Oxford, UK",
  editor = "W. R. Moore and W. Luk",
  pages = "271--283",
  abstract = "We describe a compiler which maps programs expressed in a subset of Occam into netlist descriptions of parallel hardware. Using Field-Programmable Gate Arrays to implement such netlists, problem-specific hardware can be generated entirely by a software process. Inner loops of time-consuming programs can be implemented as hardware and the less intensively-used parts of the program can be mapped into machine code by a conventional compiler. Software investment is protected since the same program can run entirely in software, entirely in hardware, or in a mixture of both. A single program can thus result in many implementations across a potentially wide cost-performance range. The compilation system has been used to generate inner-loops, hardware interfaces to real-world devices, systolic arrays, and complete microprocessors. In the near future we hope to have a proven version of the compiler, enabling us automatically to generate provably correct hardware implementations, including microprocessors, from higher-level specifications.",
}

% Index Number: 33
@InProceedings{champeau-pape:94,
  author = "J. Champeau and L. Le Pape and B. Pottier and S. Rubini and E. Gautrin and L.
Perraudeau",
  title = "Flexible Parallel {FPGA}-Based Architectures with {ArMen}",
  key = "system, app",
  booktitle = "Proceedings of the 27th Hawaii International Conference on System Sciences",
  year = "1994",
  month = jan,
  address = "Wailea, HI",
  editor = "T. N. Mudge and B. D. Shriver",
  pages = "105--113",
  abstract = "ArMen is a parallel machine in which each node is coupled to an FPGA ring. The underlying idea is to complement an MIMD architecture with global coprocessors providing extra control and processing properties. The use of regular hardware patterns such as cellular automata or pipelines allows high level definitions of the coprocessors. The results are fast prototyping possibilities for specific applications such as image processing or industrial control. Basic realizations are described. Changing from an FPGA technology to a VLSI one provides benefits with respect to cost and performance, without any effort at the specification level. The MADMACS pattern generator can be used to fold several FPGA configurations into the same VLSI circuit.",
}

% Index Number: 34
@InProceedings{herpel-held:94,
  author = "H. J. Herpel and M. Held and M. Glesner",
  title = "A Design Methodology for the Conceptual Design of Application Specific Digital Processors in Mechatronic Systems",
  key = "tool",
  booktitle = "Proceedings of the 27th Hawaii International Conference on System Sciences",
  year = "1994",
  month = jan,
  address = "Wailea, HI",
  editor = "T. N. Mudge and B. D. Shriver",
  pages = "78--86",
  abstract = "This paper presents a methodology and a design environment to support validation and design space exploration for embedded systems including application specific digital signal processors prototyping. Our approach to heterogeneous system design is based on rapid prototyping integrated with a set of graphical design entry, synthesis, and analysis tools. System partitioning into a set of software and hardware modules is done at system description level.
User guided and automated synthesis tools generate a fully functional prototype that can be connected to real world processes to verify system design and to estimate system performance.",
}

% Index Number: 35
@InProceedings{gebotys-gebotys:94,
  author = "C. H. Gebotys and R. J. Gebotys",
  title = "Application-Specific Architectures for Field-Programmable {VLSI} Technologies",
  key = "tool",
  booktitle = "Proceedings of the 27th Hawaii International Conference on System Sciences",
  year = "1994",
  month = jan,
  address = "Wailea, HI",
  editor = "T. N. Mudge and B. D. Shriver",
  pages = "124--130",
  abstract = "New field-programmable gate array (FPGA) technologies have increased the industrial interest in tools which map a DSP application and a set of performance constraints to a specific VLSI architecture. This paper presents an optimization methodology for mapping a DSP application and a set of performance constraints into an architecture targeted for FPGA technologies with user-programmable RAM blocks on chip. The target architecture supports multiple register files, multiple busses, complex types of functional units, and multichip implementation. The optimization methodology presented in this paper maps DSP applications to optimized register file architectures suitable for FPGAs using a number of different integer programming models. A new integer programming model is presented and used to minimize the number of busses required in the application-specific architectures. Results show that the optimization methodology provides architectures with 22\% fewer bus connections than previous research in practical cpu times.
For the first time this research provides industry with 1) a high level design optimization methodology that synthesizes application-specific DSP architectures for implementation in new field programmable VLSI technologies, and 2) a methodology to support fast prototyping of DSP applications using multiple FPGA chips.",
}

% Index Number: 36
@InProceedings{eldredge-hutchings:94,
  author = "J. G. Eldredge and B. L. Hutchings",
  title = "Density Enhancement of a Neural Network Using {FPGA}s and Run-Time Reconfiguration",
  key = "app,rtr",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1994",
  month = apr,
  address = "Napa, CA",
  pages = "180--188",
  abstract = "Run-time reconfiguration is a way of more fully exploiting the flexibility of reconfigurable FPGAs. The Run-Time Reconfiguration Artificial Neural Network (RRANN) uses run-time reconfiguration to increase the hardware density of FPGAs. The RRANN architecture also allows large amounts of parallelism to be used and is very scalable. RRANN divides the back-stages and configures the FPGAs to execute only one stage at a time. The FPGAs are reconfigured as part of normal execution in order to change stages. Using reconfigurability in this way increases the number of hardware neurons a single Xilinx XC3090 can implement by 500\%. Performance is affected by reconfiguration overhead, but this overhead becomes insignificant in large networks. This overhead is made even more insignificant with improved configuration methods. Run-time reconfiguration is a flexible realization of the time/space trade-off. The RRANN architecture has been designed and built using commercially available hardware, and its performance has been measured.",
}

% Index Number: 37
@InProceedings{bade-hutchings:94,
  author = "S. Bade and B. L.
Hutchings",
  title = "{FPGA}-Based Stochastic Neural Networks: Implementation",
  key = "app",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1994",
  month = apr,
  address = "Napa, CA",
  pages = "189--198",
  abstract = "Reconfigurable Field-Programmable Gate Arrays (FPGAs) provide an effective programmable resource for implementing hardware-based Artificial Neural Networks (ANNs). They are low cost, readily available and reconfigurable--all important advantages for ANN applications. However, FPGAs lack the circuit density necessary to implement large parallel ANNs with many thousands of synapses. This paper presents an architecture that makes it feasible to implement large ANNs with FPGAs. The architecture combines stochastic computation techniques with a novel lookup-table-based architecture that fully exploits the lookup-table structure of many FPGAs. This lookup-table-based architecture is extremely efficient: it is capable of supporting up to two synapses per Configurable Logic Block (CLB). In addition, the architecture is simple to implement, self-contained (weights are stored directly in the synapse), and scales easily across multiple chips.",
}

% Index Number: 38
@InProceedings{wirthlin-gilson:94,
  author = "M. J. Wirthlin and B. L. Hutchings and K. L. Gilson",
  title = "The {Nano Processor}: {A} Low Resource Reconfigurable Processor",
  key = "system, app",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1994",
  month = apr,
  address = "Napa, CA",
  pages = "23--30",
  abstract = "Reconfigurable logic systems approach the performance of Application-Specific Integrated Circuits (ASICs) while retaining much of the generality of conventional computing systems through reconfiguration.
Unfortunately, the development of these systems, unlike conventional software systems, is hardware intensive, requiring significant hardware development time. One way to introduce a more flexible development approach is to implement a customizable stored-program processor. For a given application, the designer can develop customized hardware to increase performance and then control the sequencing and operation of this hardware with software. Development time can be significantly reduced because conventional software development tools, e.g., assemblers and compilers, can be used to quickly develop new applications on the customized processor. This paper presents the Nano Processor (nP), a fully customizable reconfigurable processor, together with its integrated assembler, that has been successfully implemented on the Xilinx 3000 series Field Programmable Gate Arrays (FPGA).",
}

% Index Number: 39
@InProceedings{eldredge-hutchings:94b,
  author = "J. G. Eldredge and B. L. Hutchings",
  title = "{RRANN}: The Run-Time Reconfiguration Artificial Neural Network",
  key = "app,rtr",
  booktitle = "Custom Integrated Circuits Conference",
  year = "1994",
  month = may,
  address = "San Diego, CA",
  pages = "77--80",
  abstract = "Run-time reconfiguration is a way of more fully exploiting the flexibility of reconfigurable FPGAs. The Run-Time Reconfiguration Artificial Neural Network (RRANN) uses run-time reconfiguration to increase the hardware density of FPGAs. This is done by dividing the backpropagation algorithm into three sequentially executed stages and configuring the FPGAs to execute only one stage at a time. The FPGAs are reconfigured as part of normal execution in order to change stages. Using reconfigurability in this way increases the number of hardware neurons a single FPGA can implement by 500\%.
The RRANN architecture has been designed and built using commercially available hardware, and its performance has been measured.",
}

% Index Number: 40
@InProceedings{eldredge-hutchings:94c,
  author = "J. G. Eldredge and B. L. Hutchings",
  title = "{RRANN}: {A} Hardware Implementation of the Backpropagation Algorithm Using Reconfigurable {FPGA}s",
  key = "app,rtr",
  booktitle = "IEEE World Conference on Computational Intelligence",
  year = "1994",
  month = jun,
  address = "Orlando, FL",
}

% Index Number: 41
@InProceedings{furtek-stone:90,
  author = "F. Furtek and G. Stone and I. Jones",
  title = "Labyrinth: {A} Homogeneous Computational Medium",
  key = "device",
  booktitle = "Proceedings of the IEEE Custom Integrated Circuits Conference",
  year = "1990",
  month = may,
  pages = "31.1.1--31.1.4",
  abstract = "As a RAM-based reconfigurable logic array, Labyrinth provides the flexibility and malleability of software with the performance of a dedicated circuit. With a single bit register, and a half adder per cell, the architecture is optimized for register intensive, massively parallel algorithms. The fine-grained, highly-symmetric architecture scales very naturally and facilitates compact circuit layouts. A 64-cell test chip has been successfully built and tested, and a 4,096-cell chip is in the final stages of preparation for fabrication.",
}

% Index Number: 42
@InBook{brown-francis:92,
  author = "S. D. Brown and R. J. Francis and J. Rose and Z. Vranesic",
  title = "Field-Programmable Gate Arrays",
  publisher = "Kluwer Academic Publishers",
  chapter = "1",
  year = "1992",
}

% Index Number: 43
@Article{cox-blanz:92,
  author = "C. E. Cox and W. E.
Blanz",
  title = "{GANGLION} - a fast field-programmable gate array implementation of a connectionist classifier",
  key = "system",
  journal = "IEEE Journal of Solid-State Circuits",
  volume = "27",
  number = "3",
  pages = "288--299",
  year = "1992",
  month = mar,
  abstract = "The architecture, implementation, and application of GANGLION, a totally digital connectionist classifier, are described. This fully interconnected feedforward net with one hidden layer is capable of generating 4.48 billion interconnection/s. The architecture is realized on a single 9U VME card and is built entirely from off-the-shelf components. The very high throughput of 20 million decision/s is achieved by making efficient use of field-programmable gate arrays. Specifically, the authors take advantage of the reprogrammability of the devices to automatically generate new custom hardware for each application of the classifier.",
}

% Index Number: 44
@Article{rose-francis:1990,
  author = "J. Rose and R. Francis and D. Lewis and P. Chow",
  title = "Architecture of Field-Programmable Gate Arrays: The Effect of Logic Block functionality on Area Efficiency",
  key = "device",
  journal = "IEEE Journal of Solid-State Circuits",
  year = "1990",
  volume = "25",
  number = "5",
  pages = "1217--1225",
  month = oct,
  abstract = "This paper examines the relationship between the functionality of a field-programmable gate array (FPGA) logic block and the area required to implement digital circuits using that logic block. This investigation is done experimentally by implementing a set of industrial circuits as FPGA's using CAD tools for technology mapping, placement, and routing. Using a simple model of the interconnection and logic block area, a range of programming technologies (the method of FPGA customization) is explored. The experiments are based on logic blocks that use lookup tables for implementing combinational logic.
Results indicate that the best number of inputs to use (a measure of the block's functionality) is between three and four, and that a D flip-flop should be included in the logic block. These results are largely independent of the programming technology. More generally, it was observed that the area efficiency of a logic block depends not only on its functionality but on the average number of pins connected per logic block. It is shown that as the number of connected pins per block increases, the number of wiring tracks required to route those blocks also increases. Since adding functionality to a block will lead to an increase in the number of connected pins, it follows that an increase in functionality of the block is only beneficial if the total number of blocks is reduced to more than compensate for the increased wiring area. This notion leads to the conclusion that the most area-efficient logic blocks are those with a high amount of functionality per pin.",
}

% Index Number: 45
@InProceedings{guccione-gonzales:93b,
  author = "S. A. Guccione and M. J. Gonzalez",
  title = "A neural network implementation using reconfigurable architectures",
  key = "app",
  booktitle = "More {FPGAs}: Proceedings of the 1993 International workshop on field-programmable logic and applications",
  year = "1993",
  month = sep,
  address = "Oxford, England",
  pages = "443--451",
  editor = "W. Moore and W. Luk",
  abstract = "Several architectures based on Field Programmable Gate Arrays (FPGAs) have recently been introduced. These machines have demonstrated a high level of performance for a variety of problems. Despite this success, software development on these systems is generally limited to hardware description languages. One programming model that has been proposed for use with reconfigurable architectures is the vector based data parallel model. This paper describes the implementation of a multi-layer feed-forward neural network using a vector based data parallel approach.
The algorithm is described using a subset of the C programming language. This description is translated into a circuit which may be programmed into the FPGA based processor.",
}
%date of publication: 1994

% Index Number: 46
@InProceedings{erdogan-hong:93,
  author = "S. S. Erdogan and T. H. Hong",
  title = "Massively Parallel back-propagation algorithm using the reconfigurable machine",
  key = "app",
  booktitle = "World Congress on Neural Networks '93",
  year = "1993",
  address = "Portland, Oregon",
  pages = "4:861--864",
  abstract = "The potential of Artificial Neural Networks (ANNs) can be realized with successful mapping of these algorithms to massively parallel architectures which can optimize their intensive computational requirements. The Reconfigurable Machine (RM) is a parallel architecture which is built using Xilinx's 4005 Field Programmable Gate Array (FPGA) chips. Various popular neural models are currently being described using VHSIC Hardware Description Language (VHDL) to be mapped onto the RM. The logic synthesis and optimization tools for VHDL allow automatic generation of the target architecture for RM. In this paper, a fully parallel implementation of a fully connected three-layer Back-Propagation (BP) is studied. The mapping encompasses both the forward and backward passes. A novel approach based on weight duplication during learning allows a fully parallel implementation.",
}

% Index Number: 47

% Index Number: 48
@InProceedings{wazlowski-agarwal:93,
  author = "M. Wazlowski and L. Agarwal and T. Lee and A. Smith and E. Lam and P. Athanas and H. Silverman and S. Ghosh",
  title = "{PRISM-II} Compiler and Architecture",
  key = "system, tool",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L.
Pocek",
  year = "1993",
  month = apr,
  address = "Napa, CA",
  pages = "9--16",
  abstract = "This paper discusses the architecture and compiler for a general-purpose metamorphic computing platform called PRISM-II. PRISM-II improves the performance of many computationally-intensive tasks by augmenting the functionality of the core processor with new instructions that match the characteristics of targeted applications. In essence, PRISM is a general purpose hardware platform that behaves like an application-specific platform. Two methods for hardware synthesis, one using the VHDL Designer and the other using X-BLOX, are presented and synthesis results are compared.",
}

% Index Number: 49
@InProceedings{agarwal-wazlowski:94,
  author = "L. Agarwal and M. Wazlowski and S. Ghosh",
  title = "An asynchronous approach to efficient execution of programs on adaptive architectures utilizing {FPGA}s",
  key = "system, tool",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1994",
  month = apr,
  address = "Napa, CA",
  pages = "101--110",
  abstract = "PRISM, a computer architecture consisting of a general-purpose core processor and a reconfigurable FPGA platform, was designed to bridge the gap between general-purpose and specialized computers. The proof-of-concept system, PRISM-I, suffers from several limitations, principal among them being: single bus-cycle restriction on the evaluation time of the function synthesized on an FPGA, inability to execute loops with dynamic loop-counts, and inefficient execution of control constructs such as {"}if-then-else{"}. This paper presents a novel execution model in PRISM-II, that addresses the above limitations in a general manner. Also presented is a new framework for translating a C function into an FPGA-based custom architecture.",
}

% Index Number: 50
@InProceedings{lopresti:91,
  author = "D. P.
Lopresti",
  title = "Rapid implementation of a genetic sequence comparator using field-programmable gate arrays",
  key = "app",
  booktitle = "Advanced Research in {VLSI}: Proceedings of the 1991 University of California/Santa Cruz Conference",
  pages = "138--152",
  year = "1991",
  month = mar,
  address = "Santa Cruz, CA",
  editor = "C. Sequin",
  abstract = "This paper describes the implementation of a parallel algorithm for sequence comparison on the SPLASH programmable logic array. The algorithm, originally developed for a custom VLSI chip, has applications in molecular genetics and runs faster on SPLASH than it does on supercomputers. I discuss details of the problem and its systolic solution, the SPLASH architecture and design environment, and the implementations currently running on SPLASH.",
}

% Index Number: 51
@InProceedings{fawcett:93,
  author = "B. K. Fawcett",
  title = "Applications of Reconfigurable Logic",
  key = "app",
  booktitle = "More {FPGAs}: Proceedings of the 1993 International workshop on field-programmable logic and applications",
  year = "1993",
  month = sep,
  address = "Oxford, England",
  pages = "57--69",
  editor = "W. Moore and W. Luk",
  abstract = "Logic implemented in an SRAM-based FPGA is reconfigurable; that is, changes can be made to the system's logic functions by reprogramming the FPGA(s) in the system. Examples are cited of systems that make use of this in-system reconfigurability. These applications can be divided into three main categories based on how the FPGA's reconfigurability is applied: systems with built-in diagnostics, adaptable system designs, and systems with multi-purpose hardware.",
}

% Index Number: 52
@Proceedings{fpgas:91,
  title = "{FPGAs}: Proceedings of the 1991 International workshop on field-programmable logic and applications",
  editor = "W. Moore and W.
Luk",
  publisher = "Abingdon EE and CS Books",
  address = "Oxford, England",
  month = sep,
  year = "1991",
}

% Index Number: 53
@Proceedings{fpgas:93,
  title = "More {FPGAs}: Proceedings of the 1993 International workshop on field-programmable logic and applications",
  editor = "W. Moore and W. Luk",
  publisher = "Abingdon EE and CS Books",
  address = "Oxford, England",
  month = sep,
  year = "1993",
}
%year of publication: 1994

% Index Number: 54
@Proceedings{fpgas:92,
  title = "{FPGAs}: Proceedings of the 1992 International workshop on field-programmable logic and applications",
  editor = "H. Grunbacher and R. Hartenstein",
  publisher = "Springer-Verlag",
  address = "Vienna, Austria",
  month = sep,
  year = "1992",
}

% Index Number: 55
@Proceedings{fccm:93,
  title = "Proceedings of {IEEE} Workshop on {FPGA}s for Custom Computing Machines",
  year = "1993",
  month = apr,
  address = "Napa, CA",
  editor = "D. A. Buell and K. L. Pocek",
}

% Index Number: 56
@Proceedings{fccm:94,
  title = "Proceedings of {IEEE} Workshop on {FPGA}s for Custom Computing Machines",
  year = "1994",
  month = apr,
  address = "Napa, CA",
  editor = "D. A. Buell and K. L. Pocek",
}

% Index Number: 57
@InProceedings{lysaght-dunlop:93,
  author = "P. Lysaght and J. Dunlop",
  title = "Dynamic Reconfiguration of {FPGA}s",
  key = "system, app, rtr",
  booktitle = "More {FPGAs}: Proceedings of the 1993 International workshop on field-programmable logic and applications",
  year = "1993",
  month = sep,
  address = "Oxford, England",
  pages = "82--94",
  editor = "W. Moore and W. Luk",
  abstract = "This paper considers the dynamic reconfiguration of those cellular Field Programmable Gate Arrays (FPGAs) that employ static memory to store their device configuration data. A FPGA is classified as dynamically reconfigurable if it can be partially reconfigured while active. The circuits on the device that are not included in the selective reconfiguration must continue to operate without interruption.
Dynamically reconfigurable FPGAs form a new class of logic which suggests new methods of digital system synthesis and realisation with the potential for significant advantages relative to current systems. This paper investigates the importance of dynamic reconfiguration and introduces self-controlling, dynamically reconfigurable systems and the concept of Logic Caching.",
}

% Index Number: 58
@InProceedings{dehon:94,
  author = "A. DeHon",
  title = "{DPGA}-Coupled Microprocessors: Commodity {IC}s for the Early 21st Century",
  key = "device, system",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1994",
  month = apr,
  address = "Napa, CA",
  pages = "31--39",
  abstract = "During the past decade the microprocessor has become a key commodity component for building all kinds of computational systems. During this time frame large, reconfigurable logic arrays have exploited the same advances in IC fabrication technology to emerge as viable system building blocks. Looking at both the technology prospects and application requirements, there is compelling evidence that microprocessors with integrated reconfigurable logic arrays will be a primary building block for future computing systems. In this paper, we look at the role such components can play in building high-performance and economical systems, as well as the ripe technological outlook. We note how the tight integration of reconfigurable logic into the processor can overcome some of the major limitations of contemporary, attached reconfigurable compute engines. We specifically consider the use of integrated Dynamically Programmable Gate Array structures for the configurable logic and examine the advantages rapid reconfiguration provides in this application.",
}

% Index Number: 59
@Article{trimberger:93,
  author = "S.
Trimberger",
  title = "A Reprogrammable Gate Array and Applications",
  key = "device",
  journal = "Proceedings of the IEEE",
  volume = "81",
  number = "7",
  pages = "1030--1041",
  month = jul,
  year = "1993",
  abstract = "A field programmable gate array (FPGA) can implement thousands of gates of logic, has no up-front fixed costs, and can be programmed in a few minutes by users at their site. This paper describes an FPGA that is programmed by writing into on-chip static memory. This kind of FPGA can be reprogrammed any number of times, providing a versatile platform for rapid hardware implementation. Reprogrammable technology allows software-like design methodologies to be applied to logic design. This paper describes the construction of this kind of FPGA, design tradeoffs, and examples of applications that take advantage of reprogrammability.",
}

% Index Number: 60
@InProceedings{depreitere-neefs:94,
  author = "J. Depreitere and H. Neefs and H. Van Marck and J. Van Campenhout and R. Baets and B. Dhoedt and H. Thienpont and I. Veretennicoff",
  title = "An Optoelectronic {3-D} Field Programmable Gate Array",
  key = "device",
  booktitle = "Field-Programmable Logic: Architectures, Synthesis and Applications. 4th International Workshop on Field-Programmable Logic and Applications",
  pages = "352--360",
  month = sep,
  year = "1994",
  address = "Prague, Czech Republic",
  editor = "R. Hartenstein and M. Z. Servit",
  publisher = "Springer-Verlag",
  abstract = "Traditional Field-Programmable Gate Arrays suffer from a lack of routing resources when implementing complex logic designs. This paper proposes two possible improvements to the FPGA structure that could alleviate these problems. We suggest extending the FPGA class to 3-D architectures. The 3-D architectures could be constructed of a stack of optically interconnected 2-D planes.
Furthermore, we suggest a hierarchical distribution of routing resources that closely matches the wire length distributions of the intended class of applications.",
}

% Index Number: 61
% The citation key keeps its four-digit year (unlike the two-digit keys around
% it) so that existing citations of this entry continue to resolve.
@Article{rose-brown:1991,
  author = "J. Rose and S. Brown",
  title = "Flexibility of Interconnection Structures for Field-Programmable Gate Arrays",
  key = "device",
  journal = "IEEE Journal of Solid-State Circuits",
  year = "1991",
  volume = "26",
  number = "3",
  pages = "277--282",
  month = mar,
  abstract = "This paper explores the relationship between the routability of a field-programmable gate array (FPGA) and the flexibility of its interconnection structures. The flexibility of an FPGA is determined by the number and distribution of switches used in the interconnection. While good routability can be obtained with a high flexibility, a large number of switches will result in poor performance and logic density because each switch has significant delay and area. The minimum number of switches required to achieve good routability is determined by implementing several industrial circuits in a variety of interconnection architectures. These experiments indicate that high flexibility is essential for the connection block that joins the logic blocks to the routing channel, but a relatively low flexibility is sufficient for switch blocks at the junction of horizontal and vertical channels. Furthermore, it is necessary to use only a few more routing tracks than the absolute minimum possible with structures of surprisingly low flexibility.",
}

% Index Number: 62
@MastersThesis{eldredge:93,
  author = "J. G. Eldredge",
  title = "{FPGA} Density Enhancement of a Neural Network Through Run-Time Reconfiguration",
  key = "app,rtr",
  school = "Brigham Young University",
  address = "Provo, UT",
  month = dec,
  year = "1993",
}

% Index Number: 63
@InProceedings{lazarus-meyer:93,
  author = "R. B. Lazarus and F. M.
Meyer",
  title = "Realization of a Dynamically Reconfigurable Preprocessor",
  key = "system, app",
  booktitle = "Proceedings of the IEEE 1993 National Aerospace and Electronics Conference. {NAECON} 1993",
  year = "1993",
  month = may,
  address = "Dayton, OH",
  pages = "74--80",
  abstract = "Recent advances in configurable logic technology provide sufficient processing density and bandwidth to directly implement image and signal processing algorithms in digital hardware. Our research demonstrates the feasibility of employing field programmable gate arrays (FPGAs) to realize high-speed algorithm-specific processing architectures for avionic signal processing applications. Architectures composed of FPGAs provide a low-cost and flexible alternative to custom hard-wired preprocessors and a lower-cost, physically smaller alternative to massively parallel processors (both SIMD and MIMD Machines). Algorithm segments which require processing hundreds of millions of operations per second have been mapped into a single FPGA device. This technology may ultimately fill a range of processing requirements in the areas of radar and communication processing as well as image enhancement applications. The application of configurable logic devices allows realization of processing architectures to efficiently compute low-level algorithmic functions, or segments. Reconfiguration of FPGAs to implement several algorithm segments is analogous to selecting subroutines to form a software algorithm suite in a conventional processor, since it can be accomplished without hardware modification. Specific architecture configurations corresponding to algorithm segments can be chosen from a library and immediately configured in hardware to realize the same algorithm suite that could be realized in software, but with greatly enhanced processing performance (typically two orders of magnitude).
For example, the processing architecture can be reconfigured to realize an algorithm segment with a 5X5 filter window instead of 3X3 window, or replace a median filter segment with a morphological filter segment.",
}
%is this the right date? CONF LOCATION: Dayton, OH, USA; 24-28 May 1993
%is this the right address? PUBLISHER: IEEE; New York, NY, USA

% Index Number: 64
% This paper appeared in a journal (IEEE Transactions), so it is typed as an
% article with a journal field rather than as a conference paper.
% NOTE(review): the issue number is inferred from the February cover date -- verify.
@Article{hill-woo:93,
  author = "D. Hill and N.-S. Woo",
  title = "The Benefits of Flexibility in Lookup Table-Based {FPGA}s",
  key = "device",
  journal = "IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems",
  volume = "12",
  number = "2",
  year = "1993",
  month = feb,
  address = "USA",
  pages = "349--353",
  abstract = "FPGAs need not be limited to a single fixed-size truth table in each block. This paper discusses the utility of allowing each block's single large table (e.g., one 5-input, 32-bit table) to be reconfigured into smaller tables (e.g., eight 4-bit tables). Results describing the efficiency of packing some standard benchmark circuits into various configurations are presented and the cost/benefits discussed. We show that a logic block containing four lookup tables, each of which is 8-bit RAM, is the best choice if only the area efficiency is considered. We also show that if circuit speed is considered, a logic block, containing two lookup tables, each of which contains 16 bits of RAM, is the best choice.",
}

% Index Number: 65
% NOTE(review): pages use the proceedings' paper-based numbering (paper P18-4,
% sheets 1 to 4); the range below is a reading of the former "P18--4.1--4.4" -- verify.
@InProceedings{maki-whitaker:91,
  author = "G. Maki and S. Whitaker and G. Ganesh",
  title = "A Reconfigurable Data Path Processor",
  key = "system, app",
  booktitle = "Proceedings of the Fourth Annual IEEE International ASIC Conference and Exhibit",
  year = "1991",
  month = sep,
  address = "Rochester, NY",
  pages = "P18-4.1--P18-4.4",
  abstract = "A configurable data path processor is presented which can be modified to optimize performance. FPGA, PLA and PAL devices provide a great amount of flexibility to realize arbitrary control functions.
The new processor is specifically designed for arbitrary data path operations and can be dynamically reconfigured.",
}

% Index Number: 66
% NOTE(review): pages use the proceedings' paper-based numbering (paper P3-3,
% sheets 1 to 4); the range below is a reading of the former "P3--3.1--3.4" -- verify.
@InProceedings{korpiharju-viitanen:91,
  author = "T. Korpiharju and J. Viitanen and H. Kiminkinen and J. Takala and K. Kaski",
  title = "{TUTCA} Configurable Logic Cell Array Architecture",
  key = "device",
  booktitle = "Proceedings of the Fourth Annual IEEE International ASIC Conference and Exhibit",
  year = "1991",
  month = sep,
  address = "Rochester, NY",
  pages = "P3-3.1--P3-3.4",
  abstract = "A processor array architecture based on dynamically configurable logic cell array is designed to contain an 8X8 array of processing units. This array is expandable to construct larger arrays by combining chips together in a matrix. The configuration data for the processing units is loaded parallel into an internal configuration RAM to enable quick reconfiguration for a new task.",
}

% Index Number: 67
@InProceedings{luk:94,
  author = "W. Luk and T. Wu and I. Page",
  title = "Hardware-Software Codesign of Multidimensional Programs",
  key = "tool, app",
  booktitle = "Proceedings of IEEE Workshop on {FPGA}s for Custom Computing Machines",
  editor = "D. A. Buell and K. L. Pocek",
  year = "1994",
  month = apr,
  address = "Napa, CA",
  pages = "82--90",
  abstract = "We present a method for parametrised partitioning of multidimensional programs for acceleration using a hardware coprocessor. The method involves a divide-and-conquer structure, with the `divide' and `merge' phases carried out by a general-purpose processor while the `conquer' phase is handled by application-specific-hardware. The partitioning strategy has been captured in a simple functional language, and we have automated the production of partitioned programs in this language.
Our approach has been tested on an FPGA-based system using a number of computer vision algorithms, including the Canny edge detector, and the performance is compared against executing the programs on the PC host.",
}

% Index Number: 68
% Authors are separated with "and" so BibTeX sees three people; the "Jr."
% suffix uses the "Last, Jr., First" form.
@InProceedings{bolotski-dehon:94,
  author = "M. Bolotski and A. DeHon and Knight, Jr., T. F.",
  title = "Unifying {FPGA}s and {SIMD} Arrays",
  key = "device, system",
  booktitle = "FPGA '94 -- 2nd International ACM/SIGDA Workshop on FPGAs",
  year = "1994",
  address = "Berkeley, CA",
  month = mar,
  pages = "1--10",
  abstract = "Field-Programmable Gate Arrays (FPGAs) and Single-Instruction Multiple-Data (SIMD) processing arrays share many architectural features. In both architectures, an array of simple, fine-grained logic elements is employed to provide high-speed customizable, bit-wise computation. In this paper, we present a unified computational array model which encompasses both FPGAs and SIMD arrays. Within this framework, we examine the differences and similarities between these array structures and touch upon techniques and lessons which can be transferred between the architectures. The unified model also exposes promising prospects for hybrid array architectures. We introduce the Dynamically Programmable Gate Array which combines the best features from FPGAs and SIMD arrays into a single array architecture.",
}

% Index Number: 69
@InProceedings{barros-akil:92,
  author = "M. Alves De Barros and M. Akil",
  title = "Study and implementation of a real time 3*3 programmable convolver with reconfigurable technology",
  key = "app",
  booktitle = "Euro ASIC '92",
  year = "1992",
  pages = "392--395",
  publisher = "IEEE Computer Society Press",
  abstract = "The authors describe problems concerning the implementation of 2D convolution algorithms using reconfigurable technology. An approach for the automatic design of specific architectures in this technology is discussed. The Xilinx programmable gate array (PGA) resources are presented.
The authors consider specially their time and area limits. They present an implementation of a real time 3*3 programmable convolver with Xilinx XC 3090 PGA.",
}

% Index Number: 70
% Authors are separated with "and" so BibTeX sees four people; the page range
% lives in the standard "pages" field with an en-dash ("--").
@InProceedings{hauck-borriello:92,
  author = "S. Hauck and G. Borriello and S. Burns and C. Ebeling",
  title = "{MONTAGE}: An {FPGA} for Synchronous and Asynchronous Circuits",
  key = "device",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "44--51",
  abstract = "Field-programmable gate arrays are frequently used to implement system interfaces and glue logic. However, there has been little attention given to the special problems of these types of circuits in FPGA architectures. In this paper we describe Montage, a Triptych-based FPGA designed for implementing asynchronous logic and interfacing separately-clocked synchronous circuits. Asynchronous circuits have different requirements than synchronous circuits, which make standard FPGAs unusable for asynchronous applications. At the same time, many asynchronous design methodologies allow components with greatly different performance to be substituted for one another, making a design environment which migrates between FPGA, MPGA, and semi-custom implementations very attractive. Similar problems also exist for interfacing separately-clocked synchronous circuits. We discuss these problems, and demonstrate how the Montage FPGA satisfies the demands of these classes of circuits.",
}
%above is the year of the conference. Year of publication:1993
%conference location: Vienna, Austria;publisher loc.: Berlin, Germany
%conference dates: 31 Aug.-2 Sept. 1992

% Index Number: 71
@InProceedings{wu-perkowski:92,
  author = "L.-F. Wu and M. A.
Perkowski",
  title = "Minimization of Permuted {Reed-Muller} Trees for Cellular Logic Programmable Gate Arrays",
  key = "tool",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "78--87",
  abstract = "The new family of Field Programmable Gate Arrays CLI6000 from Concurrent Logic Inc realizes the truly Cellular Logic. It has been mainly designed for the realization of data path architectures. However, introduced by it new universal logic cell calls also for new logic synthesis methods based on approximate, for the minimization of Permuted Reed-Muller Trees that are obtained by repetitive application of Davio expansions (Shannon expansions for EXOR gates) in all possible orders of variable in subtrees. Such trees are particularly well matched to both the realization of logic cell and connection structure of the CLI6000 device. It is shown on several standard benchmarks that the heuristic algorithm gives good quality results in much less time than the exact algorithm.",
}
%above is the year of the conference; year of publication:1993
%above is the place of the conf.; place of publication:Berlin, Germany
%date of conference: 31 Aug.-2 Sept. 1992

% Index Number: 72
% Authors are separated with "and" so BibTeX sees seven people rather than one
% braced name followed by a second author.
@InProceedings{hill-britton:92,
  author = "D. Hill and B. Britton and B. Oswald and N.-S. Woo and S. Singh and C.-T. Chen and B. Krambeck",
  title = "{ORCA}: {A} New Architecture for High-Performance {FPGA}s",
  key = "device",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping.
Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "52--60",
  abstract = "AT\&T's ORCA (Optimized Reconfigurable Cell Array) architecture extends FPGA applicability into a larger domain than is possible with today's parts, including datapath intensive designs such as memory controllers, signal processing parts, and telecommunication interfaces. Key to the suitability of the ORCA for these jobs is the fact that each of its basic blocks is capable of processing four bits. So, for example, a 16 bit adder requires exactly 4 blocks, not 9 or 16 as in other architectures. Yet the total complexity of each block is comparable to other current parts, thus yielding a significant improvement in functional density.",
}
%above is the year of the conference; year of publication:1993
%above is the place of the conf.; place of publication:Berlin, Germany
%date of conference: 31 Aug.-2 Sept. 1992

% Index Number: 73
@InProceedings{kempa-jung:92,
  author = "G. Kempa and P. Jung",
  title = "{FPGA} Based Logic Synthesis of Squarers Using {VHDL}",
  key = "tool, app",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "112--123",
  abstract = "In this paper, the design of VHDL coded squarers by using logic synthesis is considered. The square function is important for the digital processing of signals using e.g. matched filters and Viterbi equalizers in receivers for communication systems. However, many arithmetical functions like the square function are not supported by VHDL. Hence, two major drawbacks arise in the logic synthesis of VHDL code. Firstly, the designers are forced to implement the needed arithmetical functions in VHDL by themselves.
Secondly, when implementing arithmetical functions such as the square function in VHDL, special care must be taken in order to circumvent massive hardware overhead of the synthesis results compared with manually designed architectures. In the case of the square function, this massive hardware overhead mainly stems from the fact that the synthesis results of squarers are as hardware expensive as the synthesis results of multipliers. In the course of the present paper the authors shall demonstrate how this hardware overhead of squarers can be reduced by using a modified square algorithm (MSA) which was developed by the authors. The MSA was derived based on the Dadda algorithm which will be discussed briefly.",
}
%above is the year of the conference; year of publication:1993
%above is the place of the conf.; place of publication:Berlin, Germany
%date of conference: 31 Aug.-2 Sept. 1992

% Index Number: 74
@InProceedings{heeb-pfister:92,
  author = "B. Heeb and C. Pfister",
  title = "Chameleon: {A} Workstation of a Different Colour",
  key = "system, app",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "152--161",
  abstract = "Chameleon is an experimental workstation based on a RISC processor. It provides unprecedented flexibility and speed for certain applications due to the use of RAM-configurable Field Programmable Gate Arrays (FPGAs). FPGAs are used to replace glue logic as well as to provide a non-dedicated computation resource. This resource can be regarded as a general purpose coprocessor which can be reconfigured and thus transformed into a special purpose coprocessor in milliseconds at run-time. The coprocessor can be used both for handling complex input/output functions as well as to replace time critical inner loops of user programs running on the central processing unit.
Chameleon radically relies on FPGAs for all input/output functions. It serves as a means to probe the limits of FPGA usage while at the same time being the development system for its own FPGA circuits.",
}
%above is the year of the conference; year of publication:1993
%above is the place of the conf.; place of publication:Berlin, Germany
%date of conference: 31 Aug.-2 Sept. 1992

% Index Number: 75
@InProceedings{brunvand:92,
  author = "E. Brunvand",
  title = "Using {FPGA}s to Prototype a Self-Timed Computer",
  key = "app",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "192--198",
  abstract = "The NSR (non-synchronous RISC) architecture is an architecture for a general purpose processor structured as a collection of self-timed blocks that operate concurrently and communicate over bundled data channels in the style of micropipelines. A 16-bit version of the NSR architecture has been implemented using Actel field programmable gate arrays (FPGAs). Each of the major components of the NSR is implemented using one or two Actel FPGA chips using a library of self-timed circuit modules. This prototype implementation is being used to gain experience with the NSR architecture and to gather statistics about the architectural choices. The Actel FPGAs have proven to be extremely useful in quickly prototyping this novel computer architecture.",
}
%above is the year of the conference; year of publication:1993
%above is the place of the conf.; place of publication:Berlin, Germany
%date of conference: 31 Aug.-2 Sept. 1992

% Index Number: 76
@InProceedings{shaw-milne:92,
  author = "P. Shaw and G. Milne",
  title = "A Highly Parallel {FPGA}-Based Machine and its Formal Verification",
  key = "system, app",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping.
Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "162--173",
  abstract = "The SPACE machine is introduced as a new type of computer architecture, capable of very fast simulation of highly concurrent systems. The machine is designed to be scalable, constructed from a vast array of boards. The decisions made in the design of the board are discussed, and the actual hardware (based on an array of Field Programmable Gate Array chips) is described. It is shown that this machine can be programmed by translating a subset of the Occam language into asynchronous modules. Using the Circal process algebra, a new method of formally verifying asynchronous modules for these circuits is presented. This method allows bounded gate delays to be included in a two-level modelling mechanism.",
}
% Entry 76: the year above is the conference year; year of publication: 1993
% Entry 76: the address above is the conference venue; place of publication: Berlin, Germany
% Entry 76: conference dates: 31 Aug.-2 Sept. 1992

% Index Number: 77
@Article{howard-taylor:92,
  author = "N. Howard and R. W. Taylor",
  title = "Reconfigurable logic: technology and applications",
  key = "app",
  journal = "Computing \& Control Engineering Journal",
  volume = "3",
  number = "5",
  month = sep,
  pages = "235--240",
  year = "1992",
  abstract = "Field programmable gate arrays (FPGAs) have the potential to revolutionise the design of modern computer systems. With the current generation of reconfigurable arrays, the distinction between hardware, software and firmware blurs, permitting the designer to mix and match according to application. The promise of logic systems that can be configured in fractions of a second holds out the very real possibility of designing adaptive hardware-machines that can optimise themselves for their environment. The article introduces the internal design of an ultra-fine grain FPGA family, the Plessey/Pilkington ERA, (e.g.
ERA60100) and briefly compares this with other families. The integrated design synthesis for mixed hardware/software systems is discussed and the article concludes with two examples of FPGA systems. One is a distributed instrumentation system for process tomography (the use of non-invasive sensors to measure and reconstruct the internal state of an industrial system), the other is a fast cellular automata machine.",
}

% Index Number: 78
@Article{cockshott-shaw:92,
  author = "P. Cockshott and P. Shaw and P. Barrie and G. J. Milne",
  title = "Scalable cellular array architecture",
  key = "system, tool, app",
  journal = "Computing \& Control Engineering Journal",
  volume = "3",
  number = "5",
  pages = "219--224",
  month = sep,
  year = "1992",
  abstract = "A new class of universal computers, cellular array machines, is introduced. The development of reprogrammable logic leading up to the development of field programmable gate arrays is reviewed. An architecture for constructing modular and scalable general-purpose computers from reprogrammable logic is presented. It is shown that such a machine can be programmed by translating Occam into asynchronous digital logic circuits. Examples are given of circuit components which implement Occam operators.",
}

% Index Number: 79
@InProceedings{sueyoshi-apduhan:92,
  author = "T. Sueyoshi and B. O. Apduhan and S. Funakoshi and I. Arita",
  title = "A new approach towards realization of reconfigurable interconnection networks",
  key = "system",
  booktitle = "Eleventh Annual International Phoenix Conference on Computers and Communications",
  pages = "456--463",
  year = "1992",
  abstract = "A new approach to realize a reconfigurable interconnection network which is the key factor in constructing reconfigurable parallel computers, utilizing the reconfigurability features of a field programmable gate array (FPGA), is presented.
The organization of the reconfigurable interconnection network and the mapping strategy for static and dynamic networks are discussed. Mapping examples are included. The control mechanism and interface that make it possible to implement the optimum interconnection topologies for interprocessor communication patterns on the interconnection network for the efficient execution of application programs on a multiprocessor system are outlined. The system organization of a reconfigurable interconnection network for a massively parallel multiprocessor is described.",
}

% Index Number: 80
% The conference city is stored in "address", the field every other entry in
% this file uses (classic BibTeX styles ignore "location").
@InProceedings{albaharna-cheung:94,
  author = "O. T. Albaharna and P. Y. K. Cheung and T. J. Clarke",
  title = "Area \& Time Limitations of {FPGA}-based Virtual Hardware",
  key = "device",
  booktitle = "Proceedings of the IEEE International Conference on Computer Design",
  pages = "184--189",
  address = "Cambridge, Massachusetts",
  month = oct,
  year = "1994",
  abstract = "This paper examines the limitations of integrating programmable logic with a powerful core processor on the same die. An abstract model to investigate the area and delay of Field Programmable Gate Array architectures is presented. The model is used to show that a system implemented on FPGAs will require as much as 100 times more die area than its custom VLSI implementation and would be about 10 times slower. Our analysis shows that this high cost, inherent to the current FPGA-based architectures, is a severe limitation to virtual hardware development. A new approach is needed to deliver high computational speed-ups comparable to multiple processor systems with the same total die area.",
}

% Index Number: 81
@InProceedings{jhitta:93,
  author = "M. S. Jhitta",
  title = "Introduction of a New {FPGA} Architecture",
  key = "device",
  booktitle = "More FPGAs. Oxford International Workshop on Field-Programmable Logic and Applications",
  pages = "13--23",
  month = aug,
  year = "1993",
  address = "Oxford, England",
  editor = "W. Moore and W.
Luk",
  publisher = "Abingdon EE\&CS Books",
  abstract = "This paper outlines the architecture of the fourth generation Dynamically Programmable Logic Device (DPLD), a high performance, fine grain, SRAM field programmable architecture developed by Pilkington Micro-electronics. The changes and enhancements made are based on extensive market research and evaluation/benchmarking. The resulting architecture specification differs significantly from previous generations, with an enhanced cell structure and improved routing structures.",
}

% Index Number: 82
% The accent is written as a braced BibTeX special character, and the percent
% sign in the abstract is escaped so styles that print abstracts do not treat
% it as a TeX comment.
@InProceedings{dehon:91,
  author = "Andr{\'e} DeHon",
  title = "Practical Schemes for Fat-Tree Network Construction",
  key = "device",
  booktitle = "Advanced Research in {VLSI}: Proceedings of the 1991 University of California/Santa Cruz Conference",
  year = "1991",
  month = mar,
  address = "Santa Cruz, CA",
  editor = "C. Sequin",
  pages = "307--322",
  abstract = "As multiprocessor computer networks are scaled to support thousands and millions of processors, we must exploit locality in order to avoid uniform degradation in network performance. Fat-tree networks offer a topology that theoretically scales arbitrarily while allowing the exploitation of considerable locality. In this paper, I present a scheme for constructing practical fat-tree networks. Integrating expanders for redundant multipath switching networks, I incorporate fault-tolerance into the fat-tree network. I present primitive building blocks for the construction of these networks and describe how these building blocks can be synthesized using current technology. I also present organizational structures for composing these primitives into arbitrarily large networks. This synthesis results in a practical scheme for building large-scale, high-performance multiprocessor computer networks. With suitable locality and technology, a 786,432 processor network can route a message on the first attempt with over 70\% probability when the network is fully loaded.
The latency through the network from one endpoint to another is at most 320 ns. For more local connections, the network latency can be as small as 40 ns.",
}

% Index Number: 83
% The citation key stays ASCII; only the author field carries the umlaut.
@InProceedings{grunbacher-jaud:92,
  author = "H. Gr{\"u}nbacher and A. Jaud",
  title = "{JAPROC} - An 8 bit Micro Controller Design and its Test Environment",
  key = "system",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "146--151",
  abstract = "This paper describes the design of JAPROC, an 8-bit micro controller. JAPROC is a processor-core which is being developed within the EUREKA project JAMIE. The design consists of approximately 5000 gates and has been implemented in a FPGA Xilinx X4005. For testing purposes a PC board has been developed which allows to configure the FPGA, download and execute micro controller code and compare the results to an emulator.",
}

% Index Number: 84
@InProceedings{surmann-ungering:92,
  author = "H. Surmann and A. Ungering and K. Goser",
  title = "Optimized Fuzzy Controller Architecture for Field Programmable Gate Arrays",
  key = "app",
  booktitle = "Field-Programmable Gate Arrays: Architectures and Tools for Rapid Prototyping. Second International Workshop on Field Programmable Logic and Applications",
  year = "1992",
  address = "Vienna, Austria",
  month = aug,
  pages = "124--133",
  abstract = "This paper describes an optimized fuzzy controller (FC) architecture and its realization with field programmable gate arrays (FPGAs). In consideration of data dependencies and minor user restrictions within the definition of fuzzy rules (FRs), it is possible to develop a high speed FPGA architecture. A prototype of the FC operates at 5 MHz and needs 50 mu s operation time (8 bit resolution) independent of the number of inputs/outputs with 256 fuzzy rules.
A pipeline architecture is used to achieve a high processing speed.",
}
% Entry 84: the year above is the conference year; year of publication: 1993
% Entry 84: the address above is the conference venue; place of publication: Berlin, Germany
% Entry 84: conference dates: 31 Aug.-2 Sept. 1992

% Index Number: 85
@Article{dillien-phillips:89,
  author = "P. Dillien and I. Phillips",
  title = "{ASIC} design flexibility with {ERA}s",
  key = "device",
  journal = "Electronic Product Design",
  volume = "10",
  number = "10",
  pages = "29,31--32,34",
  month = oct,
  year = "1989",
  abstract = "The ERA has a number of aspects; it can be viewed as a proving vehicle for conventional masked gate array; as a programmable logic device in its own right for applications with lower volumes or where NREs cannot be carried; but it also opens up new possibilities in system design. For example, the implementation of dynamically reconfigurable hardware for multi-tasking applications, fault-tolerant systems and neural network computers. Classified as a field programmable gate array (FPGA), the ERA uses an embedded static RAM to control the routeing of signals between its logic elements. Even though its application areas overlap considerably with those for high-performance programmable logic devices (PLDs), the ERA should not be confused with conventional PLDs. Unlike some competing FPGAs which were designed upwards from PLD architectures, the ERA has an architecture which is much closer to a conventional gate array than to a PLD.",
}

% Index Number: 86
@InProceedings{wolfe-shen:88,
  author = "A. Wolfe and J. P. Shen",
  title = "Flexible Processors: a Promising Application-Specific Processor Design Approach",
  key = "system, app",
  booktitle = "Proceedings of the 21st Annual Workshop on Microprogramming and Microarchitecture - MICRO '21",
  year = "1988",
  address = "San Diego, CA",
  month = nov,
  pages = "30--39",
  abstract = "A new approach to application specific processor design is presented in this paper.
Existing application specific processors are either based on existing general purpose processors or custom designed special purpose processors. The availability of a new technology, the Xilinx Logic Cell Array, presents the opportunity for a new alternative. The Flexible Processor Cell is a prototype of an extremely reconfigurable application specific processor. Flexible processors can potentially provide the performance advantages of special purpose processors. The flexible processor concept opens many potential areas for future research in processor architecture and implementation. This paper presents the design, implementation, and preliminary performance evaluation of an experimental flexible processor.",
}

% Index Number: 87
@Article{fagin:93,
  author = "B. S. Fagin",
  title = "Quantitative Measurements of {FPGA} Utility in Special and General Purpose Processors",
  key = "app, system",
  journal = "Journal of VLSI Signal Processing",
  volume = "6",
  number = "2",
  year = "1993",
  address = "Boston, Massachusetts",
  month = aug,
  pages = "129--137",
  abstract = "We present experimental results on FPGA use in special and general purpose processors, using as case studies a computational accelerator for gene sequence analysis, an integer implementation of the DLX microprocessor and a real-time signal processor for rocket telemetry. All these devices have been successfully prototyped, and are now completely functional. We present detailed analysis of our experience with FPGAs in these machines, describing typically an order of magnitude improvement over discrete IC implementations.",
}

% Index Number: 88
@InProceedings{erdogan-wahab:92,
  author = "S. S. Erdogan and A.
Wahab", title = "Design of {RM-nc}: a reconfigurable neurocomputer for massively parallel-pipelined computations", key = "system, app", booktitle = "IJCNN International Joint Conference on Neural Networks", volume = "2", pages = "33--38", year = "1992", abstract = "The design of RM-nc, a reconfigurable machine for massively parallel-pipelined computations, is considered with the objective of demonstrating that a completely reconfigurable platform, not only in the domain of communication and control but also in the domain of processing elements (PEs), is feasible. The implementation of a fast computational element and control environment for neural network simulations is presented in order to assess the cost of providing reconfigurability at computational level. The implementation of a fast floating-point sum-of-products circuit using special carry-save multipliers and extensive pipelining is outlined on a field programmable gate array (FPGA) platform. It is shown