mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			155 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			Verilog
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			155 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			Verilog
		
	
	
		
			Executable File
		
	
	
	
	
////////////////////////////////////////////////////////////////////////////////
 | 
						|
// Original Author: Schuyler Eldridge
 | 
						|
// Contact Point: Schuyler Eldridge (schuyler.eldridge@gmail.com)
 | 
						|
// sqrt_pipelined.v
 | 
						|
// Created: 4.2.2012
 | 
						|
// Modified: 4.5.2012
 | 
						|
//
 | 
						|
// Implements a fixed-point parameterized pipelined square root
// operation on an unsigned input of any bit length. The number of
// stages in the pipeline is equal to the number of output bits in the
// computation. This pipeline sustains a throughput of one computation
// per clock cycle.
 | 
						|
// 
 | 
						|
// Copyright (C) 2012 Schuyler Eldridge, Boston University
 | 
						|
//
 | 
						|
// This program is free software: you can redistribute it and/or modify
 | 
						|
// it under the terms of the GNU General Public License as published by
 | 
						|
// the Free Software Foundation, either version 3 of the License.
 | 
						|
//
 | 
						|
// This program is distributed in the hope that it will be useful,
 | 
						|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
// GNU General Public License for more details.
 | 
						|
//
 | 
						|
// You should have received a copy of the GNU General Public License
 | 
						|
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
						|
////////////////////////////////////////////////////////////////////////////////
 | 
						|
`timescale 1ns / 1ps
 | 
						|
// sqrt_pipelined
//
// Fully pipelined, digit-by-digit (one result bit per stage) integer
// square root of an unsigned radicand. A new radicand is accepted on
// every rising clock edge; the matching root appears OUTPUT_BITS + 1
// rising edges later (OUTPUT_BITS compute stages plus the output
// register). The result is the truncated root, i.e. the largest value
// whose square does not exceed the radicand.
//
// Parameters:
//   INPUT_BITS  - width of the radicand. Intended to be overridden by
//                 the instantiating module.
//
// Ports:
//   clk         - clock, all registers update on the rising edge
//   reset_n     - asynchronous reset, active low; clears every register
//   start       - optional marker that travels with the data; it does
//                 not gate the datapath, which computes every cycle
//   radicand    - unsigned value to take the square root of
//   data_valid  - `start` delayed by the pipeline latency
//   root        - unsigned truncated square root of `radicand`
module sqrt_pipelined
  #(
    // WARNING!!! THIS PARAMETER IS INTENDED TO BE MODIFIED IN A TOP
    // LEVEL MODULE. LOCAL CHANGES HERE WILL, MOST LIKELY, BE
    // OVERWRITTEN!
    //
    // Declared in the module header so that it is visible before the
    // port declarations that depend on it.
    parameter INPUT_BITS = 16 // number of input bits
    )
  (
   input                        clk,        // clock
   input                        reset_n,    // asynchronous reset (active low)
   input                        start,      // optional start signal
   input [INPUT_BITS-1:0]       radicand,   // unsigned radicand
   output reg                   data_valid, // optional data valid signal
   // Width is written out here because OUTPUT_BITS is only declared in
   // the module body; it is the same expression as OUTPUT_BITS below.
   output reg [(INPUT_BITS / 2 + INPUT_BITS % 2)-1:0] root // unsigned root
   );

  localparam
    OUTPUT_BITS  = INPUT_BITS / 2 + INPUT_BITS % 2; // number of output bits

  // Per-stage state, packed as OUTPUT_BITS slices of INPUT_BITS bits.
  // Slice k (bits [INPUT_BITS*k +: INPUT_BITS]) belongs to stage k.
  reg [OUTPUT_BITS-1:0]             start_gen;    // valid data propagation
  reg [OUTPUT_BITS*INPUT_BITS-1:0]  root_gen;     // partial root values
  reg [OUTPUT_BITS*INPUT_BITS-1:0]  radicand_gen; // remaining radicand values
  wire [OUTPUT_BITS*INPUT_BITS-1:0] mask_gen;     // constant mask values

  // First stage of the pipeline. The partial root is still zero here,
  // so the test reduces to comparing the largest mask (slice 0) with
  // the incoming radicand.
  always @ (posedge clk or negedge reset_n) begin
    if (!reset_n) begin
      start_gen[0]                  <= 0;
      radicand_gen[0 +: INPUT_BITS] <= 0;
      root_gen[0 +: INPUT_BITS]     <= 0;
    end
    else begin
      start_gen[0] <= start;
      if (mask_gen[0 +: INPUT_BITS] <= radicand) begin
        radicand_gen[0 +: INPUT_BITS] <= radicand - mask_gen[0 +: INPUT_BITS];
        root_gen[0 +: INPUT_BITS]     <= mask_gen[0 +: INPUT_BITS];
      end
      else begin
        radicand_gen[0 +: INPUT_BITS] <= radicand;
        root_gen[0 +: INPUT_BITS]     <= 0;
      end
    end
  end

  // Main generate loop to create the masks and pipeline stages.
  generate
    genvar i;

    // Generate all the mask values. Each mask is a single set bit at
    // an even bit position (a power of four):
    //
    // LAST MASK:  0x00...001
    //             0x00...004  Increasing # OUTPUT_BITS
    //             0x00...010          |
    //             0x00...040          v
    //                 ...
    // FIRST MASK: 0x10...000  # masks == # OUTPUT_BITS
    //
    // The first mask used can be of the 0x1... or the 0x4... variety
    // depending on the number of stages, but the last mask used is
    // always 0x1 and the second to last is always 0x4.
    //
    // Mask number i has the value 1 << (2 * i) and is stored in slice
    // OUTPUT_BITS-1-i, so the largest mask sits in slice 0 (used by
    // the first stage) and the mask 0x1 sits in the last slice.
    for (i = 0; i < OUTPUT_BITS; i = i + 1) begin: mask_4
      assign mask_gen[INPUT_BITS*(OUTPUT_BITS-i-1) +: INPUT_BITS] = 1 << (2 * i);
    end

    // Generate the remaining OUTPUT_BITS - 1 pipeline stages. Loop
    // index i reads the state held in slice i and writes slice i+1,
    // using the mask of slice i+1 (the mask of slice 0 was already
    // consumed by the first stage above).
    //
    // If root + mask is less than or equal to the remaining radicand,
    // both are subtracted from the radicand and the root becomes
    // (root >> 1) + mask. Otherwise the radicand is passed through
    // unchanged and the root is only shifted right by one.
    for (i = 0; i < OUTPUT_BITS - 1; i = i + 1) begin: pipeline
      always @ (posedge clk or negedge reset_n) begin : pipeline_stage
        if (!reset_n) begin
          start_gen[i+1]                               <= 0;
          radicand_gen[INPUT_BITS*(i+1) +: INPUT_BITS] <= 0;
          root_gen[INPUT_BITS*(i+1) +: INPUT_BITS]     <= 0;
        end
        else begin
          start_gen[i+1] <= start_gen[i];
          if ((root_gen[INPUT_BITS*i +: INPUT_BITS] +
               mask_gen[INPUT_BITS*(i+1) +: INPUT_BITS]) <= radicand_gen[INPUT_BITS*i +: INPUT_BITS]) begin
            radicand_gen[INPUT_BITS*(i+1) +: INPUT_BITS] <= radicand_gen[INPUT_BITS*i +: INPUT_BITS] -
                                                            mask_gen[INPUT_BITS*(i+1) +: INPUT_BITS] -
                                                            root_gen[INPUT_BITS*i +: INPUT_BITS];
            root_gen[INPUT_BITS*(i+1) +: INPUT_BITS]     <= (root_gen[INPUT_BITS*i +: INPUT_BITS] >> 1) +
                                                            mask_gen[INPUT_BITS*(i+1) +: INPUT_BITS];
          end
          else begin
            radicand_gen[INPUT_BITS*(i+1) +: INPUT_BITS] <= radicand_gen[INPUT_BITS*i +: INPUT_BITS];
            root_gen[INPUT_BITS*(i+1) +: INPUT_BITS]     <= root_gen[INPUT_BITS*i +: INPUT_BITS] >> 1;
          end
        end
      end
    end
  endgenerate

  // Output register. The low OUTPUT_BITS bits of the last root slice
  // are presented on `root` together with the delayed start marker.
  // No rounding is applied: the result is the truncated root. In order
  // to add convergent rounding, increase the input bit width by 2
  // (which increases the number of pipeline stages by 1) and implement
  // the rounding in the module that instantiates this one.
  always @ (posedge clk or negedge reset_n) begin
    if (!reset_n) begin
      data_valid <= 0;
      root       <= 0;
    end
    else begin
      data_valid <= start_gen[OUTPUT_BITS-1];
      root       <= root_gen[INPUT_BITS*(OUTPUT_BITS-1) +: OUTPUT_BITS];
    end
  end

endmodule
 |