Files
linguist/samples/Verilog/sqrt_pipelined.v
2012-07-23 15:52:49 -05:00

155 lines
7.3 KiB
Verilog
Executable File

////////////////////////////////////////////////////////////////////////////////
// Original Author: Schuyler Eldridge
// Contact Point: Schuyler Eldridge (schuyler.eldridge@gmail.com)
// sqrt_pipelined.v
// Created: 4.2.2012
// Modified: 4.5.2012
//
// Implements a fixed-point parameterized pipelined square root
// operation on an unsigned input of any bit length. The number of
// stages in the pipeline is equal to the number of output bits in the
// computation. This pipelien sustains a throughput of one computation
// per clock cycle.
//
// Copyright (C) 2012 Schuyler Eldridge, Boston University
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
////////////////////////////////////////////////////////////////////////////////
`timescale 1ns / 1ps
module sqrt_pipelined
(
input clk, // clock
input reset_n, // asynchronous reset
input start, // optional start signal
input [INPUT_BITS-1:0] radicand, // unsigned radicand
output reg data_valid, // optional data valid signal
output reg [OUTPUT_BITS-1:0] root // unsigned root
);
// WARNING!!! THESE PARAMETERS ARE INTENDED TO BE MODIFIED IN A TOP
// LEVEL MODULE. LOCAL CHANGES HERE WILL, MOST LIKELY, BE
// OVERWRITTEN!
parameter
INPUT_BITS = 16; // number of input bits (any integer)
localparam
OUTPUT_BITS = INPUT_BITS / 2 + INPUT_BITS % 2; // number of output bits
reg [OUTPUT_BITS-1:0] start_gen; // valid data propagation
reg [OUTPUT_BITS*INPUT_BITS-1:0] root_gen; // root values
reg [OUTPUT_BITS*INPUT_BITS-1:0] radicand_gen; // radicand values
wire [OUTPUT_BITS*INPUT_BITS-1:0] mask_gen; // mask values
// This is the first stage of the pipeline.
always @ (posedge clk or negedge reset_n) begin
if (!reset_n) begin
start_gen[0] <= 0;
radicand_gen[INPUT_BITS-1:0] <= 0;
root_gen[INPUT_BITS-1:0] <= 0;
end
else begin
start_gen[0] <= start;
if ( mask_gen[INPUT_BITS-1:0] <= radicand ) begin
radicand_gen[INPUT_BITS-1:0] <= radicand - mask_gen[INPUT_BITS-1:0];
root_gen[INPUT_BITS-1:0] <= mask_gen[INPUT_BITS-1:0];
end
else begin
radicand_gen[INPUT_BITS-1:0] <= radicand;
root_gen[INPUT_BITS-1:0] <= 0;
end
end
end
// Main generate loop to create the masks and pipeline stages.
generate
genvar i;
// Generate all the mask values. These are built up in the
// following fashion:
// LAST MASK: 0x00...001
// 0x00...004 Increasing # OUTPUT_BITS
// 0x00...010 |
// 0x00...040 v
// ...
// FIRST MASK: 0x10...000 # masks == # OUTPUT_BITS
//
// Note that the first mask used can either be of the 0x1... or
// 0x4... variety. This is purely determined by the number of
// computation stages. However, the last mask used will always be
// 0x1 and the second to last mask used will always be 0x4.
for (i = 0; i < OUTPUT_BITS; i = i + 1) begin: mask_4
if (i % 2) // i is odd, this is a 4 mask
assign mask_gen[INPUT_BITS*(OUTPUT_BITS-i)-1:INPUT_BITS*(OUTPUT_BITS-i-1)] = 4 << 4 * (i/2);
else // i is even, this is a 1 mask
assign mask_gen[INPUT_BITS*(OUTPUT_BITS-i)-1:INPUT_BITS*(OUTPUT_BITS-i-1)] = 1 << 4 * (i/2);
end
// Generate all the pipeline stages to compute the square root of
// the input radicand stream. The general approach is to compare
// the current values of the root plus the mask to the
// radicand. If root/mask sum is greater than the radicand,
// subtract the mask and the root from the radicand and store the
// radicand for the next stage. Additionally, the root is
// increased by the value of the mask and stored for the next
// stage. If this test fails, then the radicand and the root
// retain their value through to the next stage. The one weird
// thing is that the mask indices appear to be incremented by one
// additional position. This is not the case, however, because the
// first mask is used in the first stage (always block after the
// generate statement).
for (i = 0; i < OUTPUT_BITS - 1; i = i + 1) begin: pipeline
always @ (posedge clk or negedge reset_n) begin : pipeline_stage
if (!reset_n) begin
start_gen[i+1] <= 0;
radicand_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)] <= 0;
root_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)] <= 0;
end
else begin
start_gen[i+1] <= start_gen[i];
if ((root_gen[INPUT_BITS*(i+1)-1:INPUT_BITS*i] +
mask_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)]) <= radicand_gen[INPUT_BITS*(i+1)-1:INPUT_BITS*i]) begin
radicand_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)] <= radicand_gen[INPUT_BITS*(i+1)-1:INPUT_BITS*i] -
mask_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)] -
root_gen[INPUT_BITS*(i+1)-1:INPUT_BITS*i];
root_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)] <= (root_gen[INPUT_BITS*(i+1)-1:INPUT_BITS*i] >> 1) +
mask_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)];
end
else begin
radicand_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)] <= radicand_gen[INPUT_BITS*(i+1)-1:INPUT_BITS*i];
root_gen[INPUT_BITS*(i+2)-1:INPUT_BITS*(i+1)] <= root_gen[INPUT_BITS*(i+1)-1:INPUT_BITS*i] >> 1;
end
end
end
end
endgenerate
// This is the final stage which just implements a rounding
// operation. This stage could be tacked on as a combinational logic
// stage, but who cares about latency, anyway? This is NOT a true
// rounding stage. In order to add convergent rounding, you need to
// increase the input bit width by 2 (increase the number of
// pipeline stages by 1) and implement rounding in the module that
// instantiates this one.
always @ (posedge clk or negedge reset_n) begin
if (!reset_n) begin
data_valid <= 0;
root <= 0;
end
else begin
data_valid <= start_gen[OUTPUT_BITS-1];
if (root_gen[OUTPUT_BITS*INPUT_BITS-1:OUTPUT_BITS*INPUT_BITS-INPUT_BITS] > root_gen[OUTPUT_BITS*INPUT_BITS-1:OUTPUT_BITS*INPUT_BITS-INPUT_BITS])
root <= root_gen[OUTPUT_BITS*INPUT_BITS-1:OUTPUT_BITS*INPUT_BITS-INPUT_BITS] + 1;
else
root <= root_gen[OUTPUT_BITS*INPUT_BITS-1:OUTPUT_BITS*INPUT_BITS-INPUT_BITS];
end
end
endmodule