design/sys/iop/spc/fgu/rtl/fgu_fdc_ctl.v

// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T2 Processor File: fgu_fdc_ctl.v
// Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved
// 4150 Network Circle, Santa Clara, California 95054, U.S.A.
//
// * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; version 2 of the License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// For the avoidance of doubt, and except that if any non-GPL license
// choice is available it will apply instead, Sun elects to use only
// the General Public License version 2 (GPLv2) at this time for any
// software where a choice of GPL license versions is made
// available with the language indicating that GPLv2 or any later version
// may be used, or where a choice of which version of the GPL is applied is
// otherwise unspecified.
//
// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
// CA 95054 USA or visit www.sun.com if you need additional information or
// have any questions.
//
// ========== Copyright Header End ============================================
module fgu_fdc_ctl (
  l2clk,
  scan_in,
  tcu_pce_ov,
  spc_aclk,
  spc_bclk,
  tcu_scan_en,
  scan_out,
  fac_div_flush_fx3,
  fpe_rs2_fmt_fx1_b0,
  fpf_hi_bof_fx1,
  fpf_lo_bof_fx1,
  fpf_sa_xor_sb_fx1,
  fac_div_valid_fx1,
  fac_divq_valid_fx1,
  fac_div_control_fx1,
  fpc_rd_mode_fx3,
  fpc_emin_fx3,
  div_clken,
  fdd_pe_clth,
  fdd_cla_zero32_,
  fdd_cla_zero64_,
  fdd_result,
  fdd_fdx_din0,
  fdd_fdx_din1,
  fdd_fdx_cin64,
  fdd_fdq00_10_sum,
  fdd_fdq00_10_carry,
  fdd_fdq1p_sum,
  fdd_fdq1p_carry,
  fdd_fdq1n_sum,
  fdd_fdq1n_carry,
  fgu_fdiv_stall,
  fgu_idiv_stall,
  fdc_dec_exp_early,
  fdc_icc_v_early,
  fdc_xicc_z_early,
  fdc_finish_int_early,
  fdc_finish_fltd_early,
  fdc_finish_flts_early,
  fdc_flt_inexact,
  fdc_asign_lth,
  fdc_bsign_lth,
  fdc_bsign_lth_,
  fdc_pe_cycle3,
  fdc_pe_cmux_sel,
  fdc_pe_smux_sel,
  fdc_pe_xsht_ctl,
  fdc_ie_fsqrt_valid_even,
  fdc_ie_fsqrt_valid_even_,
  fdc_ie_fsqrt_valid_odd,
  fdc_ie_fsqrt_valid_odd_,
  fdc_ie_rmux_sel,
  fdc_ie_dmux_sel,
  fdc_flt_increment,
  fdc_pte_clasel,
  fdc_pte_csa_cin,
  fdc_pte_cycle2,
  fdc_emin_lth,
  fdc_pte_qsel,
  fdc_pte_stall_,
  fdc_flt_round,
  fdc_idiv_ctl,
  fdc_fdx_cin_in,
  fdc_qsel00,
  fdc_qsel1,
  fdc_q_in,
  fdc_qm1_in);
wire pce_ov;
wire stop;
wire siclk;
wire soclk;
wire se;
wire l1clk_pm1;
wire spares_scanin;
wire spares_scanout;
wire incoming_sign_fx1;
wire [7:0] qcontrol_in;
wire fsqrt_fract_all_ones;
wire [7:0] qcontrol_fx1;
wire qdata_lth_scanin;
wire qdata_lth_scanout;
wire valid_in;
wire valid_lth;
wire [3:0] fdc_pte_cycle;
wire div_flush_lth;
wire [3:1] pe_cycle_in;
wire [3:1] fdc_pe_cycle;
wire engine_running_in;
wire engine_stop;
wire engine_running_lth;
wire engine_on;
wire [7:0] pe_ndq;
wire [3:0] pte_cycle_in;
wire [4:0] control_lth;
wire cntl_lth_scanin;
wire cntl_lth_scanout;
wire [2:0] pe_hmux_sel;
wire b_neg_one;
wire [7:0] pe_xsht_amt;
wire a_neg_max;
wire ovfl_64_in;
wire b_neg_one_lth;
wire finish_raw;
wire ovfl_64_lth;
wire flt_shift_sel_;
wire ovlf_lth_scanin;
wire ovlf_lth_scanout;
wire cla_zero64_lth_;
wire cla_zero32_lth_;
wire fdiv_stall_in;
wire stall_hold;
wire idiv_stall_in;
wire stall_hold_in;
wire [3:0] stall_cnt_raw;
wire [3:0] stall_cnt_in;
wire finish_lth;
wire [3:0] stall_cnt;
wire stall_lth_scanin;
wire stall_lth_scanout;
wire finish_lth_in;
wire idiv_stall_lth;
wire [4:0] control_in;
wire asign_in;
wire bsign_in;
wire ndq_odd_in;
wire data_lth_scanin;
wire data_lth_scanout;
wire ndq_odd_lth;
wire ndq_odd_2lth;
wire [63:0] clth;
wire b0_nor_76;
wire b0_nor_54;
wire b0_nor_32;
wire b0_nor_10;
wire b0_zeroh_;
wire b0_zerol_;
wire b0_zero_;
wire [1:0] b0_cnth;
wire [1:0] b0_cntl;
wire [2:0] b0_cnt;
wire b1_nor_76;
wire b1_nor_54;
wire b1_nor_32;
wire b1_nor_10;
wire b1_zeroh_;
wire b1_zerol_;
wire b1_zero_;
wire [1:0] b1_cnth;
wire [1:0] b1_cntl;
wire [2:0] b1_cnt;
wire b2_nor_76;
wire b2_nor_54;
wire b2_nor_32;
wire b2_nor_10;
wire b2_zeroh_;
wire b2_zerol_;
wire b2_zero_;
wire [1:0] b2_cnth;
wire [1:0] b2_cntl;
wire [2:0] b2_cnt;
wire b3_nor_76;
wire b3_nor_54;
wire b3_nor_32;
wire b3_nor_10;
wire b3_zeroh_;
wire b3_zerol_;
wire b3_zero_;
wire [1:0] b3_cnth;
wire [1:0] b3_cntl;
wire [2:0] b3_cnt;
wire b4_nor_76;
wire b4_nor_54;
wire b4_nor_32;
wire b4_nor_10;
wire b4_zeroh_;
wire b4_zerol_;
wire b4_zero_;
wire [1:0] b4_cnth;
wire [1:0] b4_cntl;
wire [2:0] b4_cnt;
wire b5_nor_76;
wire b5_nor_54;
wire b5_nor_32;
wire b5_nor_10;
wire b5_zeroh_;
wire b5_zerol_;
wire b5_zero_;
wire [1:0] b5_cnth;
wire [1:0] b5_cntl;
wire [2:0] b5_cnt;
wire b6_nor_76;
wire b6_nor_54;
wire b6_nor_32;
wire b6_nor_10;
wire b6_zeroh_;
wire b6_zerol_;
wire b6_zero_;
wire [1:0] b6_cnth;
wire [1:0] b6_cntl;
wire [2:0] b6_cnt;
wire b7_nor_76;
wire b7_nor_54;
wire b7_nor_32;
wire b7_nor_10;
wire b7_zeroh_;
wire b7_zerol_;
wire b7_zero_;
wire [1:0] b7_cnth;
wire [1:0] b7_cntl;
wire [2:0] b7_cnt;
wire b3_0sel;
wire b2_0sel;
wire b1_0sel;
wire b0_0sel;
wire [4:0] cntl0l;
wire b7_0sel;
wire b6_0sel;
wire b5_0sel;
wire b4_0sel;
wire [4:0] cntl0h;
wire cntl0_selh;
wire cntl0_sell;
wire [6:0] cntl0;
wire b7_nand_74;
wire b7_nand_30;
wire b7_ones;
wire b7_ones_;
wire [1:0] b7_cnt1h;
wire [1:0] b7_cnt1l;
wire [2:0] b7_cnt1;
wire b6_nand_74;
wire b6_nand_30;
wire b6_ones;
wire b6_ones_;
wire [1:0] b6_cnt1h;
wire [1:0] b6_cnt1l;
wire [2:0] b6_cnt1;
wire b5_nand_74;
wire b5_nand_30;
wire b5_ones;
wire b5_ones_;
wire [1:0] b5_cnt1h;
wire [1:0] b5_cnt1l;
wire [2:0] b5_cnt1;
wire b4_nand_74;
wire b4_nand_30;
wire b4_ones;
wire b4_ones_;
wire [1:0] b4_cnt1h;
wire [1:0] b4_cnt1l;
wire [2:0] b4_cnt1;
wire b3_nand_74;
wire b3_nand_30;
wire b3_ones;
wire b3_ones_;
wire [1:0] b3_cnt1h;
wire [1:0] b3_cnt1l;
wire [2:0] b3_cnt1;
wire b2_nand_74;
wire b2_nand_30;
wire b2_ones;
wire b2_ones_;
wire [1:0] b2_cnt1h;
wire [1:0] b2_cnt1l;
wire [2:0] b2_cnt1;
wire b1_nand_74;
wire b1_nand_30;
wire b1_ones;
wire b1_ones_;
wire [1:0] b1_cnt1h;
wire [1:0] b1_cnt1l;
wire [2:0] b1_cnt1;
wire b0_nand_74;
wire [1:0] b0_cnt1h;
wire [1:0] b0_cnt1l;
wire [2:0] b0_cnt1;
wire b3_1sel;
wire b2_1sel;
wire b1_1sel;
wire b0_1sel;
wire [4:0] cntl1l;
wire b7_1sel;
wire b6_1sel;
wire b5_1sel;
wire b4_1sel;
wire [4:0] cntl1h;
wire cntl1_selh;
wire [5:0] cntl1;
wire xsht_amt_sel10;
wire xsht_amt_sel11;
wire xsht_amt_sel20;
wire xsht_amt_sel21;
wire [7:0] xsht_amt_in;
wire [7:0] pe_hamt_lth;
wire [7:0] pe_hamt_in;
wire xsht_lth_scanin;
wire xsht_lth_scanout;
wire hamt_lth_scanin;
wire hamt_lth_scanout;
wire [5:0] xsht_ctl_in;
wire xcntl_lth_scanin;
wire xcntl_lth_scanout;
wire engine_valid_fx1;
wire engine_valid_fx2;
wire queue_valid_lth_fx2;
wire engine_valid_lth_fx2;
wire engine_valid_fx3;
wire queue_valid_lth_fx3;
wire engine_valid_lth_fx3;
wire queue_valid_fx1;
wire queue_valid_fx2;
wire q2e_fx3p;
wire xrnd_vld_lth_scanin;
wire xrnd_vld_lth_scanout;
wire [1:0] eround_mode_in;
wire [1:0] qround_mode_lth;
wire [1:0] eround_mode_lth;
wire e_emin_in;
wire q_emin_lth;
wire [1:0] qround_mode_in;
wire q_emin_in;
wire float_sign_in;
wire float_sign_lth;
wire flt_sqrte_kill_dec;
wire inexact_in;
wire final_sticky;
wire final_guard;
wire xrnd_lth_scanin;
wire xrnd_lth_scanout;
wire sticky_pte1;
wire sticky_pte0;
wire final_lsb;
wire flt_rnd00_en;
wire flt_rnd1x_en;
wire fsqrt_special_in;
wire fsqrt_special_lth;
wire spec_sqrt_lth_scanin;
wire spec_sqrt_lth_scanout;
wire cla_64;
wire cin_in_raw;
wire [3:0] fdq00_sum;
wire [3:0] fdq00_carry;
wire [3:0] pr00;
wire [3:0] pr1p;
wire [2:0] qsel1p;
wire [3:0] fdq10_sum;
wire [3:0] fdq10_carry;
wire [3:0] pr10;
wire [2:0] qsel10;
wire [3:0] pr1n;
wire [2:0] qsel1n;
wire engine_start;


// Timing constraints definition :
//   For Inputs  : Required setup to the end of the cycle
//   For Outputs : Actual time the signal leaves block measured from L1CLK rise
//   For pin location : I am assuming dataflow direction is vertical


// *** globals ***
input           l2clk;
input           scan_in;
input           tcu_pce_ov;             // scan signals
input           spc_aclk;
input           spc_bclk;
input           tcu_scan_en;
output          scan_out;

input           fac_div_flush_fx3;
input           fpe_rs2_fmt_fx1_b0;
input           fpf_hi_bof_fx1;
input           fpf_lo_bof_fx1;
input           fpf_sa_xor_sb_fx1;

input           fac_div_valid_fx1;      //  div_valid  divq_valid  |  action
input           fac_divq_valid_fx1;     //  ---------  ----------  |  ---------------------------------------
                                        //     1           0       |  start divide from FX1   RS1/RS2/control
                                        //     0           1       |  load  queue  from FX1   RS1/RS2/control
                                        //     1           1       |  start divide from queue RS1/RS2/control

input  [4:0]    fac_div_control_fx1;   // 0in value -var fac_div_control_fx1[3:0] -val 4'b0000 4'b0010 4'b0100 4'b0101 4'b0110 4'b0111 4'b1000 4'b1010 -active (fac_div_valid_fx1 ^ fac_divq_valid_fx1)
                                       // [3:0]  :                            [4] : Thread Group
                                       //  0000  : Float Divide Single
                                       //  0010  : Float Divide Double
                                       //  0100  : Integer Unsigned - 32 bit
                                       //  0101  : Integer   Signed - 32 bit
                                       //  0110  : Integer Unsigned - 64 bit
                                       //  0111  : Integer   Signed - 64 bit
                                       //  1000  : Float SQRT   Single
                                       //  1010  : Float SQRT   Double

input  [1:0]    fpc_rd_mode_fx3;
input           fpc_emin_fx3;

input           div_clken;             // div clken


// *** locals  ***
input  [63:0]   fdd_pe_clth;
input           fdd_cla_zero32_;
input           fdd_cla_zero64_;
input  [63:9]   fdd_result;

input           fdd_fdx_din0;
input           fdd_fdx_din1;
input           fdd_fdx_cin64;

input  [4:0]    fdd_fdq00_10_sum;
input  [4:0]    fdd_fdq00_10_carry;
input  [3:0]    fdd_fdq1p_sum;
input  [3:0]    fdd_fdq1p_carry;
input  [3:0]    fdd_fdq1n_sum;
input  [3:0]    fdd_fdq1n_carry;


// *** globals ***
output          fgu_fdiv_stall;
output [1:0]    fgu_idiv_stall;         // Stall by Thread Group
output          fdc_dec_exp_early;
output          fdc_icc_v_early;
output [1:0]    fdc_xicc_z_early;
output          fdc_finish_int_early;
output          fdc_finish_fltd_early;
output          fdc_finish_flts_early;
output          fdc_flt_inexact;


// *** locals  ***
output          fdc_asign_lth;
output          fdc_bsign_lth;
output          fdc_bsign_lth_;
output          fdc_pe_cycle3;
output          fdc_pe_cmux_sel;
output [2:0]    fdc_pe_smux_sel;
output [5:0]    fdc_pe_xsht_ctl;
output          fdc_ie_fsqrt_valid_even;
output          fdc_ie_fsqrt_valid_even_;
output          fdc_ie_fsqrt_valid_odd;
output          fdc_ie_fsqrt_valid_odd_;
output [4:0]    fdc_ie_rmux_sel;
output [2:0]    fdc_ie_dmux_sel;
output          fdc_flt_increment;
output [1:0]    fdc_pte_clasel;
output          fdc_pte_csa_cin;
output          fdc_pte_cycle2;
output          fdc_emin_lth;
output [2:0]    fdc_pte_qsel;
output          fdc_pte_stall_;

output [1:0]    fdc_flt_round;
output [4:0]    fdc_idiv_ctl;           // 0in bits_on -max 1 -var fdc_idiv_ctl[3:0]
                                        //  3210   [4] = integer
                                        //  ----
                                        //  0001 : 8000 0000 0000 0000
                                        //  0010 : FFFF FFFF 8000 0000
                                        //  0100 : 0000 0000 7FFF FFFF
                                        //  1000 : 0000 0000 FFFF FFFF

output          fdc_fdx_cin_in;
output [2:0]    fdc_qsel00;
output [2:0]    fdc_qsel1;

output [1:0]    fdc_q_in;
output [1:0]    fdc_qm1_in;

// scan renames
// Local aliases for the scan/clock-control pins; "stop" is tied to constant 0.
assign pce_ov = tcu_pce_ov,
       stop   = 1'b0,
       siclk  = spc_aclk,
       soclk  = spc_bclk,
       se     = tcu_scan_en;
// end scan


// L1 clock header: produces l1clk_pm1 from l2clk, with div_clken wired to the
// enable input (.l1en).  l1clk_pm1 clocks the spare cells and every flop macro
// in the visible part of this module.
fgu_fdc_ctl_l1clkhdr_ctl_macro clkgen_pm1 (
  .l2clk(l2clk),
  .l1en (div_clken),
  .l1clk(l1clk_pm1),
  .pce_ov(pce_ov),
  .stop(stop),
  .se(se)
  );

// Spare cells, clocked by l1clk_pm1 and connected to the scan clocks
// (siclk/soclk) through spares_scanin/spares_scanout.
fgu_fdc_ctl_spare_ctl_macro__num_3 spares  (  // spares: 13 gates + 1 flop for each "num"
  .scan_in(spares_scanin),
  .scan_out(spares_scanout),
  .l1clk(l1clk_pm1),
  .siclk(siclk),
  .soclk(soclk)
  );


// Result sign of the incoming operation: sign(A) xor sign(B), forced to 0 when
// control bit [3] is set (the SQRT encodings 1000 / 1010).
assign incoming_sign_fx1= fpf_sa_xor_sb_fx1 & ~fac_div_control_fx1[3];  // Turn off for Square Root

// Queue control register (8 bits), load/hold mux:
//   fac_divq_valid_fx1 = 1 : capture the FX1 control bundle
//   fac_divq_valid_fx1 = 0 : recirculate the current queue contents
// Bit layout of qcontrol_*[7:0]:
//   [7]   fsqrt_fract_all_ones
//   [6]   incoming_sign_fx1
//   [5]   fpe_rs2_fmt_fx1_b0
//   [4:0] fac_div_control_fx1[4:0]
assign qcontrol_in[7:0] = ({8{ fac_divq_valid_fx1}} & {fsqrt_fract_all_ones,incoming_sign_fx1,fpe_rs2_fmt_fx1_b0,fac_div_control_fx1[4:0]}) |
                          ({8{~fac_divq_valid_fx1}} & qcontrol_fx1[7:0]                                                                   );

fgu_fdc_ctl_msff_ctl_macro__width_8     qdata_lth        (
 .scan_in(qdata_lth_scanin),
 .scan_out(qdata_lth_scanout),
 .l1clk( l1clk_pm1         ),
 .din  ({qcontrol_in[7:0] }),
 .dout ({qcontrol_fx1[7:0]}),
  .siclk(siclk),
  .soclk(soclk));


// * * * * * * * * * * * * Main Controller * * * * * * * * * * * *


// *** State control ***

//* * * * * * * * * * * * "pre-engine" (integer only)* * * * *
//
// cycle  0   :  fac_div_valid_fx1  A&B are transmitted to divide hardware
//                                  A loaded into Slth
//                                  B loaded into Clth;
//
// cycle  1   :  pe_cycle[1]        B into CNTL0 and CNTL1 -> compute "lsb";
//                                  A loaded into Clth;
//                                  B loaded into Slth;
//
// cycle  2   :  pe_cycle[2]        A into CNTL0 and CNTL1 -> compute "lsa";
//                                  B shifts by "lsb" amount
//                                  A loaded into Slth;
//                                  Xsht loaded into Clth;  (this is Bsh)
//
// cycle  3   :  pe_cycle[3]        A shifts by "lsa" amount;
//               engine_start       Bsh is XORed to produce positive divisor
//                                  compute ndq = lsb - lsa + 1;
//                                  finished if ndq <= 0;        (ie B > A)
//
//* * * * * * * * * * * * * "engine" * * * * * * * * * * * * *
//
// See Integer "engine" run-time below for how ndq is computed.
//
// if (even ndq)
//    then X =  ndq / 2
//    else X = (ndq - 1) / 2
//
// for X cycles
//    perform loop
//
//  cycle X+3 :  engine_stop      last loop
//
//* * * * * * * * * * * * "post-engine" * * * * * * * * * * * *
//
//  cycle X+4 :  pte_cycle[3]       load "S0" and "C0" latches into adder latches
//                                  For an odd ndq, we actually compute the last Q and Qm1
//                                  and then load "C1" and "S1" into the adder latches.
//
//  cycle X+5 :  pte_cycle[2]       compute Sign of Remainder
//                                  compute zero remainder (used as Sticky and in correction)
//                                  load Q and Qm1 into adder latches
//                                  make correction if necessary
//                                  turn off valid_lth (new divide will NOT affect result)
//
//  cycle X+6 :  pte_cycle[1]       compute Qf = Q - Qm1 + correction;
//                                  64-bit Integer - load into Result latch
//
//  cycle X+7    pte_cycle[0]       32-bit Integer - Overflow detection and correction
//                                  Floating Point - Round
//
//  cycle X+7/8  fdc_finish         transmit Result
//
//
//* * * * * * * * * * * * Integer "engine" run-time * * * * * * * * * *
//
//
// Define : lsa  : number of Leading Sign bits in A (n-1 for negative)
//          lsb  : number of Leading Sign bits in B (n   for negative)
//          nda  : number of digits in A (nda=64-lsa)
//          ndb  : number of digits in B (ndb=64-lsb)
//          ndq  : number of digits in the Quotient Q  (MAX)
//
// Then ndq = nda - (ndb - 1);
//
// The minus one comes from the fact that dividing by '1' does not reduce
// the number of significant bits in the dividend (A operand).
//
// By substitution :
//
//      ndq = [64 - lsa] - ([64 - lsb] - 1);
//          = lsb - lsa + 1;
//
//
// Example :      A = 0000 1111 (+15)    lsa = 4
// (8-bit)        B = 0000 0010 (+2)     lsb = 6
//                ------------------
//                R = 0000 0111 (+7)     notice ndq=3
//
//                compute ndq = lsb - lsa + 1
//                            =  6  -  4  + 1
//                            =  3             (this is a MAX ndq computation)
//
//
// Example :      A = 0000 1000 (+8)     lsa = 4
// (8-bit)        B = 0000 0011 (+3)     lsb = 6
//                ------------------
//                R = 0000 0010 (+2)     notice ndq=2
//
//                compute ndq = lsb - lsa + 1
//                            =  6  -  4  + 1
//                            =  3             (this is a MAX ndq computation)
//
//
// Example :      A = 1111 0000 (-16)    lsa = 3  (n-1 for negative)
// (8-bit)        B = 0000 0010 (+2)     lsb = 6
//                ------------------
//                R = 1111 1000 (-8)     notice ndq=4
//
//                compute ndq = lsb - lsa + 1
//                            =  6  -  3  + 1
//                            =  4             (this is a MAX ndq computation)
//
//
//
//
//
//* * * * * * * * * * * * * Total cycle count * * * * * * * * * * * *
//
// *** 32-bit Integer Division***
//
// FX1    transmit of RS1 and RS2
//  3     "pre-engine"
//  X     "engine"        where   X = (lsb-lsa)// 2; {0 to 32 cycles}
//  4     "post-engine"   where   4th cycle is Overflow detection and correction
//  S     "engine-stall"  where   S = 4 - X;  {0 to 4 cycles}  (needed to sync up with bubble)
// FX5    transmit to EXU
// W
// ---
// 10+X+S   {14 to 42 cycles}
//
//
// *** 64-bit Integer Division***
//
// FX1    transmit of RS1 and RS2
//  3     "pre-engine"
//  X     "engine"        where   X = (lsb-lsa)// 2; {0 to 32 cycles}
//  3     "post-engine"
//  S     "engine-stall"  where   S = 4 - X;  {0 to 4 cycles}  (needed to sync up with bubble)
// FX5    transmit to EXU
// W
// ---
//  9+X+S   {13 to 41 cycles}
//
//
// *** Float Double Precision divide and square root***
//
// FX1    transmit of RS1 and RS2
//  27    "engine"        need 53 mantissa + 1 guard + 1 for 0.1 = 55
//                        27 cycles compute 54 quotient digits.  The last bit is computed during pte_cycle[3].
//  4     "post-engine"
// FB
// FW
// FW1
// ---
// 35 cycles
//
//
// *** Float Single Precision divide and square root ***
//
// FX1    transmit of RS1 and RS2
//  13    "engine"        need 24 mantissa + 1 guard + 1 for 0.1 = 26
//  4     "post-engine"
// FB
// FW
// FW1
// ---
// 21 cycles


// valid_lth: set when a divide starts; held until pte_cycle[2] or a flush.
assign valid_in           = (fac_div_valid_fx1                                            ) |
                            (valid_lth          & ~fdc_pte_cycle[2]       & ~div_flush_lth);

// Pre-engine cycle 1 is entered only when the starting op has control bit [2]
// set (the integer encodings 01xx).  The control bit comes from FX1 when
// starting directly, or from the queue register when fac_divq_valid_fx1 is
// also high.
assign pe_cycle_in[1]     = (fac_div_valid_fx1  &  fac_div_control_fx1[2] & ~fac_divq_valid_fx1) |
                            (fac_div_valid_fx1  &         qcontrol_fx1[2] &  fac_divq_valid_fx1);

// Pre-engine cycles 2 and 3: shift of the previous pre-engine state, cleared by flush.
assign pe_cycle_in[3:2]   =  fdc_pe_cycle[2:1]                            & {2{~div_flush_lth}};

// Engine-running state:
//   - ops with control bit [2] clear (float) start the engine immediately on
//     fac_div_valid_fx1; these two start terms are not qualified by div_flush_lth
//   - integer ops start it from pe_cycle[3], unless engine_stop is already true
//   - once running, it holds until engine_stop or flush
assign engine_running_in  = (fac_div_valid_fx1  & ~fac_div_control_fx1[2] & ~fac_divq_valid_fx1) |  // start FLT
                            (fac_div_valid_fx1  &        ~qcontrol_fx1[2] &  fac_divq_valid_fx1) |  // start FLT
                            (fdc_pe_cycle[3]    & ~engine_stop            & ~div_flush_lth     ) |  // start INT
                            (engine_running_lth & ~engine_stop            & ~div_flush_lth     );

// Engine is considered "on" in pe_cycle[3] as well as while engine_running_lth is set.
assign engine_on          =  fdc_pe_cycle[3] | engine_running_lth;

// 0in assert_timer -var (engine_running_lth & (control_lth[2:0] == 3'b000)) -max 13 -message "FDIV/FSQRT engine running > 13 cycles for SP"
// 0in assert_timer -var (engine_running_lth & (control_lth[2:0] == 3'b010)) -max 27 -message "FDIV/FSQRT engine running > 27 cycles for DP"
// 0in assert_timer -var (engine_running_lth & (control_lth[3:2] == 2'b01 )) -max 32 -message "IDIV engine running > 32 cycles"

// Stop (last loop) while the engine is on: pe_ndq[6:1] is all zero, or
// pe_ndq[7] is set (the stall table further down labels pe_ndq[7] as "ndq = neg").
assign engine_stop        = ((pe_ndq[6:1] == 6'b000000) & engine_on) |
                            ( pe_ndq[7]                 & engine_on);

// Post-engine state: engine_stop feeds pte_cycle[3], which shifts down to
// [2] and [1]; all cleared by flush.  pte_cycle[0] is skipped when
// control_lth[2] & control_lth[1] (the 64-bit integer encodings 011x).
assign pte_cycle_in[3:1]  = {engine_stop,fdc_pte_cycle[3:2]}                       & {3{~div_flush_lth}};
assign pte_cycle_in[0]    =  fdc_pte_cycle[1] & ~(control_lth[2] & control_lth[1]) &    ~div_flush_lth;

// 10-bit state register: valid, pe_cycle[3:1], engine_running, pte_cycle[3:0],
// plus fac_div_flush_fx3 registered as div_flush_lth.
fgu_fdc_ctl_msff_ctl_macro__width_10    cntl_lth         (
 .scan_in(cntl_lth_scanin),
 .scan_out(cntl_lth_scanout),
 .l1clk( l1clk_pm1                                                                                   ),
 .din  ({valid_in  , pe_cycle_in[3:1]  , engine_running_in  , pte_cycle_in[3:0]  , fac_div_flush_fx3}),
 .dout ({valid_lth , fdc_pe_cycle[3:1] , engine_running_lth , fdc_pte_cycle[3:0] , div_flush_lth    }),
  .siclk(siclk),
  .soclk(soclk));

// 0in bits_on -max 1 -var {fdc_pe_cycle[3:1], engine_running_lth, fdc_pte_cycle[3:0], finish_lth}

// 0in state_transition -var {fac_div_valid_fx1, pe_cycle_in[3:1], engine_running_in} -val {1'b1, 3'b000, 1'b0} -next {1'b0, 3'b001, 1'b0} {1'b0, 3'b000, 1'b1} {1'b0, 3'b000, 1'b0} -match_by_cycle
// 0in state_transition -var  pe_cycle_in[3:1] -val 3'b000 -next 3'b001 3'b000
// 0in state_transition -var  pe_cycle_in[3:1] -val 3'b001 -next 3'b010 3'b000 -match_by_cycle
// 0in state_transition -var  pe_cycle_in[3:1] -val 3'b010 -next 3'b100 3'b000 -match_by_cycle
// 0in state_transition -var {engine_running_in, pe_cycle_in[3:1]} -val {1'b0, 3'b100} -next {1'b1, 3'b000} {1'b0, 3'b000} -match_by_cycle
// 0in state_transition -var {pte_cycle_in[3:0], engine_running_in} -val {4'b0000, 1'b1} -next {4'b1000, 1'b0} {4'b0000, 1'b0}
// 0in state_transition -var  pte_cycle_in[3:0] -val 4'b0000 -next 4'b1000
// 0in state_transition -var  pte_cycle_in[3:0] -val 4'b1000 -next 4'b0100 -match_by_cycle
// 0in state_transition -var  pte_cycle_in[3:0] -val 4'b0100 -next 4'b0010 -match_by_cycle
// 0in state_transition -var {pte_cycle_in[3:0], finish_lth_in} -val {4'b0010, 1'b0} -next {4'b0001, 1'b0} {4'b0000, 1'b1}
// 0in state_transition -var {pte_cycle_in[3:0], finish_lth_in} -val {4'b0001, 1'b0} -next {4'b0000, 1'b1}

// Single-bit copies of state bits exported as scalar outputs.
assign fdc_pe_cycle3      = fdc_pe_cycle[3];    // Tools issues with single bit buses downstream
assign fdc_pte_cycle2     = fdc_pte_cycle[2];   // Tools issues with single bit buses downstream

// Asserted during pre-engine cycles 1 and 2.
assign fdc_pe_cmux_sel    =   fdc_pe_cycle[1]   | fdc_pe_cycle[2];

// smux_sel[0] : pe_cycle[1], fdd_pe_clth[63] set, and control_lth[0] set (signed encodings xxx1)
// smux_sel[1] : pre-engine cycles 1 and 2 (same expression as fdc_pe_cmux_sel)
// smux_sel[2] : a divide is starting directly from FX1 (not from the queue)
// For neg B, left shift by 1 to compensate for 'n-1' shift amount
assign fdc_pe_smux_sel[0] =   fdc_pe_cycle[1]   & fdd_pe_clth[63] & control_lth[0];
assign fdc_pe_smux_sel[1] =   fdc_pe_cycle[1]   | fdc_pe_cycle[2];
assign fdc_pe_smux_sel[2] =   fac_div_valid_fx1 & ~fac_divq_valid_fx1;

// pe_hmux_sel[0] : starting op has control[2]=0 and control[1]=1 (float double encodings x010)
// pe_hmux_sel[1] : starting op has control[2]=0 and control[1]=0 (float single encodings x000)
// pe_hmux_sel[2] : valid_lth set and not in pre-engine cycle 1
// As elsewhere, the control bits come from FX1 or from the queue register
// depending on fac_divq_valid_fx1.
assign     pe_hmux_sel[0] = ( fac_div_valid_fx1 & ~fac_div_control_fx1[2] &  fac_div_control_fx1[1] & ~fac_divq_valid_fx1) |
                            ( fac_div_valid_fx1 &        ~qcontrol_fx1[2] &         qcontrol_fx1[1] &  fac_divq_valid_fx1);
assign     pe_hmux_sel[1] = ( fac_div_valid_fx1 & ~fac_div_control_fx1[2] & ~fac_div_control_fx1[1] & ~fac_divq_valid_fx1) |
                            ( fac_div_valid_fx1 &        ~qcontrol_fx1[2] &        ~qcontrol_fx1[1] &  fac_divq_valid_fx1);
assign     pe_hmux_sel[2] =  ~fdc_pe_cycle[1]   &  valid_lth;


// *** Integer Overflow Detection ***

// fdc_idiv_ctl
      //  3210
      //  ----
      //  0001 : 8000 0000 0000 0000  ovfl_64x
      //  0010 : FFFF FFFF 8000 0000  ovfl_32n
      //  0100 : 0000 0000 7FFF FFFF  ovfl_32p
      //  1000 : 0000 0000 FFFF FFFF  ovfl_32u


// For 64-bit divide, the only OVFL condition that exists is :  neg max / -1
// This results in a constant of "8000 0000 0000 0000" defined on pg 196.

// 64-bit signed overflow detection (neg max / -1).
//   b_neg_one : sampled in pe_cycle[2]; B sign latch set and shift amount == 63
//   a_neg_max : sampled in pe_cycle[3]; A sign latch set and (inverted) shift amount all ones
// b_neg_one is registered (b_neg_one_lth) so that it lines up with a_neg_max one cycle later.
assign b_neg_one       =  fdc_pe_cycle[2] & fdc_bsign_lth &
                         (pe_xsht_amt[6:0] == 7'b0111111);

assign a_neg_max       =  fdc_pe_cycle[3] & fdc_asign_lth &
                         (pe_xsht_amt[6:0] == 7'b1111111);  // xsht_amt is inverted by cycle[3]

// Set when both conditions are met; held until finish_raw; cleared by flush.
assign ovfl_64_in      = ( a_neg_max      &  b_neg_one_lth & ~div_flush_lth) |  // 64-bit divide
                         (~finish_raw     &  ovfl_64_lth   & ~div_flush_lth);

// Select the "8000 0000 0000 0000" constant for a 64-bit op (control_lth[1]).
// The 64-bit integer flow never enters pte_cycle[0] (pte_cycle_in[0] is masked
// when control_lth[2] & control_lth[1]); its result is loaded during
// pte_cycle[1].  The select is therefore qualified with pte_cycle[1]:
// qualifying it with pte_cycle[0] would leave it permanently inactive for the
// only operations that can set ovfl_64_lth with control_lth[1] high.
assign fdc_idiv_ctl[0] =  fdc_pte_cycle[1] & ovfl_64_lth   & control_lth[1];

// For 64-bit/32-bit, three OVFL constants are possible.  (see pages 152-154)
// For - signed : if quotient <= (-2^31 - 1) then result = FFFF FFFF 8000 0000 (-2^31    )
// For + signed : if quotient >= ( 2^31    ) then result = 0000 0000 7FFF FFFF ( 2^31 - 1)
// For unsigned : if quotient >= ( 2^32    ) then result = 0000 0000 FFFF FFFF ( 2^32 - 1)

// 32-bit overflow constant selects, all evaluated in pte_cycle[0] on fdd_result.
// [1] signed 32-bit (control 101), negative result whose bits [62:31] are not
//     all ones, and no 64-bit overflow pending.
assign fdc_idiv_ctl[1] =  fdc_pte_cycle[0] &  (control_lth[2:0] == 3'b101) &
                          fdd_result[63]   &  (fdd_result[62:31] != 32'hFFFFFFFF)  & ~ovfl_64_lth;

// [2] signed 32-bit (control 101), non-negative result with any of bits
//     [62:31] set, or a pending 64-bit overflow (ovfl_64_lth).
assign fdc_idiv_ctl[2] =  fdc_pte_cycle[0] &  (control_lth[2:0] == 3'b101) &
                       ((~fdd_result[63]   &  (fdd_result[62:31] != 32'h00000000)) |  ovfl_64_lth);

// [3] unsigned 32-bit (control 100) with any of result bits [63:32] set.
assign fdc_idiv_ctl[3] =  fdc_pte_cycle[0] &  (control_lth[2:0] == 3'b100) &
                                              (fdd_result[63:32] != 32'h00000000);

// [4] pte_cycle[0] of an op with control_lth[2] clear, when flt_shift_sel_ and
//     fdc_flt_increment are both low.
// NOTE(review): the port comment on fdc_idiv_ctl says "[4] = integer", while
// this term requires ~control_lth[2] (the float encodings) -- confirm which
// description is intended.
assign fdc_idiv_ctl[4] =  fdc_pte_cycle[0] & ~control_lth[2] & ~flt_shift_sel_ & ~fdc_flt_increment;

// Overflow indication: any of the four overflow-constant selects is active.
assign fdc_icc_v_early = | fdc_idiv_ctl[3:0];


// Registers the 64-bit overflow state, b_neg_one, and the two active-low
// zero-detect inputs from the datapath.
fgu_fdc_ctl_msff_ctl_macro__width_4     ovlf_lth         (
 .scan_in(ovlf_lth_scanin),
 .scan_out(ovlf_lth_scanout),
 .l1clk( l1clk_pm1                                                       ),
 .din  ({ovfl_64_in  , b_neg_one     , fdd_cla_zero64_ , fdd_cla_zero32_}),
 .dout ({ovfl_64_lth , b_neg_one_lth , cla_zero64_lth_ , cla_zero32_lth_}),
  .siclk(siclk),
  .soclk(soclk));


// Zero indications from the registered (active-low) zero detects.
// The 32-bit one is suppressed when an overflow constant is being selected.
assign fdc_xicc_z_early[1] = ~cla_zero64_lth_;
assign fdc_xicc_z_early[0] = ~cla_zero32_lth_ & ~fdc_icc_v_early;


// *** Engine stall ***

// The INTEGER divide has a variable timing dependent on the operand data.
// The divide must provide a STALL signal to the issue logic to ensure no
// collision on the shared FGU to EXU bus.  The timing of the IDIV_STALL
// is given below.
//
//  t-1     t     t+1    t+2    t+3    t+4    t+5    t+6    t+7
//  -----|------|------|------|------|------|------|------|------|
// idiv  | idiv |  D   |  E   | fx1  | fx2  | fx3  | fx4  | fx5  |
// stall | stall|      |      |      |      |      |      |      |
// in    |      |      |      |engine|pte[3]|pte[2]|pte[1]|finish|
//       |      |      |      | stop |      |      |      |      |
//       |      |      |      |      |      |      |      |      |
//   8/9 | 6/7  | 4/5  | 2/3  | 0/1  |      |      |      |      |
//       |      |      |      |      |      |      |      |      |
//   +0  |  +1  |  +2  |  +3  |  +4  |      |      |      |      |
//
//
// The Floating Point Divide and Square Root has a fixed latency.
// The divide must provide a STALL signal to the issue logic to ensure no
// collision at the W2 port to the FRF.
//
//  t-1     t     t+1    t+2    t+3    t+4
//  -----|------|------|------|------|------|
// fdiv  | fdiv |  D   |  E   |  M   | fb/B |
// stall | stall|      |      |      |      |
// in    |      |      |      |      |      |
//       |      |      |      |      |      |
//       |pte[3]|pte[2]|pte[1]|pte[0]|finish|
//       |      |      |      |      |      |
//       |      |      |      |      |      |
//   8/9 | 6/7  |      |      |      |      |
//       |      |      |      |      |      |
//   +0  |  +1  |      |      |      |      |
//
//
// at engine_start :
//                    stall_cnt
//                    3 2 1 0
//                    -------
//    ndq = neg   ->  1 1 1 1
//    ndq = 0/1   ->  1 1 1 1    [6:1] = 000 000
//    ndq = 2/3   ->  0 1 1 1          = 000 001
//    ndq = 4/5   ->  0 0 1 1          = 000 010
//    ndq = 6/7   ->  0 0 0 1          = 000 011
//    ndq >=8/9   ->  0 0 0 0          = 000 100   idiv_stall_in


// Float stall request: engine on, control_lth[2] clear, pe_ndq[6:1] == 0, and
// no stall already outstanding (stall_hold).  Unlike idiv_stall_in, this term
// is not itself gated by div_flush_lth.
assign fdiv_stall_in     = ((pe_ndq[6:1] == 6'b000000) & engine_on & ~control_lth[2]                  & ~stall_hold);

// Integer stall request (control_lth[2] set), issued once per divide (~stall_hold):
//   - pe_ndq[6:1] == 4 (ndq 8/9) for 64-bit ops only (control_lth[1])
//   - pe_ndq[6:3] == 0 (ndq 0..7) for any integer op
//   - pe_ndq[7] set           for any integer op
assign idiv_stall_in     = ((pe_ndq[6:1] == 6'b000100) & engine_on &  control_lth[2] & control_lth[1] & ~stall_hold & ~div_flush_lth) |
                           ((pe_ndq[6:3] == 4'b0000  ) & engine_on &  control_lth[2]                  & ~stall_hold & ~div_flush_lth) |
                           ( pe_ndq[7]                 & engine_on &  control_lth[2]                  & ~stall_hold & ~div_flush_lth);


// stall_hold remembers that a stall has been requested; it is released by
// finish_raw or by a flush.
assign stall_hold_in     = (fdiv_stall_in & ~div_flush_lth              ) |
                           (idiv_stall_in & ~div_flush_lth              ) |
                           (stall_hold    & ~div_flush_lth & ~finish_raw);


// Thermometer-coded stall count derived from pe_ndq, per the table above:
//   ndq neg or 0/1 -> 1111, 2/3 -> 0111, 4/5 -> 0011, 6/7 -> 0001, else 0000
assign stall_cnt_raw[3]  = (pe_ndq[6:1] == 6'b000000) |
                           (pe_ndq[7]               );

assign stall_cnt_raw[2]  = (pe_ndq[6:2] == 5'b00000 ) |
                           (pe_ndq[7]               );

assign stall_cnt_raw[1]  = (pe_ndq[6:1] == 6'b000010) |
                           (pe_ndq[6:2] == 5'b00000 ) |
                           (pe_ndq[7]               );

assign stall_cnt_raw[0]  = (pe_ndq[6:3] == 4'b0000  ) |
                           (pe_ndq[7]               );

// Stall counter:
//   - loaded in pe_cycle[3]: the raw count for 64-bit ops, the raw count
//     shifted right by one for 32-bit ops
//   - held while neither pe_cycle[3] nor finish_lth is active
//   - shifted right by one each cycle while finish_lth is set
//   - cleared by flush
assign stall_cnt_in[3:0] = ({4{ fdc_pe_cycle[3] &  control_lth[1] & ~div_flush_lth}} &       stall_cnt_raw[3:0] ) |  // INT64 engine_start
                           ({4{ fdc_pe_cycle[3] & ~control_lth[1] & ~div_flush_lth}} & {1'b0,stall_cnt_raw[3:1]}) |  // INT32 engine_start
                           ({4{~fdc_pe_cycle[3] & ~finish_lth     & ~div_flush_lth}} &       stall_cnt[3:0]     ) |
                           ({4{                    finish_lth     & ~div_flush_lth}} & {1'b0,stall_cnt[3:1]    });

// 8-bit register: finish state, the two stall requests (fgu_fdiv_stall is
// driven straight from this flop), stall_hold and the stall counter.
fgu_fdc_ctl_msff_ctl_macro__width_8     stall_lth        (
 .scan_in(stall_lth_scanin),
 .scan_out(stall_lth_scanout),
 .l1clk( l1clk_pm1                                                                           ),
 .din  ({finish_lth_in , fdiv_stall_in  , idiv_stall_in  , stall_hold_in , stall_cnt_in[3:0]}),
 .dout ({finish_lth    , fgu_fdiv_stall , idiv_stall_lth , stall_hold    , stall_cnt[3:0]   }),
  .siclk(siclk),
  .soclk(soclk));


// Steer the registered integer stall to exactly one thread group, chosen by
// control_lth[4]:  [1] = Threads 4-7 (control_lth[4] = 1),  [0] = Threads 0-3.
assign fgu_idiv_stall[1:0] = {2{idiv_stall_lth}} & {control_lth[4], ~control_lth[4]};


// finish_lth is entered from pte_cycle[0], or from pte_cycle[1] for 64-bit
// integer ops (control_lth[2] & control_lth[1]), and is then held for as long
// as stall_cnt[0] is still set.  Cleared by flush.
assign finish_lth_in         =  (fdc_pte_cycle[0]                                                                     & ~div_flush_lth) |
                                (fdc_pte_cycle[1] &                                  control_lth[2] &  control_lth[1] & ~div_flush_lth) |
                                (finish_lth       &  stall_cnt[0]                                                     & ~div_flush_lth);


// Finish with the stall counter drained (stall_cnt[0] == 0).
assign finish_raw            =   finish_lth       & ~stall_cnt[0];

// Integer finish indication:
//   - pte_cycle[0] (any integer op) with stall_cnt[0] clear
//   - pte_cycle[1] (64-bit integer op) with stall_cnt[0] clear
//   - finish_lth with stall_cnt[1:0] == 2'b01, i.e. the counter's next
//     right-shift clears bit 0
assign fdc_finish_int_early  =  (fdc_pte_cycle[0] & ~stall_cnt[0]                 &  control_lth[2]                  ) |
                                (fdc_pte_cycle[1] & ~stall_cnt[0]                 &  control_lth[2] &  control_lth[1]) |
                                (finish_lth       &  stall_cnt[0] & ~stall_cnt[1] &  control_lth[2]                  );

// Float finish indications in pte_cycle[0]: control_lth[1] set (double
// encodings) or clear (single encodings).
assign fdc_finish_fltd_early = fdc_pte_cycle[0] &                                   ~control_lth[2] &  control_lth[1];
assign fdc_finish_flts_early = fdc_pte_cycle[0] &                                   ~control_lth[2] & ~control_lth[1];


// Asserted in pte_cycle[1], whenever an overflow constant is selected
// (fdc_idiv_ctl[3:0]), and in pte_cycle[0] of a non-integer op when
// flt_shift_sel_ is low or when fdc_flt_increment is set.
assign fdc_pte_stall_      =   fdc_pte_cycle[1] |
                               fdc_idiv_ctl[0]  |  fdc_idiv_ctl[1] |  fdc_idiv_ctl[2] |  fdc_idiv_ctl[3] |
                              (fdc_pte_cycle[0] & ~flt_shift_sel_ & ~control_lth[2]) |
                              (fdc_pte_cycle[0] &  fdc_flt_increment               );


// *** State data ***

// control_in: when fac_div_valid_fx1 is set, load a new control word --
// qcontrol_fx1 if fac_divq_valid_fx1 is set, else fac_div_control_fx1.
// When fac_div_valid_fx1 is clear, hold the current control_lth.
assign control_in[4:0]    = ({5{ fac_div_valid_fx1}} & ( ({5{~fac_divq_valid_fx1}} & fac_div_control_fx1[4:0]) |
                                                         ({5{ fac_divq_valid_fx1}} &        qcontrol_fx1[4:0]) )) |
                            ({5{~fac_div_valid_fx1}} &                                       control_lth[4:0]);

// asign_in: during fdc_pe_cycle[2] capture fdd_pe_clth[63] gated by
// control_lth[0]; outside that cycle hold fdc_asign_lth, cleared whenever
// fac_div_valid_fx1 is set.
assign asign_in           = (fdd_pe_clth[63]    &  control_lth[0] &  fdc_pe_cycle[2]) |
                            (~fac_div_valid_fx1 &  fdc_asign_lth  & ~fdc_pe_cycle[2]);

// bsign_in: same scheme as asign_in, but captured during fdc_pe_cycle[1].
assign bsign_in           = (fdd_pe_clth[63]    &  control_lth[0] &  fdc_pe_cycle[1]) |
                            (~fac_div_valid_fx1 &  fdc_bsign_lth  & ~fdc_pe_cycle[1]);

// ndq_odd_in: pe_ndq[0] is set while pe_ndq[7] is clear.
assign ndq_odd_in         =  pe_ndq[0] & ~pe_ndq[7];

// Control / sign / ndq-odd state bank (9 bits, presumably scan flops per the
// macro name).  Each din element is paired positionally with the dout element
// on the same row below; ndq_odd_lth is fed back in to produce ndq_odd_2lth.
fgu_fdc_ctl_msff_ctl_macro__width_9     data_lth         (
 .l1clk   (l1clk_pm1),
 .siclk   (siclk),
 .soclk   (soclk),
 .scan_in (data_lth_scanin),
 .scan_out(data_lth_scanout),
 .din     ({control_in[4:0],
            asign_in,
            bsign_in,
            ndq_odd_in,
            ndq_odd_lth}),
 .dout    ({control_lth[4:0],
            fdc_asign_lth,
            fdc_bsign_lth,
            ndq_odd_lth,
            ndq_odd_2lth}));

// During fdc_pte_cycle[3], bit 1 is selected when ndq_odd_lth is set and
// bit 0 when it is clear.
assign fdc_pte_clasel[1:0] = {2{fdc_pte_cycle[3]}} & {ndq_odd_lth, ~ndq_odd_lth};

// qsel[1:0] are the integer (control_lth[2]) selects, split by ndq_odd_2lth:
//   [0] INT even, [1] INT odd.
assign fdc_pte_qsel[1:0]   = {2{control_lth[2]}} & {ndq_odd_2lth, ~ndq_odd_2lth};
assign fdc_pte_qsel[2]     =  control_lth[1] & ~control_lth[2];  // FLT DP


// Inverted b-sign, and carry-in set when the two latched signs differ.
assign fdc_bsign_lth_      = ~fdc_bsign_lth;
assign fdc_pte_csa_cin     =  fdc_bsign_lth ^ fdc_asign_lth;


// * * * * * * * * * * * Interface to engine * * * * * * * * * * *

// integer select by "fdc_pe_cycle[3]"

// fac_div_control_fx1[3:0] ==
//   [3:0]  :
//    0000  : Float Divide Single
//    0010  : Float Divide Double
//    0100  : Integer Unsigned - 32 bit
//    0101  : Integer   Signed - 32 bit
//    0110  : Integer Unsigned - 64 bit
//    0111  : Integer   Signed - 64 bit
//    1000  : Float SQRT   Single
//    1010  : Float SQRT   Double


// rmux selects: exactly one of the five bits is set.  Bit 0 covers "no valid
// divide"; the other four decode {fac_divq_valid_fx1, control bit 3}, taking
// bit 3 from qcontrol_fx1 when fac_divq_valid_fx1 is set and from
// fac_div_control_fx1 otherwise.
assign fdc_ie_rmux_sel[0] = ~fac_div_valid_fx1;                                                  // integer
assign fdc_ie_rmux_sel[1] =  fac_div_valid_fx1 & ~fac_divq_valid_fx1 &  fac_div_control_fx1[3];  // float sqrt
assign fdc_ie_rmux_sel[2] =  fac_div_valid_fx1 & ~fac_divq_valid_fx1 & ~fac_div_control_fx1[3];  // float div
assign fdc_ie_rmux_sel[3] =  fac_div_valid_fx1 &  fac_divq_valid_fx1 &         qcontrol_fx1[3];  // float sqrt
assign fdc_ie_rmux_sel[4] =  fac_div_valid_fx1 &  fac_divq_valid_fx1 &        ~qcontrol_fx1[3];  // float div

// dmux selects: as above, but only the "control bit 3 clear" cases get a select.
assign fdc_ie_dmux_sel[0] = ~fac_div_valid_fx1;                                                  // integer
assign fdc_ie_dmux_sel[1] =  fac_div_valid_fx1 & ~fac_divq_valid_fx1 & ~fac_div_control_fx1[3];  // float div
assign fdc_ie_dmux_sel[2] =  fac_div_valid_fx1 &  fac_divq_valid_fx1 &        ~qcontrol_fx1[3];  // float div


// must be qualified w/ valid so INT is not corrupted by garbage on bus during pe_cycle[3]
// even/odd is chosen by fpe_rs2_fmt_fx1_b0 on the direct path and by
// qcontrol_fx1[5] on the fac_divq_valid_fx1 path.
assign fdc_ie_fsqrt_valid_even  = fac_div_valid_fx1 &
                                  ( (~fac_divq_valid_fx1 & fac_div_control_fx1[3] &  fpe_rs2_fmt_fx1_b0) |
                                    ( fac_divq_valid_fx1 &        qcontrol_fx1[3] &  qcontrol_fx1[5]   ) );

assign fdc_ie_fsqrt_valid_odd   = fac_div_valid_fx1 &
                                  ( (~fac_divq_valid_fx1 & fac_div_control_fx1[3] & ~fpe_rs2_fmt_fx1_b0) |
                                    ( fac_divq_valid_fx1 &        qcontrol_fx1[3] & ~qcontrol_fx1[5]   ) );

// Active-low copies of the two qualifiers above.
assign fdc_ie_fsqrt_valid_odd_  = ~fdc_ie_fsqrt_valid_odd;
assign fdc_ie_fsqrt_valid_even_ = ~fdc_ie_fsqrt_valid_even;


// * * * * * * * * * * * * start : Integer CNTL0 * * * * * * * * * * * * *

//reg [6:0] cntl0;
//
//always @ (fdd_pe_clth[63:0])
//
// begin
//
// casex (fdd_pe_clth[63:0])
//  64'b1???????????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000000;
//  64'b01??????????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000001;
//  64'b001?????????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000010;
//  64'b0001????????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000011;
//  64'b00001???????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000100;
//  64'b000001??????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000101;
//  64'b0000001?????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000110;
//  64'b00000001????????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0000111;
//
//  64'b000000001???????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001000;
//  64'b0000000001??????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001001;
//  64'b00000000001?????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001010;
//  64'b000000000001????????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001011;
//  64'b0000000000001???????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001100;
//  64'b00000000000001??????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001101;
//  64'b000000000000001?????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001110;
//  64'b0000000000000001????????????????????????????????????????????????: cntl0[6:0]  =  7'b0001111;
//
//  64'b00000000000000001???????????????????????????????????????????????: cntl0[6:0]  =  7'b0010000;
//  64'b000000000000000001??????????????????????????????????????????????: cntl0[6:0]  =  7'b0010001;
//  64'b0000000000000000001?????????????????????????????????????????????: cntl0[6:0]  =  7'b0010010;
//  64'b00000000000000000001????????????????????????????????????????????: cntl0[6:0]  =  7'b0010011;
//  64'b000000000000000000001???????????????????????????????????????????: cntl0[6:0]  =  7'b0010100;
//  64'b0000000000000000000001??????????????????????????????????????????: cntl0[6:0]  =  7'b0010101;
//  64'b00000000000000000000001?????????????????????????????????????????: cntl0[6:0]  =  7'b0010110;
//  64'b000000000000000000000001????????????????????????????????????????: cntl0[6:0]  =  7'b0010111;
//
//  64'b0000000000000000000000001???????????????????????????????????????: cntl0[6:0]  =  7'b0011000;
//  64'b00000000000000000000000001??????????????????????????????????????: cntl0[6:0]  =  7'b0011001;
//  64'b000000000000000000000000001?????????????????????????????????????: cntl0[6:0]  =  7'b0011010;
//  64'b0000000000000000000000000001????????????????????????????????????: cntl0[6:0]  =  7'b0011011;
//  64'b00000000000000000000000000001???????????????????????????????????: cntl0[6:0]  =  7'b0011100;
//  64'b000000000000000000000000000001??????????????????????????????????: cntl0[6:0]  =  7'b0011101;
//  64'b0000000000000000000000000000001?????????????????????????????????: cntl0[6:0]  =  7'b0011110;
//  64'b00000000000000000000000000000001????????????????????????????????: cntl0[6:0]  =  7'b0011111;
//
//  64'b000000000000000000000000000000001???????????????????????????????: cntl0[6:0]  =  7'b0100000;
//  64'b0000000000000000000000000000000001??????????????????????????????: cntl0[6:0]  =  7'b0100001;
//  64'b00000000000000000000000000000000001?????????????????????????????: cntl0[6:0]  =  7'b0100010;
//  64'b000000000000000000000000000000000001????????????????????????????: cntl0[6:0]  =  7'b0100011;
//  64'b0000000000000000000000000000000000001???????????????????????????: cntl0[6:0]  =  7'b0100100;
//  64'b00000000000000000000000000000000000001??????????????????????????: cntl0[6:0]  =  7'b0100101;
//  64'b000000000000000000000000000000000000001?????????????????????????: cntl0[6:0]  =  7'b0100110;
//  64'b0000000000000000000000000000000000000001????????????????????????: cntl0[6:0]  =  7'b0100111;
//
//  64'b00000000000000000000000000000000000000001???????????????????????: cntl0[6:0]  =  7'b0101000;
//  64'b000000000000000000000000000000000000000001??????????????????????: cntl0[6:0]  =  7'b0101001;
//  64'b0000000000000000000000000000000000000000001?????????????????????: cntl0[6:0]  =  7'b0101010;
//  64'b00000000000000000000000000000000000000000001????????????????????: cntl0[6:0]  =  7'b0101011;
//  64'b000000000000000000000000000000000000000000001???????????????????: cntl0[6:0]  =  7'b0101100;
//  64'b0000000000000000000000000000000000000000000001??????????????????: cntl0[6:0]  =  7'b0101101;
//  64'b00000000000000000000000000000000000000000000001?????????????????: cntl0[6:0]  =  7'b0101110;
//  64'b000000000000000000000000000000000000000000000001????????????????: cntl0[6:0]  =  7'b0101111;
//
//  64'b0000000000000000000000000000000000000000000000001???????????????: cntl0[6:0]  =  7'b0110000;
//  64'b00000000000000000000000000000000000000000000000001??????????????: cntl0[6:0]  =  7'b0110001;
//  64'b000000000000000000000000000000000000000000000000001?????????????: cntl0[6:0]  =  7'b0110010;
//  64'b0000000000000000000000000000000000000000000000000001????????????: cntl0[6:0]  =  7'b0110011;
//  64'b00000000000000000000000000000000000000000000000000001???????????: cntl0[6:0]  =  7'b0110100;
//  64'b000000000000000000000000000000000000000000000000000001??????????: cntl0[6:0]  =  7'b0110101;
//  64'b0000000000000000000000000000000000000000000000000000001?????????: cntl0[6:0]  =  7'b0110110;
//  64'b00000000000000000000000000000000000000000000000000000001????????: cntl0[6:0]  =  7'b0110111;
//
//  64'b000000000000000000000000000000000000000000000000000000001???????: cntl0[6:0]  =  7'b0111000;
//  64'b0000000000000000000000000000000000000000000000000000000001??????: cntl0[6:0]  =  7'b0111001;
//  64'b00000000000000000000000000000000000000000000000000000000001?????: cntl0[6:0]  =  7'b0111010;
//  64'b000000000000000000000000000000000000000000000000000000000001????: cntl0[6:0]  =  7'b0111011;
//  64'b0000000000000000000000000000000000000000000000000000000000001???: cntl0[6:0]  =  7'b0111100;
//  64'b00000000000000000000000000000000000000000000000000000000000001??: cntl0[6:0]  =  7'b0111101;
//  64'b000000000000000000000000000000000000000000000000000000000000001?: cntl0[6:0]  =  7'b0111110;
//  64'b0000000000000000000000000000000000000000000000000000000000000001: cntl0[6:0]  =  7'b0111111;
//
//  64'b0000000000000000000000000000000000000000000000000000000000000000: cntl0[6:0]  =  7'b1000000;
//
//  default:                                                              cntl0[6:0]  =  7'bxxxxxxx;
//
// endcase
//
//end


// The real count leading zero (CNTL0) circuit must be coded at the gate level.
// For each 8-bit byte, a 3-bit count and an "all zero" will be computed.  The
// "all zero" will then be used to find which byte contains the leading 1.
// The 3-bit count from each byte will be muxed using those "all zero" controls
// to form the 3 LSB's of the CNTL0.  The upper 4 bits of the CNTL0 are
// computed directly from the byte "all zero" controls.  See the truth tables
// below for more details.
//
//                                    byte
//         clth[7:0]    |  cnt[2:0]   zero
//     -----------------|-----------------
//     1 x x x x x x x  |   0 0 0      0
//     0 1 x x x x x x  |   0 0 1      0
//     0 0 1 x x x x x  |   0 1 0      0
//     0 0 0 1 x x x x  |   0 1 1      0
//     0 0 0 0 1 x x x  |   1 0 0      0
//     0 0 0 0 0 1 x x  |   1 0 1      0
//     0 0 0 0 0 0 1 x  |   1 1 0      0
//     0 0 0 0 0 0 0 1  |   1 1 1      0
//     0 0 0 0 0 0 0 0  |   0 0 0      1
//
//
//          Byte Zero_
//    z7 z6 z5 z4 z3 z2 z1 z0  |   cnt[6:3]
//    -------------------------|-------------
//    1  x  x  x  x  x  x  x   |   0 0 0 0
//    0  1  x  x  x  x  x  x   |   0 0 0 1
//    0  0  1  x  x  x  x  x   |   0 0 1 0
//    0  0  0  1  x  x  x  x   |   0 0 1 1
//    0  0  0  0  1  x  x  x   |   0 1 0 0
//    0  0  0  0  0  1  x  x   |   0 1 0 1
//    0  0  0  0  0  0  1  x   |   0 1 1 0
//    0  0  0  0  0  0  0  1   |   0 1 1 1
//    0  0  0  0  0  0  0  0   |   1 x x x (divide ENDS!)
//
//
// Byte Zero_ is an 8-way OR of all bits in that byte.
// This can be accomplished by 4 * Nor2 + Nand4
//
// In order to compute the 3-bit count, we must further
// divide the byte down into an upper and lower half.
//
// Estimated critical path :
//    NOR2 + NAND4 + PE(3->4) + MUX4 + MUX2 + MUX3(merge CNTL0 + CNTL1)


// Local short alias for fdd_pe_clth; all CNTL0 / CNTL1 byte logic below reads clth.
assign clth[63:0] = fdd_pe_clth[63:0];


// ************************ BYTE 0 => 07:00 **************************

// Pairwise "both bits clear" detectors for clth[7:0].
assign b0_nor_76  = ~clth[7] & ~clth[6];
assign b0_nor_54  = ~clth[5] & ~clth[4];
assign b0_nor_32  = ~clth[3] & ~clth[2];
assign b0_nor_10  = ~clth[1] & ~clth[0];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b0_zeroh_  = ~b0_nor_76 | ~b0_nor_54;
assign b0_zerol_  = ~b0_nor_32 | ~b0_nor_10;
assign b0_zero_   = ~b0_nor_76 | ~b0_nor_54 | ~b0_nor_32 | ~b0_nor_10;

// Upper nibble: offset of the leading 1 from bit 7 (00 if the nibble is zero).
assign b0_cnth[0] = ~clth[7] & (clth[6] | (~clth[5] & clth[4]));
assign b0_cnth[1] = ~clth[7] & ~clth[6] & (clth[5] | clth[4]);

// Lower nibble: offset of the leading 1 from bit 3 (00 if the nibble is zero).
assign b0_cntl[0] = ~clth[3] & (clth[2] | (~clth[1] & clth[0]));
assign b0_cntl[1] = ~clth[3] & ~clth[2] & (clth[1] | clth[0]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b0_cnt[2]   =  b0_zerol_ & ~b0_zeroh_;
assign b0_cnt[1:0] = ({2{ b0_zeroh_}} & {b0_cnth[1], b0_cnth[0]}) |
                     ({2{~b0_zeroh_}} & {b0_cntl[1], b0_cntl[0]});


// ************************ BYTE 1 => 15:08 **************************

// Pairwise "both bits clear" detectors for clth[15:8].
assign b1_nor_76  = ~clth[15] & ~clth[14];
assign b1_nor_54  = ~clth[13] & ~clth[12];
assign b1_nor_32  = ~clth[11] & ~clth[10];
assign b1_nor_10  = ~clth[9]  & ~clth[8];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b1_zeroh_  = ~b1_nor_76 | ~b1_nor_54;
assign b1_zerol_  = ~b1_nor_32 | ~b1_nor_10;
assign b1_zero_   = ~b1_nor_76 | ~b1_nor_54 | ~b1_nor_32 | ~b1_nor_10;

// Upper nibble: offset of the leading 1 from bit 15 (00 if the nibble is zero).
assign b1_cnth[0] = ~clth[15] & (clth[14] | (~clth[13] & clth[12]));
assign b1_cnth[1] = ~clth[15] & ~clth[14] & (clth[13] | clth[12]);

// Lower nibble: offset of the leading 1 from bit 11 (00 if the nibble is zero).
assign b1_cntl[0] = ~clth[11] & (clth[10] | (~clth[9] & clth[8]));
assign b1_cntl[1] = ~clth[11] & ~clth[10] & (clth[9] | clth[8]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b1_cnt[2]   =  b1_zerol_ & ~b1_zeroh_;
assign b1_cnt[1:0] = ({2{ b1_zeroh_}} & {b1_cnth[1], b1_cnth[0]}) |
                     ({2{~b1_zeroh_}} & {b1_cntl[1], b1_cntl[0]});


// ************************ BYTE 2 => 23:16 **************************

// Pairwise "both bits clear" detectors for clth[23:16].
assign b2_nor_76  = ~clth[23] & ~clth[22];
assign b2_nor_54  = ~clth[21] & ~clth[20];
assign b2_nor_32  = ~clth[19] & ~clth[18];
assign b2_nor_10  = ~clth[17] & ~clth[16];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b2_zeroh_  = ~b2_nor_76 | ~b2_nor_54;
assign b2_zerol_  = ~b2_nor_32 | ~b2_nor_10;
assign b2_zero_   = ~b2_nor_76 | ~b2_nor_54 | ~b2_nor_32 | ~b2_nor_10;

// Upper nibble: offset of the leading 1 from bit 23 (00 if the nibble is zero).
assign b2_cnth[0] = ~clth[23] & (clth[22] | (~clth[21] & clth[20]));
assign b2_cnth[1] = ~clth[23] & ~clth[22] & (clth[21] | clth[20]);

// Lower nibble: offset of the leading 1 from bit 19 (00 if the nibble is zero).
assign b2_cntl[0] = ~clth[19] & (clth[18] | (~clth[17] & clth[16]));
assign b2_cntl[1] = ~clth[19] & ~clth[18] & (clth[17] | clth[16]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b2_cnt[2]   =  b2_zerol_ & ~b2_zeroh_;
assign b2_cnt[1:0] = ({2{ b2_zeroh_}} & {b2_cnth[1], b2_cnth[0]}) |
                     ({2{~b2_zeroh_}} & {b2_cntl[1], b2_cntl[0]});


// ************************ BYTE 3 => 31:24 **************************

// Pairwise "both bits clear" detectors for clth[31:24].
assign b3_nor_76  = ~clth[31] & ~clth[30];
assign b3_nor_54  = ~clth[29] & ~clth[28];
assign b3_nor_32  = ~clth[27] & ~clth[26];
assign b3_nor_10  = ~clth[25] & ~clth[24];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b3_zeroh_  = ~b3_nor_76 | ~b3_nor_54;
assign b3_zerol_  = ~b3_nor_32 | ~b3_nor_10;
assign b3_zero_   = ~b3_nor_76 | ~b3_nor_54 | ~b3_nor_32 | ~b3_nor_10;

// Upper nibble: offset of the leading 1 from bit 31 (00 if the nibble is zero).
assign b3_cnth[0] = ~clth[31] & (clth[30] | (~clth[29] & clth[28]));
assign b3_cnth[1] = ~clth[31] & ~clth[30] & (clth[29] | clth[28]);

// Lower nibble: offset of the leading 1 from bit 27 (00 if the nibble is zero).
assign b3_cntl[0] = ~clth[27] & (clth[26] | (~clth[25] & clth[24]));
assign b3_cntl[1] = ~clth[27] & ~clth[26] & (clth[25] | clth[24]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b3_cnt[2]   =  b3_zerol_ & ~b3_zeroh_;
assign b3_cnt[1:0] = ({2{ b3_zeroh_}} & {b3_cnth[1], b3_cnth[0]}) |
                     ({2{~b3_zeroh_}} & {b3_cntl[1], b3_cntl[0]});


// ************************ BYTE 4 => 39:32 **************************

// Pairwise "both bits clear" detectors for clth[39:32].
assign b4_nor_76  = ~clth[39] & ~clth[38];
assign b4_nor_54  = ~clth[37] & ~clth[36];
assign b4_nor_32  = ~clth[35] & ~clth[34];
assign b4_nor_10  = ~clth[33] & ~clth[32];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b4_zeroh_  = ~b4_nor_76 | ~b4_nor_54;
assign b4_zerol_  = ~b4_nor_32 | ~b4_nor_10;
assign b4_zero_   = ~b4_nor_76 | ~b4_nor_54 | ~b4_nor_32 | ~b4_nor_10;

// Upper nibble: offset of the leading 1 from bit 39 (00 if the nibble is zero).
assign b4_cnth[0] = ~clth[39] & (clth[38] | (~clth[37] & clth[36]));
assign b4_cnth[1] = ~clth[39] & ~clth[38] & (clth[37] | clth[36]);

// Lower nibble: offset of the leading 1 from bit 35 (00 if the nibble is zero).
assign b4_cntl[0] = ~clth[35] & (clth[34] | (~clth[33] & clth[32]));
assign b4_cntl[1] = ~clth[35] & ~clth[34] & (clth[33] | clth[32]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b4_cnt[2]   =  b4_zerol_ & ~b4_zeroh_;
assign b4_cnt[1:0] = ({2{ b4_zeroh_}} & {b4_cnth[1], b4_cnth[0]}) |
                     ({2{~b4_zeroh_}} & {b4_cntl[1], b4_cntl[0]});


// ************************ BYTE 5 => 47:40 **************************

// Pairwise "both bits clear" detectors for clth[47:40].
assign b5_nor_76  = ~clth[47] & ~clth[46];
assign b5_nor_54  = ~clth[45] & ~clth[44];
assign b5_nor_32  = ~clth[43] & ~clth[42];
assign b5_nor_10  = ~clth[41] & ~clth[40];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b5_zeroh_  = ~b5_nor_76 | ~b5_nor_54;
assign b5_zerol_  = ~b5_nor_32 | ~b5_nor_10;
assign b5_zero_   = ~b5_nor_76 | ~b5_nor_54 | ~b5_nor_32 | ~b5_nor_10;

// Upper nibble: offset of the leading 1 from bit 47 (00 if the nibble is zero).
assign b5_cnth[0] = ~clth[47] & (clth[46] | (~clth[45] & clth[44]));
assign b5_cnth[1] = ~clth[47] & ~clth[46] & (clth[45] | clth[44]);

// Lower nibble: offset of the leading 1 from bit 43 (00 if the nibble is zero).
assign b5_cntl[0] = ~clth[43] & (clth[42] | (~clth[41] & clth[40]));
assign b5_cntl[1] = ~clth[43] & ~clth[42] & (clth[41] | clth[40]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b5_cnt[2]   =  b5_zerol_ & ~b5_zeroh_;
assign b5_cnt[1:0] = ({2{ b5_zeroh_}} & {b5_cnth[1], b5_cnth[0]}) |
                     ({2{~b5_zeroh_}} & {b5_cntl[1], b5_cntl[0]});


// ************************ BYTE 6 => 55:48 **************************

// Pairwise "both bits clear" detectors for clth[55:48].
assign b6_nor_76  = ~clth[55] & ~clth[54];
assign b6_nor_54  = ~clth[53] & ~clth[52];
assign b6_nor_32  = ~clth[51] & ~clth[50];
assign b6_nor_10  = ~clth[49] & ~clth[48];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b6_zeroh_  = ~b6_nor_76 | ~b6_nor_54;
assign b6_zerol_  = ~b6_nor_32 | ~b6_nor_10;
assign b6_zero_   = ~b6_nor_76 | ~b6_nor_54 | ~b6_nor_32 | ~b6_nor_10;

// Upper nibble: offset of the leading 1 from bit 55 (00 if the nibble is zero).
assign b6_cnth[0] = ~clth[55] & (clth[54] | (~clth[53] & clth[52]));
assign b6_cnth[1] = ~clth[55] & ~clth[54] & (clth[53] | clth[52]);

// Lower nibble: offset of the leading 1 from bit 51 (00 if the nibble is zero).
assign b6_cntl[0] = ~clth[51] & (clth[50] | (~clth[49] & clth[48]));
assign b6_cntl[1] = ~clth[51] & ~clth[50] & (clth[49] | clth[48]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b6_cnt[2]   =  b6_zerol_ & ~b6_zeroh_;
assign b6_cnt[1:0] = ({2{ b6_zeroh_}} & {b6_cnth[1], b6_cnth[0]}) |
                     ({2{~b6_zeroh_}} & {b6_cntl[1], b6_cntl[0]});


// ************************ BYTE 7 => 63:56 **************************

// Pairwise "both bits clear" detectors for clth[63:56].
assign b7_nor_76  = ~clth[63] & ~clth[62];
assign b7_nor_54  = ~clth[61] & ~clth[60];
assign b7_nor_32  = ~clth[59] & ~clth[58];
assign b7_nor_10  = ~clth[57] & ~clth[56];

// Active-low zero flags: high when the upper nibble / lower nibble / whole
// byte contains at least one 1.
assign b7_zeroh_  = ~b7_nor_76 | ~b7_nor_54;
assign b7_zerol_  = ~b7_nor_32 | ~b7_nor_10;
assign b7_zero_   = ~b7_nor_76 | ~b7_nor_54 | ~b7_nor_32 | ~b7_nor_10;

// Upper nibble: offset of the leading 1 from bit 63 (00 if the nibble is zero).
assign b7_cnth[0] = ~clth[63] & (clth[62] | (~clth[61] & clth[60]));
assign b7_cnth[1] = ~clth[63] & ~clth[62] & (clth[61] | clth[60]);

// Lower nibble: offset of the leading 1 from bit 59 (00 if the nibble is zero).
assign b7_cntl[0] = ~clth[59] & (clth[58] | (~clth[57] & clth[56]));
assign b7_cntl[1] = ~clth[59] & ~clth[58] & (clth[57] | clth[56]);

// Byte count: bit 2 is set when the leading 1 sits in the lower nibble;
// bits 1:0 come from the upper nibble when it is non-zero, else from the lower.
assign b7_cnt[2]   =  b7_zerol_ & ~b7_zeroh_;
assign b7_cnt[1:0] = ({2{ b7_zeroh_}} & {b7_cnth[1], b7_cnth[0]}) |
                     ({2{~b7_zeroh_}} & {b7_cntl[1], b7_cntl[0]});


// ************************  Global CNTL0   **************************

// When CNTL0[6] = 1   all other bits become a DON'T CARE

// Low word (bytes 3..0): priority select of the highest non-zero byte.
// Exactly one select is set; byte 0 is the fall-through when bytes 3..1 are zero.
assign b3_0sel     =  b3_zero_;
assign b2_0sel     =  b2_zero_ & ~b3_zero_;
assign b1_0sel     =  b1_zero_ & ~(b3_zero_ | b2_zero_);
assign b0_0sel     = ~(b3_zero_ | b2_zero_ | b1_zero_);

// {byte index within the word, count within the byte} of the selected byte.
assign cntl0l[4:0] = ({5{b0_0sel}} & {2'b11,b0_cnt[2:0]}) |
                     ({5{b1_0sel}} & {2'b10,b1_cnt[2:0]}) |
                     ({5{b2_0sel}} & {2'b01,b2_cnt[2:0]}) |
                     ({5{b3_0sel}} & {2'b00,b3_cnt[2:0]});


// High word (bytes 7..4): same scheme, byte 4 is the fall-through.
assign b7_0sel     =  b7_zero_;
assign b6_0sel     =  b6_zero_ & ~b7_zero_;
assign b5_0sel     =  b5_zero_ & ~(b7_zero_ | b6_zero_);
assign b4_0sel     = ~(b7_zero_ | b6_zero_ | b5_zero_);

assign cntl0h[4:0] = ({5{b4_0sel}} & {2'b11,b4_cnt[2:0]}) |
                     ({5{b5_0sel}} & {2'b10,b5_cnt[2:0]}) |
                     ({5{b6_0sel}} & {2'b01,b6_cnt[2:0]}) |
                     ({5{b7_0sel}} & {2'b00,b7_cnt[2:0]});

// Word-level "contains a 1" flags; cntl0[6] is set only when all 64 bits are zero.
assign cntl0_selh  = |{b7_zero_, b6_zero_, b5_zero_, b4_zero_};
assign cntl0_sell  = |{b3_zero_, b2_zero_, b1_zero_, b0_zero_};
assign cntl0[6]    = ~cntl0_selh & ~cntl0_sell;

// Bit 5 is set when the high word is all zero (count >= 32); bits 4:0 come
// from the high word when it contains a 1, else from the low word.
assign cntl0[5:0]  = {~cntl0_selh,
                      ({5{ cntl0_selh}} & cntl0h[4:0]) |
                      ({5{~cntl0_selh}} & cntl0l[4:0])};


// * * * * * * * * * * * * End   : Integer CNTL0 * * * * * * * * * * * * *


// * * * * * * * * * * * * Start : Integer CNTL1 * * * * * * * * * * * * *

//reg [6:0] cntl1;
//
//always @ (fdd_pe_clth[63:0])
//
// begin
//
//
// casex (fdd_pe_clth[63:0])
//  64'b0???????????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000000;
//
//  64'b10??????????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000000;
//  64'b110?????????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000001;
//  64'b1110????????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000010;
//  64'b11110???????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000011;
//  64'b111110??????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000100;
//  64'b1111110?????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000101;
//  64'b11111110????????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000110;
//  64'b111111110???????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0000111;
//
//  64'b1111111110??????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0001000;
//  64'b11111111110?????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0001001;
//  64'b111111111110????????????????????????????????????????????????????: cntl1[6:0]  =  7'b0001010;
//  64'b1111111111110???????????????????????????????????????????????????: cntl1[6:0]  =  7'b0001011;
//  64'b11111111111110??????????????????????????????????????????????????: cntl1[6:0]  =  7'b0001100;
//  64'b111111111111110?????????????????????????????????????????????????: cntl1[6:0]  =  7'b0001101;
//  64'b1111111111111110????????????????????????????????????????????????: cntl1[6:0]  =  7'b0001110;
//  64'b11111111111111110???????????????????????????????????????????????: cntl1[6:0]  =  7'b0001111;
//
//  64'b111111111111111110??????????????????????????????????????????????: cntl1[6:0]  =  7'b0010000;
//  64'b1111111111111111110?????????????????????????????????????????????: cntl1[6:0]  =  7'b0010001;
//  64'b11111111111111111110????????????????????????????????????????????: cntl1[6:0]  =  7'b0010010;
//  64'b111111111111111111110???????????????????????????????????????????: cntl1[6:0]  =  7'b0010011;
//  64'b1111111111111111111110??????????????????????????????????????????: cntl1[6:0]  =  7'b0010100;
//  64'b11111111111111111111110?????????????????????????????????????????: cntl1[6:0]  =  7'b0010101;
//  64'b111111111111111111111110????????????????????????????????????????: cntl1[6:0]  =  7'b0010110;
//  64'b1111111111111111111111110???????????????????????????????????????: cntl1[6:0]  =  7'b0010111;
//
//  64'b11111111111111111111111110??????????????????????????????????????: cntl1[6:0]  =  7'b0011000;
//  64'b111111111111111111111111110?????????????????????????????????????: cntl1[6:0]  =  7'b0011001;
//  64'b1111111111111111111111111110????????????????????????????????????: cntl1[6:0]  =  7'b0011010;
//  64'b11111111111111111111111111110???????????????????????????????????: cntl1[6:0]  =  7'b0011011;
//  64'b111111111111111111111111111110??????????????????????????????????: cntl1[6:0]  =  7'b0011100;
//  64'b1111111111111111111111111111110?????????????????????????????????: cntl1[6:0]  =  7'b0011101;
//  64'b11111111111111111111111111111110????????????????????????????????: cntl1[6:0]  =  7'b0011110;
//  64'b111111111111111111111111111111110???????????????????????????????: cntl1[6:0]  =  7'b0011111;
//
//  64'b1111111111111111111111111111111110??????????????????????????????: cntl1[6:0]  =  7'b0100000;
//  64'b11111111111111111111111111111111110?????????????????????????????: cntl1[6:0]  =  7'b0100001;
//  64'b111111111111111111111111111111111110????????????????????????????: cntl1[6:0]  =  7'b0100010;
//  64'b1111111111111111111111111111111111110???????????????????????????: cntl1[6:0]  =  7'b0100011;
//  64'b11111111111111111111111111111111111110??????????????????????????: cntl1[6:0]  =  7'b0100100;
//  64'b111111111111111111111111111111111111110?????????????????????????: cntl1[6:0]  =  7'b0100101;
//  64'b1111111111111111111111111111111111111110????????????????????????: cntl1[6:0]  =  7'b0100110;
//  64'b11111111111111111111111111111111111111110???????????????????????: cntl1[6:0]  =  7'b0100111;
//
//  64'b111111111111111111111111111111111111111110??????????????????????: cntl1[6:0]  =  7'b0101000;
//  64'b1111111111111111111111111111111111111111110?????????????????????: cntl1[6:0]  =  7'b0101001;
//  64'b11111111111111111111111111111111111111111110????????????????????: cntl1[6:0]  =  7'b0101010;
//  64'b111111111111111111111111111111111111111111110???????????????????: cntl1[6:0]  =  7'b0101011;
//  64'b1111111111111111111111111111111111111111111110??????????????????: cntl1[6:0]  =  7'b0101100;
//  64'b11111111111111111111111111111111111111111111110?????????????????: cntl1[6:0]  =  7'b0101101;
//  64'b111111111111111111111111111111111111111111111110????????????????: cntl1[6:0]  =  7'b0101110;
//  64'b1111111111111111111111111111111111111111111111110???????????????: cntl1[6:0]  =  7'b0101111;
//
//  64'b11111111111111111111111111111111111111111111111110??????????????: cntl1[6:0]  =  7'b0110000;
//  64'b111111111111111111111111111111111111111111111111110?????????????: cntl1[6:0]  =  7'b0110001;
//  64'b1111111111111111111111111111111111111111111111111110????????????: cntl1[6:0]  =  7'b0110010;
//  64'b11111111111111111111111111111111111111111111111111110???????????: cntl1[6:0]  =  7'b0110011;
//  64'b111111111111111111111111111111111111111111111111111110??????????: cntl1[6:0]  =  7'b0110100;
//  64'b1111111111111111111111111111111111111111111111111111110?????????: cntl1[6:0]  =  7'b0110101;
//  64'b11111111111111111111111111111111111111111111111111111110????????: cntl1[6:0]  =  7'b0110110;
//  64'b111111111111111111111111111111111111111111111111111111110???????: cntl1[6:0]  =  7'b0110111;
//
//  64'b1111111111111111111111111111111111111111111111111111111110??????: cntl1[6:0]  =  7'b0111000;
//  64'b11111111111111111111111111111111111111111111111111111111110?????: cntl1[6:0]  =  7'b0111001;
//  64'b111111111111111111111111111111111111111111111111111111111110????: cntl1[6:0]  =  7'b0111010;
//  64'b1111111111111111111111111111111111111111111111111111111111110???: cntl1[6:0]  =  7'b0111011;
//  64'b11111111111111111111111111111111111111111111111111111111111110??: cntl1[6:0]  =  7'b0111100;
//  64'b111111111111111111111111111111111111111111111111111111111111110?: cntl1[6:0]  =  7'b0111101;
//  64'b1111111111111111111111111111111111111111111111111111111111111110: cntl1[6:0]  =  7'b0111110;
//  64'b1111111111111111111111111111111111111111111111111111111111111111: cntl1[6:0]  =  7'b0111111;
//
//  default:                                                              cntl1[6:0]  =  7'b0000000;
//
// endcase
//
//end


// The count leading one (CNTL1) here must compute "n-1"
// leading 1's.  To do this, each local byte will receive
// an offset input.
// Note : If clth[63] = 0   then CNTL1 is a DON'T CARE

//                                    byte
//         clth[7:0]    |  cnt[2:0]   ones
//     -----------------|-----------------
//     0 x x x x x x x  |   0 0 0      0
//     1 0 x x x x x x  |   0 0 1      0
//     1 1 0 x x x x x  |   0 1 0      0
//     1 1 1 0 x x x x  |   0 1 1      0
//     1 1 1 1 0 x x x  |   1 0 0      0
//     1 1 1 1 1 0 x x  |   1 0 1      0
//     1 1 1 1 1 1 0 x  |   1 1 0      0
//     1 1 1 1 1 1 1 0  |   1 1 1      0
//     1 1 1 1 1 1 1 1  |   0 0 0      1    (byte 0 is '111' here)
//
//
//          Byte Ones_
//    z7 z6 z5 z4 z3 z2 z1 z0  |   cnt[6:3]
//    -------------------------|-------------
//    1  x  x  x  x  x  x  x   |   0 0 0 0
//    0  1  x  x  x  x  x  x   |   0 0 0 1
//    0  0  1  x  x  x  x  x   |   0 0 1 0
//    0  0  0  1  x  x  x  x   |   0 0 1 1
//    0  0  0  0  1  x  x  x   |   0 1 0 0
//    0  0  0  0  0  1  x  x   |   0 1 0 1
//    0  0  0  0  0  0  1  x   |   0 1 1 0
//    0  0  0  0  0  0  0  1   |   0 1 1 1
//    0  0  0  0  0  0  0  0   |   1 x x x  (divide ENDS!)
//
//
// Estimated critical path :
//    NAND4 + NOR2 + INV + PE(3->4) + MUX4 + MUX2 + MUX3(merge CNTL0 + CNTL1)


// ************************ BYTE 7 => 62:55 **************************

assign b7_nand_74  = ~(clth[62] & clth[61] & clth[60] & clth[59]);
assign b7_nand_30  = ~(clth[58] & clth[57] & clth[56] & clth[55]);

assign b7_ones     = ~(b7_nand_74 | b7_nand_30);
assign b7_ones_    = ~b7_ones;

assign b7_cnt1h[0] =  ( clth[62] & ~clth[61]                        ) |
                      ( clth[62] &              clth[60] & ~clth[59]);

assign b7_cnt1h[1] =  ( clth[62] &  clth[61] & ~clth[60]            ) |
                      ( clth[62] &  clth[61] &             ~clth[59]);

assign b7_cnt1l[0] =  ( clth[58] & ~clth[57]                        ) |
                      ( clth[58] &              clth[56] & ~clth[55]);

assign b7_cnt1l[1] =  ( clth[58] &  clth[57] & ~clth[56]            ) |
                      ( clth[58] &  clth[57] &             ~clth[55]);

assign b7_cnt1[0]  =  ( b7_nand_74 & b7_cnt1h[0]) |
                      (~b7_nand_74 & b7_cnt1l[0]);

assign b7_cnt1[1]  =  ( b7_nand_74 & b7_cnt1h[1]) |
                      (~b7_nand_74 & b7_cnt1l[1]);

assign b7_cnt1[2]  =  (~b7_nand_74 & b7_nand_30);


// ************************ BYTE 6 => 54:47 **************************

assign b6_nand_74  = ~(clth[54] & clth[53] & clth[52] & clth[51]);
assign b6_nand_30  = ~(clth[50] & clth[49] & clth[48] & clth[47]);

assign b6_ones     = ~(b6_nand_74 | b6_nand_30);
assign b6_ones_    = ~b6_ones;

assign b6_cnt1h[0] =  ( clth[54] & ~clth[53]                        ) |
                      ( clth[54] &              clth[52] & ~clth[51]);

assign b6_cnt1h[1] =  ( clth[54] &  clth[53] & ~clth[52]            ) |
                      ( clth[54] &  clth[53] &             ~clth[51]);

assign b6_cnt1l[0] =  ( clth[50] & ~clth[49]                        ) |
                      ( clth[50] &              clth[48] & ~clth[47]);

assign b6_cnt1l[1] =  ( clth[50] &  clth[49] & ~clth[48]            ) |
                      ( clth[50] &  clth[49] &             ~clth[47]);

assign b6_cnt1[0]  =  ( b6_nand_74 & b6_cnt1h[0]) |
                      (~b6_nand_74 & b6_cnt1l[0]);

assign b6_cnt1[1]  =  ( b6_nand_74 & b6_cnt1h[1]) |
                      (~b6_nand_74 & b6_cnt1l[1]);

assign b6_cnt1[2]  =  (~b6_nand_74 & b6_nand_30);


// ************************ BYTE 5 => 46:39 **************************

assign b5_nand_74  = ~(clth[46] & clth[45] & clth[44] & clth[43]);
assign b5_nand_30  = ~(clth[42] & clth[41] & clth[40] & clth[39]);

assign b5_ones     = ~(b5_nand_74 | b5_nand_30);
assign b5_ones_    = ~b5_ones;

assign b5_cnt1h[0] =  ( clth[46] & ~clth[45]                        ) |
                      ( clth[46] &              clth[44] & ~clth[43]);

assign b5_cnt1h[1] =  ( clth[46] &  clth[45] & ~clth[44]            ) |
                      ( clth[46] &  clth[45] &             ~clth[43]);

assign b5_cnt1l[0] =  ( clth[42] & ~clth[41]                        ) |
                      ( clth[42] &              clth[40] & ~clth[39]);

assign b5_cnt1l[1] =  ( clth[42] &  clth[41] & ~clth[40]            ) |
                      ( clth[42] &  clth[41] &             ~clth[39]);

assign b5_cnt1[0]  =  ( b5_nand_74 & b5_cnt1h[0]) |
                      (~b5_nand_74 & b5_cnt1l[0]);

assign b5_cnt1[1]  =  ( b5_nand_74 & b5_cnt1h[1]) |
                      (~b5_nand_74 & b5_cnt1l[1]);

assign b5_cnt1[2]  =  (~b5_nand_74 & b5_nand_30);


// ************************ BYTE 4 => 38:31 **************************

assign b4_nand_74  = ~(clth[38] & clth[37] & clth[36] & clth[35]);
assign b4_nand_30  = ~(clth[34] & clth[33] & clth[32] & clth[31]);

assign b4_ones     = ~(b4_nand_74 | b4_nand_30);
assign b4_ones_    = ~b4_ones;

assign b4_cnt1h[0] =  ( clth[38] & ~clth[37]                        ) |
                      ( clth[38] &              clth[36] & ~clth[35]);

assign b4_cnt1h[1] =  ( clth[38] &  clth[37] & ~clth[36]            ) |
                      ( clth[38] &  clth[37] &             ~clth[35]);

assign b4_cnt1l[0] =  ( clth[34] & ~clth[33]                        ) |
                      ( clth[34] &              clth[32] & ~clth[31]);

assign b4_cnt1l[1] =  ( clth[34] &  clth[33] & ~clth[32]            ) |
                      ( clth[34] &  clth[33] &             ~clth[31]);

assign b4_cnt1[0]  =  ( b4_nand_74 & b4_cnt1h[0]) |
                      (~b4_nand_74 & b4_cnt1l[0]);

assign b4_cnt1[1]  =  ( b4_nand_74 & b4_cnt1h[1]) |
                      (~b4_nand_74 & b4_cnt1l[1]);

assign b4_cnt1[2]  =  (~b4_nand_74 & b4_nand_30);


// ************************ BYTE 3 => 30:23 **************************

assign b3_nand_74  = ~(clth[30] & clth[29] & clth[28] & clth[27]);
assign b3_nand_30  = ~(clth[26] & clth[25] & clth[24] & clth[23]);

assign b3_ones     = ~(b3_nand_74 | b3_nand_30);
assign b3_ones_    = ~b3_ones;

assign b3_cnt1h[0] =  ( clth[30] & ~clth[29]                        ) |
                      ( clth[30] &              clth[28] & ~clth[27]);

assign b3_cnt1h[1] =  ( clth[30] &  clth[29] & ~clth[28]            ) |
                      ( clth[30] &  clth[29] &             ~clth[27]);

assign b3_cnt1l[0] =  ( clth[26] & ~clth[25]                        ) |
                      ( clth[26] &              clth[24] & ~clth[23]);

assign b3_cnt1l[1] =  ( clth[26] &  clth[25] & ~clth[24]            ) |
                      ( clth[26] &  clth[25] &             ~clth[23]);

assign b3_cnt1[0]  =  ( b3_nand_74 & b3_cnt1h[0]) |
                      (~b3_nand_74 & b3_cnt1l[0]);

assign b3_cnt1[1]  =  ( b3_nand_74 & b3_cnt1h[1]) |
                      (~b3_nand_74 & b3_cnt1l[1]);

assign b3_cnt1[2]  =  (~b3_nand_74 & b3_nand_30);


// ************************ BYTE 2 => 22:15 **************************

assign b2_nand_74  = ~(clth[22] & clth[21] & clth[20] & clth[19]);
assign b2_nand_30  = ~(clth[18] & clth[17] & clth[16] & clth[15]);

assign b2_ones     = ~(b2_nand_74 | b2_nand_30);
assign b2_ones_    = ~b2_ones;

assign b2_cnt1h[0] =  ( clth[22] & ~clth[21]                        ) |
                      ( clth[22] &              clth[20] & ~clth[19]);

assign b2_cnt1h[1] =  ( clth[22] &  clth[21] & ~clth[20]            ) |
                      ( clth[22] &  clth[21] &             ~clth[19]);

assign b2_cnt1l[0] =  ( clth[18] & ~clth[17]                        ) |
                      ( clth[18] &              clth[16] & ~clth[15]);

assign b2_cnt1l[1] =  ( clth[18] &  clth[17] & ~clth[16]            ) |
                      ( clth[18] &  clth[17] &             ~clth[15]);

assign b2_cnt1[0]  =  ( b2_nand_74 & b2_cnt1h[0]) |
                      (~b2_nand_74 & b2_cnt1l[0]);

assign b2_cnt1[1]  =  ( b2_nand_74 & b2_cnt1h[1]) |
                      (~b2_nand_74 & b2_cnt1l[1]);

assign b2_cnt1[2]  =  (~b2_nand_74 & b2_nand_30);


// ************************ BYTE 1 => 14:07 **************************

assign b1_nand_74  = ~(clth[14] & clth[13] & clth[12] & clth[11]);
assign b1_nand_30  = ~(clth[10] & clth[9] & clth[8] & clth[7]);

assign b1_ones     = ~(b1_nand_74 | b1_nand_30);
assign b1_ones_    = ~b1_ones;

assign b1_cnt1h[0] =  ( clth[14] & ~clth[13]                        ) |
                      ( clth[14] &              clth[12] & ~clth[11]);

assign b1_cnt1h[1] =  ( clth[14] &  clth[13] & ~clth[12]            ) |
                      ( clth[14] &  clth[13] &             ~clth[11]);

assign b1_cnt1l[0] =  ( clth[10] & ~clth[9]                        ) |
                      ( clth[10] &              clth[8] & ~clth[7]);

assign b1_cnt1l[1] =  ( clth[10] &  clth[9] & ~clth[8]            ) |
                      ( clth[10] &  clth[9] &             ~clth[7]);

assign b1_cnt1[0]  =  ( b1_nand_74 & b1_cnt1h[0]) |
                      (~b1_nand_74 & b1_cnt1l[0]);

assign b1_cnt1[1]  =  ( b1_nand_74 & b1_cnt1h[1]) |
                      (~b1_nand_74 & b1_cnt1l[1]);

assign b1_cnt1[2]  =  (~b1_nand_74 & b1_nand_30);


// ************************ BYTE 0 => 06:00 **************************

// Note : Byte 0 is unique since cnt1[2:0] must be 3'b111 for byte 'all ones' case!
//  64'b1111111111111111111111111111111111111111111111111111111110??????: cntl1[6:0]  =  7'b0111000;
//  64'b11111111111111111111111111111111111111111111111111111111110?????: cntl1[6:0]  =  7'b0111001;
//  64'b111111111111111111111111111111111111111111111111111111111110????: cntl1[6:0]  =  7'b0111010;
//  64'b1111111111111111111111111111111111111111111111111111111111110???: cntl1[6:0]  =  7'b0111011;
//  64'b11111111111111111111111111111111111111111111111111111111111110??: cntl1[6:0]  =  7'b0111100;
//  64'b111111111111111111111111111111111111111111111111111111111111110?: cntl1[6:0]  =  7'b0111101;
//  64'b1111111111111111111111111111111111111111111111111111111111111110: cntl1[6:0]  =  7'b0111110;
//  64'b1111111111111111111111111111111111111111111111111111111111111111: cntl1[6:0]  =  7'b0111111;


assign b0_nand_74  = ~(clth[6] & clth[5] & clth[4] & clth[3]);

assign b0_cnt1h[0] =  ( clth[6] & ~clth[5]                        ) |
                      ( clth[6] &              clth[4] & ~clth[3]);

assign b0_cnt1h[1] =  ( clth[6] &  clth[5] & ~clth[4]            ) |
                      ( clth[6] &  clth[5] &             ~clth[3]);

assign b0_cnt1l[0] =  ( clth[2] & ~clth[1]                        ) |
                      ( clth[2] &              clth[0]            );

assign b0_cnt1l[1] =  ( clth[2] &  clth[1]                        );

assign b0_cnt1[0]  =  ( b0_nand_74 & b0_cnt1h[0]) |
                      (~b0_nand_74 & b0_cnt1l[0]);

assign b0_cnt1[1]  =  ( b0_nand_74 & b0_cnt1h[1]) |
                      (~b0_nand_74 & b0_cnt1l[1]);

assign b0_cnt1[2]  =  (~b0_nand_74             );


// ************************  Global CNTL1   **************************

assign b3_1sel     =  b3_ones_                        ;
assign b2_1sel     = ~b3_ones_ &  b2_ones_            ;
assign b1_1sel     = ~b3_ones_ & ~b2_ones_ &  b1_ones_;
assign b0_1sel     = ~b3_ones_ & ~b2_ones_ & ~b1_ones_;

assign cntl1l[4:0] = ({5{b3_1sel}} & {2'b00,b3_cnt1[2:0]}) |
                     ({5{b2_1sel}} & {2'b01,b2_cnt1[2:0]}) |
                     ({5{b1_1sel}} & {2'b10,b1_cnt1[2:0]}) |
                     ({5{b0_1sel}} & {2'b11,b0_cnt1[2:0]});


assign b7_1sel     =  b7_ones_                        ;
assign b6_1sel     = ~b7_ones_ &  b6_ones_            ;
assign b5_1sel     = ~b7_ones_ & ~b6_ones_ &  b5_ones_;
assign b4_1sel     = ~b7_ones_ & ~b6_ones_ & ~b5_ones_;

assign cntl1h[4:0] = ({5{b7_1sel}} & {2'b00,b7_cnt1[2:0]}) |
                     ({5{b6_1sel}} & {2'b01,b6_cnt1[2:0]}) |
                     ({5{b5_1sel}} & {2'b10,b5_cnt1[2:0]}) |
                     ({5{b4_1sel}} & {2'b11,b4_cnt1[2:0]});


assign cntl1_selh  = b7_ones_ | b6_ones_ | b5_ones_ | b4_ones_;

assign cntl1[5:0]  = ({6{ cntl1_selh}} & {1'b0, cntl1h[4:0]}) |
                     ({6{~cntl1_selh}} & {1'b1, cntl1l[4:0]});


// * * * * * * * * * * * * End   : Integer CNTL1 * * * * * * * * * * * * *


assign xsht_amt_sel10   = ~(fdd_pe_clth[63] & control_lth[0]) & fdc_pe_cycle[1];
assign xsht_amt_sel11   =  (fdd_pe_clth[63] & control_lth[0]) & fdc_pe_cycle[1];
assign xsht_amt_sel20   = ~(fdd_pe_clth[63] & control_lth[0]) & fdc_pe_cycle[2];
assign xsht_amt_sel21   =  (fdd_pe_clth[63] & control_lth[0]) & fdc_pe_cycle[2];

assign xsht_amt_in[7:0] = ({8{engine_on     }} &  8'b11111101       ) |
                          ({8{xsht_amt_sel10}} & {1'b0 , cntl0[6:0]}) |
                          ({8{xsht_amt_sel11}} & {2'b00, cntl1[5:0]}) |
                          ({8{xsht_amt_sel20}} & {1'b1 ,~cntl0[6:0]}) |
                          ({8{xsht_amt_sel21}} & {2'b11,~cntl1[5:0]});


assign pe_ndq[7:0]      = pe_hamt_lth[7:0] + pe_xsht_amt[7:0] + 8'b0000_0001;

assign pe_hamt_in[7:0]  = ({8{    pe_hmux_sel[0]}} & 8'b00110100) |     // FLT DP
                          ({8{    pe_hmux_sel[1]}} & 8'b00010111) |     // FLT SP
                          ({8{fdc_pe_smux_sel[0]}} & 8'b00000001) |     // INT neg B correction
                          ({8{    pe_hmux_sel[2]}} & pe_ndq[7:0]);      // SRT loop counter


fgu_fdc_ctl_msff_ctl_macro__width_8     xsht_lth         (
 .scan_in(xsht_lth_scanin),
 .scan_out(xsht_lth_scanout),
 .l1clk( l1clk_pm1        ),
 .din  ( xsht_amt_in[7:0] ),
 .dout ( pe_xsht_amt[7:0] ),
  .siclk(siclk),
  .soclk(soclk));


fgu_fdc_ctl_msff_ctl_macro__width_8     hamt_lth         (
 .scan_in(hamt_lth_scanin),
 .scan_out(hamt_lth_scanout),
 .l1clk( l1clk_pm1              ),
 .din  ( pe_hamt_in[7:0]        ),
 .dout ( pe_hamt_lth[7:0]       ),
  .siclk(siclk),
  .soclk(soclk));


assign xsht_ctl_in[5:0] = ({6{xsht_amt_sel10}} & cntl0[5:0]) |
                          ({6{xsht_amt_sel11}} & cntl1[5:0]) |
                          ({6{xsht_amt_sel20}} & cntl0[5:0]) |
                          ({6{xsht_amt_sel21}} & cntl1[5:0]);

fgu_fdc_ctl_msff_ctl_macro__width_6     xcntl_lth        (
 .scan_in(xcntl_lth_scanin),
 .scan_out(xcntl_lth_scanout),
 .l1clk( l1clk_pm1            ),
 .din  ( xsht_ctl_in[5:0]     ),
 .dout ( fdc_pe_xsht_ctl[5:0] ),
  .siclk(siclk),
  .soclk(soclk));


// *** Floating Point Rounding ***


assign engine_valid_fx1 =   fac_div_valid_fx1    &  ~fac_divq_valid_fx1;
assign engine_valid_fx2 =  (queue_valid_lth_fx2  &  (fac_div_valid_fx1 & fac_divq_valid_fx1)) | engine_valid_lth_fx2;
assign engine_valid_fx3 =  (queue_valid_lth_fx3  &  (fac_div_valid_fx1 & fac_divq_valid_fx1)) | engine_valid_lth_fx3;

assign queue_valid_fx1  =  ~fac_div_valid_fx1    &   fac_divq_valid_fx1;
assign queue_valid_fx2  =   queue_valid_lth_fx2  & ~(fac_div_valid_fx1 & fac_divq_valid_fx1);
//sign queue_valid_fx3  =   queue_valid_lth_fx3  & ~(fac_div_valid_fx1 & fac_divq_valid_fx1);

assign q2e_fx3p         =  (fac_div_valid_fx1 & fac_divq_valid_fx1) & ~queue_valid_lth_fx2 & ~queue_valid_lth_fx3;


fgu_fdc_ctl_msff_ctl_macro__width_4     xrnd_vld_lth     (
 .scan_in(xrnd_vld_lth_scanin),
 .scan_out(xrnd_vld_lth_scanout),
 .l1clk( l1clk_pm1                                                                         ),
 .din  ({engine_valid_fx1    ,engine_valid_fx2    ,queue_valid_fx1    ,queue_valid_fx2}    ),
 .dout ({engine_valid_lth_fx2,engine_valid_lth_fx3,queue_valid_lth_fx2,queue_valid_lth_fx3}),
  .siclk(siclk),
  .soclk(soclk));


// SPARC v9 : pg 44
//
//   RD  |  Round toward
//  ---  |  ------------
//   00  |  Nearest (even if tie)
//   01  |     0
//   10  |  +INF
//   11  |  -INF


assign eround_mode_in[1:0]= ({2{ engine_valid_fx3            }} & fpc_rd_mode_fx3[1:0]) |
                            ({2{                     q2e_fx3p}} & qround_mode_lth[1:0]) |
                            ({2{~engine_valid_fx3 & ~q2e_fx3p}} & eround_mode_lth[1:0]);

assign e_emin_in          = (    engine_valid_fx3               & fpc_emin_fx3        ) |
                            (                        q2e_fx3p   & q_emin_lth          ) |
                            (   ~engine_valid_fx3 & ~q2e_fx3p   & fdc_emin_lth        );


assign qround_mode_in[1:0]= ({2{ queue_valid_lth_fx3}} & fpc_rd_mode_fx3[1:0]) |
                            ({2{~queue_valid_lth_fx3}} & qround_mode_lth[1:0]);

assign q_emin_in          = (    queue_valid_lth_fx3   & fpc_emin_fx3        ) |
                            (   ~queue_valid_lth_fx3   & q_emin_lth          );


assign float_sign_in      = (    fac_div_valid_fx1 & ~fac_divq_valid_fx1   & incoming_sign_fx1       ) |
                            (    fac_div_valid_fx1 &  fac_divq_valid_fx1   & qcontrol_fx1[6]         ) |
                            (   ~fac_div_valid_fx1                         & float_sign_lth          );

assign fdc_dec_exp_early  =  fdc_pte_cycle[0] & ~control_lth[2] & ~fdd_result[63] & ~flt_sqrte_kill_dec;

assign inexact_in         =  fdc_pte_cycle[0] & ~control_lth[2] & (final_sticky | final_guard);


fgu_fdc_ctl_msff_ctl_macro__width_10    xrnd_lth         (
 .scan_in(xrnd_lth_scanin),
 .scan_out(xrnd_lth_scanout),
 .l1clk( l1clk_pm1                                                                                                                                  ),
 .din  ({float_sign_in  , eround_mode_in[1:0]  , qround_mode_in[1:0]  , fdd_cla_zero64_ , sticky_pte1 , inexact_in      , e_emin_in    , q_emin_in }),
 .dout ({float_sign_lth , eround_mode_lth[1:0] , qround_mode_lth[1:0] , sticky_pte1     , sticky_pte0 , fdc_flt_inexact , fdc_emin_lth , q_emin_lth}),
  .siclk(siclk),
  .soclk(soclk));


assign fdc_flt_round[1]   = ~control_lth[2] & ~control_lth[1];  // SP
assign fdc_flt_round[0]   = ~control_lth[2] &  control_lth[1];  // DP


assign flt_shift_sel_     =   fdd_result[63] |  fdc_emin_lth;


assign final_sticky       = ( control_lth[1] &  flt_shift_sel_ & (fdd_result[9] | sticky_pte0)) |       // DP "1."
                            ( control_lth[1] & ~flt_shift_sel_ & (                 sticky_pte0)) |      // DP "0."
                            (~control_lth[1] &  flt_shift_sel_ & (fdd_result[38] | sticky_pte0)) |      // SP "1."
                            (~control_lth[1] & ~flt_shift_sel_ & (                 sticky_pte0));       // SP "0."

assign final_guard        = ( control_lth[1] &  flt_shift_sel_ &  fdd_result[10]               ) |      // DP "1."
                            ( control_lth[1] & ~flt_shift_sel_ &  fdd_result[9]               ) |       // DP "0."
                            (~control_lth[1] &  flt_shift_sel_ &  fdd_result[39]               ) |      // SP "1."
                            (~control_lth[1] & ~flt_shift_sel_ &  fdd_result[38]               );       // SP "0."

assign final_lsb          = ( control_lth[1] &  flt_shift_sel_ &  fdd_result[11]               ) |      // DP "1."
                            ( control_lth[1] & ~flt_shift_sel_ &  fdd_result[10]               ) |      // DP "0."
                            (~control_lth[1] &  flt_shift_sel_ &  fdd_result[40]               ) |      // SP "1."
                            (~control_lth[1] & ~flt_shift_sel_ &  fdd_result[39]               );       // SP "0."


assign flt_rnd00_en       =  ~control_lth[2] & (eround_mode_lth[1:0] == 2'b00);
assign flt_rnd1x_en       = (~control_lth[2] & (eround_mode_lth[1:0] == 2'b10) & ~float_sign_lth) |
                            (~control_lth[2] & (eround_mode_lth[1:0] == 2'b11) &  float_sign_lth);

assign fdc_flt_increment  = ( flt_rnd00_en  & final_guard & final_sticky            ) |
                            ( flt_rnd00_en  & final_guard &                final_lsb) |
                            ( flt_rnd1x_en  & final_guard                           ) |
                            ( flt_rnd1x_en  &               final_sticky            );


// *** Floating Point Square Root Special Case ***

// For an odd exponent Square Root, the mantissa is shifted one bit position right.
// In most cases, the final result will end up in the form of "0.1".  We then normalize
// this result and decrement the result exponent.  However, if the mantissa is all ONES,
// this does not hold.  If you take the square root of 0.1111111...1 (after the 1-bit shift),
// the result will move closer to 1.00000000.  In Round to +INF mode only,
// the rounded result will be 1.000000 and the exponent will not be decremented.

//                                  1=DP , 0=SP              SQRTE
assign fsqrt_fract_all_ones     = (~fac_div_control_fx1[1] & fac_div_control_fx1[3] & fpf_hi_bof_fx1                 ) |
                                  ( fac_div_control_fx1[1] & fac_div_control_fx1[3] & fpf_hi_bof_fx1 & fpf_lo_bof_fx1);

assign fsqrt_special_in         = ( fac_div_valid_fx1 & ~fac_divq_valid_fx1 & fsqrt_fract_all_ones) |
                                  ( fac_div_valid_fx1 &  fac_divq_valid_fx1 & qcontrol_fx1[7]     ) |
                                  (~fac_div_valid_fx1 &                       fsqrt_special_lth   );

fgu_fdc_ctl_msff_ctl_macro__width_1     spec_sqrt_lth    (
 .scan_in(spec_sqrt_lth_scanin),
 .scan_out(spec_sqrt_lth_scanout),
 .l1clk( l1clk_pm1              ),
 .din  ( fsqrt_special_in       ),
 .dout ( fsqrt_special_lth      ),
  .siclk(siclk),
  .soclk(soclk));

assign flt_sqrte_kill_dec       =   fsqrt_special_lth & (eround_mode_lth[1:0] == 2'b10);  // +inf


// *** FDX Custom ***


assign cla_64           = fdd_fdx_din0 ^ fdd_fdx_din1 ^ fdd_fdx_cin64;

assign cin_in_raw       = (~fdc_asign_lth & ~fdc_bsign_lth & ~cla_64                   ) |
                          (~fdc_asign_lth &  fdc_bsign_lth &  cla_64                   ) |
                          ( fdc_asign_lth & ~fdc_bsign_lth & ~cla_64 &  fdd_cla_zero64_) |
                          ( fdc_asign_lth &  fdc_bsign_lth &           ~fdd_cla_zero64_) |
                          ( fdc_asign_lth &  fdc_bsign_lth &  cla_64                   );

assign fdc_fdx_cin_in   = fdc_pte_cycle2 & cin_in_raw;


// *** FDQ00 Custom ***

assign fdq00_sum[3:0]   = fdd_fdq00_10_sum[4:1];    // s0[65:62]
assign fdq00_carry[3:0] = fdd_fdq00_10_carry[4:1];  // c0[65:62]

assign pr00[0]          = fdq00_sum[0]   | fdq00_carry[0];
assign pr00[3:1]        = fdq00_sum[3:1] + fdq00_carry[3:1];

assign fdc_qsel00[0]    = ( pr00[3] & ~pr00[2]) |                       // 10.0x ; 10.1x
                          ( pr00[3] & ~pr00[1]) |                       // 10.0x ; 11.0x
                          ( pr00[3] & ~pr00[0]);                        // 11.10

assign fdc_qsel00[1]    = ( pr00[3] &  pr00[2] &  pr00[1] &  pr00[0]) | // 11.11
                          (~pr00[3] & ~pr00[2] & ~pr00[1] & ~pr00[0]);  // 00.00


assign fdc_qsel00[2]    = (~pr00[3] &  pr00[2]) |                       // 01.1x ; 01.0x
                          (~pr00[3] &  pr00[1]) |                       // 01.1x ; 00.1x
                          (~pr00[3] &  pr00[0]);                        // 00.01


// *** FDQ1p Custom ***

assign pr1p[0]          = fdd_fdq1p_sum[0]   | fdd_fdq1p_carry[0];
assign pr1p[3:1]        = fdd_fdq1p_sum[3:1] + fdd_fdq1p_carry[3:1];

assign qsel1p[0]        = ( pr1p[3] & ~pr1p[2]) |                       // 10.0x ; 10.1x
                          ( pr1p[3] & ~pr1p[1]) |                       // 10.0x ; 11.0x
                          ( pr1p[3] & ~pr1p[0]);                        // 11.10

assign qsel1p[1]        = ( pr1p[3] &  pr1p[2] &  pr1p[1] &  pr1p[0]) | // 11.11
                          (~pr1p[3] & ~pr1p[2] & ~pr1p[1] & ~pr1p[0]);  // 00.00


assign qsel1p[2]        = (~pr1p[3] &  pr1p[2]) |                       // 01.1x ; 01.0x
                          (~pr1p[3] &  pr1p[1]) |                       // 01.1x ; 00.1x
                          (~pr1p[3] &  pr1p[0]);                        // 00.01


// *** FDQ10 Custom ***

assign fdq10_sum[3:0]   = fdd_fdq00_10_sum[3:0];    // s0[64:61]
assign fdq10_carry[3:0] = fdd_fdq00_10_carry[3:0];  // c0[64:61]

assign pr10[0]          = fdq10_sum[0]   | fdq10_carry[0];
assign pr10[3:1]        = fdq10_sum[3:1] + fdq10_carry[3:1];

assign qsel10[0]        = ( pr10[3] & ~pr10[2]) |                       // 10.0x ; 10.1x
                          ( pr10[3] & ~pr10[1]) |                       // 10.0x ; 11.0x
                          ( pr10[3] & ~pr10[0]);                        // 11.10

assign qsel10[1]        = ( pr10[3] &  pr10[2] &  pr10[1] &  pr10[0]) | // 11.11
                          (~pr10[3] & ~pr10[2] & ~pr10[1] & ~pr10[0]);  // 00.00


assign qsel10[2]        = (~pr10[3] &  pr10[2]) |                       // 01.1x ; 01.0x
                          (~pr10[3] &  pr10[1]) |                       // 01.1x ; 00.1x
                          (~pr10[3] &  pr10[0]);                        // 00.01


// *** FDQ1n Custom ***

assign pr1n[0]          = fdd_fdq1n_sum[0]   | fdd_fdq1n_carry[0];
assign pr1n[3:1]        = fdd_fdq1n_sum[3:1] + fdd_fdq1n_carry[3:1];

assign qsel1n[0]        = ( pr1n[3] & ~pr1n[2]) |                       // 10.0x ; 10.1x
                          ( pr1n[3] & ~pr1n[1]) |                       // 10.0x ; 11.0x
                          ( pr1n[3] & ~pr1n[0]);                        // 11.10

assign qsel1n[1]        = ( pr1n[3] &  pr1n[2] &  pr1n[1] &  pr1n[0]) | // 11.11
                          (~pr1n[3] & ~pr1n[2] & ~pr1n[1] & ~pr1n[0]);  // 00.00


assign qsel1n[2]        = (~pr1n[3] &  pr1n[2]) |                       // 01.1x ; 01.0x
                          (~pr1n[3] &  pr1n[1]) |                       // 01.1x ; 00.1x
                          (~pr1n[3] &  pr1n[0]);                        // 00.01


assign engine_start     = fac_div_valid_fx1 | fdc_pe_cycle3;

assign fdc_qsel1[2:0]   = ({3{~engine_start & fdc_qsel00[0]}} & qsel1p[2:0]) |
                          ({3{~engine_start & fdc_qsel00[1]}} & qsel10[2:0]) |
                          ({3{~engine_start & fdc_qsel00[2]}} & qsel1n[2:0]);


// *** Misc Logic from FDD ***

assign fdc_q_in[1]      = (   ~engine_start & fdc_qsel00[0]   & fdc_bsign_lth ) |
                          (   ~engine_start & fdc_qsel00[2]   & fdc_bsign_lth_);

assign fdc_qm1_in[1]    = (   ~engine_start & fdc_qsel00[0]   & fdc_bsign_lth_) |
                          (   ~engine_start & fdc_qsel00[2]   & fdc_bsign_lth );

assign fdc_q_in[0]      = (                   fdc_qsel1[0]    & fdc_bsign_lth ) |
                          (                   fdc_qsel1[2]    & fdc_bsign_lth_);

assign fdc_qm1_in[0]    = (                   fdc_qsel1[0]    & fdc_bsign_lth_) |
                          (                   fdc_qsel1[2]    & fdc_bsign_lth );


supply0 vss;
supply1 vdd;


// fixscan start:
assign spares_scanin             = scan_in                  ;
assign qdata_lth_scanin          = spares_scanout           ;
assign cntl_lth_scanin           = qdata_lth_scanout        ;
assign ovlf_lth_scanin           = cntl_lth_scanout         ;
assign stall_lth_scanin          = ovlf_lth_scanout         ;
assign data_lth_scanin           = stall_lth_scanout        ;
assign xsht_lth_scanin           = data_lth_scanout         ;
assign hamt_lth_scanin           = xsht_lth_scanout         ;
assign xcntl_lth_scanin          = hamt_lth_scanout         ;
assign xrnd_vld_lth_scanin       = xcntl_lth_scanout        ;
assign xrnd_lth_scanin           = xrnd_vld_lth_scanout     ;
assign spec_sqrt_lth_scanin      = xrnd_lth_scanout         ;
assign scan_out                  = spec_sqrt_lth_scanout    ;
// fixscan end:
endmodule


// any PARAMS parms go into naming of macro

// L1 clock header wrapper for control blocks.
// Instantiates one cl_sc1_l1hdr_8x cell (c_0).  The l1en port drives the
// cell's pce pin; every other port connects to the cell pin of the same
// name.  l1clk is the cell's output.
module fgu_fdc_ctl_l1clkhdr_ctl_macro (
  l2clk,
  l1en,
  pce_ov,
  stop,
  se,
  l1clk);

  output l1clk;     // driven by c_0

  input  l2clk;
  input  l1en;      // wired to c_0.pce
  input  pce_ov;
  input  stop;
  input  se;

cl_sc1_l1hdr_8x c_0 (
  .l1clk  (l1clk),
  .l2clk  (l2clk),
  .pce    (l1en),
  .pce_ov (pce_ov),
  .se     (se),
  .stop   (stop)
);

endmodule


//  Description:        Spare gate macro for control blocks
//
//  Param num controls the number of times the spare-gate group is instantiated.
//  flops=0 can be used to include only the combinational spare logic.


// Spare-gate macro, three copies (spare0 .. spare2).
// Each copy contains one cl_sc1_msff_8x spare flop and thirteen spare
// combinational cells.  Every cell input is tied to a constant and every
// cell output lands on a dedicated *_unused net.  The three flops are
// stitched into one scan segment:
//   scan_in -> spare0_flop -> spare1_flop -> spare2_flop -> scan_out
module fgu_fdc_ctl_spare_ctl_macro__num_3 (
  l1clk,
  scan_in,
  siclk,
  soclk,
  scan_out);

input           l1clk;
input           scan_in;
input           siclk;
input           soclk;
output          scan_out;

// Scan links between consecutive spare flops.
wire so_0;
wire so_1;

// Dangling outputs of spare group 0.
wire spare0_flop_unused,      spare0_buf_32x_unused,  spare0_nand3_8x_unused,
     spare0_inv_8x_unused,    spare0_aoi22_4x_unused, spare0_buf_8x_unused,
     spare0_oai22_4x_unused,  spare0_inv_16x_unused,  spare0_nand2_16x_unused,
     spare0_nor3_4x_unused,   spare0_nand2_8x_unused, spare0_buf_16x_unused,
     spare0_nor2_16x_unused,  spare0_inv_32x_unused;

// Dangling outputs of spare group 1.
wire spare1_flop_unused,      spare1_buf_32x_unused,  spare1_nand3_8x_unused,
     spare1_inv_8x_unused,    spare1_aoi22_4x_unused, spare1_buf_8x_unused,
     spare1_oai22_4x_unused,  spare1_inv_16x_unused,  spare1_nand2_16x_unused,
     spare1_nor3_4x_unused,   spare1_nand2_8x_unused, spare1_buf_16x_unused,
     spare1_nor2_16x_unused,  spare1_inv_32x_unused;

// Dangling outputs of spare group 2.
wire spare2_flop_unused,      spare2_buf_32x_unused,  spare2_nand3_8x_unused,
     spare2_inv_8x_unused,    spare2_aoi22_4x_unused, spare2_buf_8x_unused,
     spare2_oai22_4x_unused,  spare2_inv_16x_unused,  spare2_nand2_16x_unused,
     spare2_nor3_4x_unused,   spare2_nand2_8x_unused, spare2_buf_16x_unused,
     spare2_nor2_16x_unused,  spare2_inv_32x_unused;


// ---------------------------- spare group 0 ----------------------------

// First flop of the scan segment: takes scan_in directly.
cl_sc1_msff_8x  spare0_flop      (.l1clk(l1clk), .siclk(siclk), .soclk(soclk),
                                  .si(scan_in),  .so(so_0),
                                  .d(1'b0),      .q(spare0_flop_unused));

cl_u1_buf_32x   spare0_buf_32x   (.in(1'b1), .out(spare0_buf_32x_unused));
cl_u1_nand3_8x  spare0_nand3_8x  (.in0(1'b1), .in1(1'b1), .in2(1'b1),
                                  .out(spare0_nand3_8x_unused));
cl_u1_inv_8x    spare0_inv_8x    (.in(1'b1), .out(spare0_inv_8x_unused));
cl_u1_aoi22_4x  spare0_aoi22_4x  (.in00(1'b1), .in01(1'b1), .in10(1'b1), .in11(1'b1),
                                  .out(spare0_aoi22_4x_unused));
cl_u1_buf_8x    spare0_buf_8x    (.in(1'b1), .out(spare0_buf_8x_unused));
cl_u1_oai22_4x  spare0_oai22_4x  (.in00(1'b1), .in01(1'b1), .in10(1'b1), .in11(1'b1),
                                  .out(spare0_oai22_4x_unused));
cl_u1_inv_16x   spare0_inv_16x   (.in(1'b1), .out(spare0_inv_16x_unused));
cl_u1_nand2_16x spare0_nand2_16x (.in0(1'b1), .in1(1'b1),
                                  .out(spare0_nand2_16x_unused));
cl_u1_nor3_4x   spare0_nor3_4x   (.in0(1'b0), .in1(1'b0), .in2(1'b0),
                                  .out(spare0_nor3_4x_unused));
cl_u1_nand2_8x  spare0_nand2_8x  (.in0(1'b1), .in1(1'b1),
                                  .out(spare0_nand2_8x_unused));
cl_u1_buf_16x   spare0_buf_16x   (.in(1'b1), .out(spare0_buf_16x_unused));
cl_u1_nor2_16x  spare0_nor2_16x  (.in0(1'b0), .in1(1'b0),
                                  .out(spare0_nor2_16x_unused));
cl_u1_inv_32x   spare0_inv_32x   (.in(1'b1), .out(spare0_inv_32x_unused));


// ---------------------------- spare group 1 ----------------------------

// Middle flop of the scan segment: fed by spare0_flop's scan-out.
cl_sc1_msff_8x  spare1_flop      (.l1clk(l1clk), .siclk(siclk), .soclk(soclk),
                                  .si(so_0),     .so(so_1),
                                  .d(1'b0),      .q(spare1_flop_unused));

cl_u1_buf_32x   spare1_buf_32x   (.in(1'b1), .out(spare1_buf_32x_unused));
cl_u1_nand3_8x  spare1_nand3_8x  (.in0(1'b1), .in1(1'b1), .in2(1'b1),
                                  .out(spare1_nand3_8x_unused));
cl_u1_inv_8x    spare1_inv_8x    (.in(1'b1), .out(spare1_inv_8x_unused));
cl_u1_aoi22_4x  spare1_aoi22_4x  (.in00(1'b1), .in01(1'b1), .in10(1'b1), .in11(1'b1),
                                  .out(spare1_aoi22_4x_unused));
cl_u1_buf_8x    spare1_buf_8x    (.in(1'b1), .out(spare1_buf_8x_unused));
cl_u1_oai22_4x  spare1_oai22_4x  (.in00(1'b1), .in01(1'b1), .in10(1'b1), .in11(1'b1),
                                  .out(spare1_oai22_4x_unused));
cl_u1_inv_16x   spare1_inv_16x   (.in(1'b1), .out(spare1_inv_16x_unused));
cl_u1_nand2_16x spare1_nand2_16x (.in0(1'b1), .in1(1'b1),
                                  .out(spare1_nand2_16x_unused));
cl_u1_nor3_4x   spare1_nor3_4x   (.in0(1'b0), .in1(1'b0), .in2(1'b0),
                                  .out(spare1_nor3_4x_unused));
cl_u1_nand2_8x  spare1_nand2_8x  (.in0(1'b1), .in1(1'b1),
                                  .out(spare1_nand2_8x_unused));
cl_u1_buf_16x   spare1_buf_16x   (.in(1'b1), .out(spare1_buf_16x_unused));
cl_u1_nor2_16x  spare1_nor2_16x  (.in0(1'b0), .in1(1'b0),
                                  .out(spare1_nor2_16x_unused));
cl_u1_inv_32x   spare1_inv_32x   (.in(1'b1), .out(spare1_inv_32x_unused));


// ---------------------------- spare group 2 ----------------------------

// Last flop of the scan segment: its scan-out is the macro's scan_out.
cl_sc1_msff_8x  spare2_flop      (.l1clk(l1clk), .siclk(siclk), .soclk(soclk),
                                  .si(so_1),     .so(scan_out),
                                  .d(1'b0),      .q(spare2_flop_unused));

cl_u1_buf_32x   spare2_buf_32x   (.in(1'b1), .out(spare2_buf_32x_unused));
cl_u1_nand3_8x  spare2_nand3_8x  (.in0(1'b1), .in1(1'b1), .in2(1'b1),
                                  .out(spare2_nand3_8x_unused));
cl_u1_inv_8x    spare2_inv_8x    (.in(1'b1), .out(spare2_inv_8x_unused));
cl_u1_aoi22_4x  spare2_aoi22_4x  (.in00(1'b1), .in01(1'b1), .in10(1'b1), .in11(1'b1),
                                  .out(spare2_aoi22_4x_unused));
cl_u1_buf_8x    spare2_buf_8x    (.in(1'b1), .out(spare2_buf_8x_unused));
cl_u1_oai22_4x  spare2_oai22_4x  (.in00(1'b1), .in01(1'b1), .in10(1'b1), .in11(1'b1),
                                  .out(spare2_oai22_4x_unused));
cl_u1_inv_16x   spare2_inv_16x   (.in(1'b1), .out(spare2_inv_16x_unused));
cl_u1_nand2_16x spare2_nand2_16x (.in0(1'b1), .in1(1'b1),
                                  .out(spare2_nand2_16x_unused));
cl_u1_nor3_4x   spare2_nor3_4x   (.in0(1'b0), .in1(1'b0), .in2(1'b0),
                                  .out(spare2_nor3_4x_unused));
cl_u1_nand2_8x  spare2_nand2_8x  (.in0(1'b1), .in1(1'b1),
                                  .out(spare2_nand2_8x_unused));
cl_u1_buf_16x   spare2_buf_16x   (.in(1'b1), .out(spare2_buf_16x_unused));
cl_u1_nor2_16x  spare2_nor2_16x  (.in0(1'b0), .in1(1'b0),
                                  .out(spare2_nor2_16x_unused));
cl_u1_inv_32x   spare2_inv_32x   (.in(1'b1), .out(spare2_inv_32x_unused));


endmodule


// any PARAMS parms go into naming of macro

// 8-bit register macro with scan.
// Wraps a single `dff` instance (instantiated with parameter value 8):
// din[7:0] is presented on the instance's d port and its q port drives
// dout[7:0].  The scan path enters on scan_in and leaves on scan_out.
module fgu_fdc_ctl_msff_ctl_macro__width_8 (
  din,
  l1clk,
  scan_in,
  siclk,
  soclk,
  dout,
  scan_out);

  // ---- functional data and clock ----
  input  [7:0] din;
  input        l1clk;

  // ---- scan interface ----
  input        scan_in;
  input        siclk;
  input        soclk;

  // ---- outputs ----
  output [7:0] dout;
  output       scan_out;

  // Data presented to the flop; a plain copy of din.
  wire   [7:0] fdin = din[7:0];

  // Loops bits [7:1] of the instance's so port back to bits [6:0] of its
  // si port.
  wire   [6:0] so;

  dff #(8) d0_0 (
    // data
    .d     (fdin[7:0]),
    .q     (dout[7:0]),
    // clocks
    .l1clk (l1clk),
    .siclk (siclk),
    .soclk (soclk),
    // scan: si[7] <- scan_in, si[6:0] <- so[7:1], so[0] -> scan_out
    .si    ({scan_in, so[6:0]}),
    .so    ({so[6:0], scan_out})
  );

endmodule


// any PARAMS parms go into naming of macro

// 10-bit register macro with scan.
// Wraps a single `dff` instance (instantiated with parameter value 10):
// din[9:0] is presented on the instance's d port and its q port drives
// dout[9:0].  The scan path enters on scan_in and leaves on scan_out.
module fgu_fdc_ctl_msff_ctl_macro__width_10 (
  din,
  l1clk,
  scan_in,
  siclk,
  soclk,
  dout,
  scan_out);

  // ---- functional data and clock ----
  input  [9:0] din;
  input        l1clk;

  // ---- scan interface ----
  input        scan_in;
  input        siclk;
  input        soclk;

  // ---- outputs ----
  output [9:0] dout;
  output       scan_out;

  // Data presented to the flop; a plain copy of din.
  wire   [9:0] fdin = din[9:0];

  // Loops bits [9:1] of the instance's so port back to bits [8:0] of its
  // si port.
  wire   [8:0] so;

  dff #(10) d0_0 (
    // data
    .d     (fdin[9:0]),
    .q     (dout[9:0]),
    // clocks
    .l1clk (l1clk),
    .siclk (siclk),
    .soclk (soclk),
    // scan: si[9] <- scan_in, si[8:0] <- so[9:1], so[0] -> scan_out
    .si    ({scan_in, so[8:0]}),
    .so    ({so[8:0], scan_out})
  );

endmodule


// any PARAMS parms go into naming of macro

// 4-bit register macro with scan.
// Wraps a single `dff` instance (instantiated with parameter value 4):
// din[3:0] is presented on the instance's d port and its q port drives
// dout[3:0].  The scan path enters on scan_in and leaves on scan_out.
module fgu_fdc_ctl_msff_ctl_macro__width_4 (
  din,
  l1clk,
  scan_in,
  siclk,
  soclk,
  dout,
  scan_out);

  // ---- functional data and clock ----
  input  [3:0] din;
  input        l1clk;

  // ---- scan interface ----
  input        scan_in;
  input        siclk;
  input        soclk;

  // ---- outputs ----
  output [3:0] dout;
  output       scan_out;

  // Data presented to the flop; a plain copy of din.
  wire   [3:0] fdin = din[3:0];

  // Loops bits [3:1] of the instance's so port back to bits [2:0] of its
  // si port.
  wire   [2:0] so;

  dff #(4) d0_0 (
    // data
    .d     (fdin[3:0]),
    .q     (dout[3:0]),
    // clocks
    .l1clk (l1clk),
    .siclk (siclk),
    .soclk (soclk),
    // scan: si[3] <- scan_in, si[2:0] <- so[3:1], so[0] -> scan_out
    .si    ({scan_in, so[2:0]}),
    .so    ({so[2:0], scan_out})
  );

endmodule


// any PARAMS parms go into naming of macro

// 9-bit register macro with scan.
// Wraps a single `dff` instance (instantiated with parameter value 9):
// din[8:0] is presented on the instance's d port and its q port drives
// dout[8:0].  The scan path enters on scan_in and leaves on scan_out.
module fgu_fdc_ctl_msff_ctl_macro__width_9 (
  din,
  l1clk,
  scan_in,
  siclk,
  soclk,
  dout,
  scan_out);

  // ---- functional data and clock ----
  input  [8:0] din;
  input        l1clk;

  // ---- scan interface ----
  input        scan_in;
  input        siclk;
  input        soclk;

  // ---- outputs ----
  output [8:0] dout;
  output       scan_out;

  // Data presented to the flop; a plain copy of din.
  wire   [8:0] fdin = din[8:0];

  // Loops bits [8:1] of the instance's so port back to bits [7:0] of its
  // si port.
  wire   [7:0] so;

  dff #(9) d0_0 (
    // data
    .d     (fdin[8:0]),
    .q     (dout[8:0]),
    // clocks
    .l1clk (l1clk),
    .siclk (siclk),
    .soclk (soclk),
    // scan: si[8] <- scan_in, si[7:0] <- so[8:1], so[0] -> scan_out
    .si    ({scan_in, so[7:0]}),
    .so    ({so[7:0], scan_out})
  );

endmodule


// any PARAMS parms go into naming of macro

// 6-bit register macro with scan.
// Wraps a single `dff` instance (instantiated with parameter value 6):
// din[5:0] is presented on the instance's d port and its q port drives
// dout[5:0].  The scan path enters on scan_in and leaves on scan_out.
module fgu_fdc_ctl_msff_ctl_macro__width_6 (
  din,
  l1clk,
  scan_in,
  siclk,
  soclk,
  dout,
  scan_out);

  // ---- functional data and clock ----
  input  [5:0] din;
  input        l1clk;

  // ---- scan interface ----
  input        scan_in;
  input        siclk;
  input        soclk;

  // ---- outputs ----
  output [5:0] dout;
  output       scan_out;

  // Data presented to the flop; a plain copy of din.
  wire   [5:0] fdin = din[5:0];

  // Loops bits [5:1] of the instance's so port back to bits [4:0] of its
  // si port.
  wire   [4:0] so;

  dff #(6) d0_0 (
    // data
    .d     (fdin[5:0]),
    .q     (dout[5:0]),
    // clocks
    .l1clk (l1clk),
    .siclk (siclk),
    .soclk (soclk),
    // scan: si[5] <- scan_in, si[4:0] <- so[5:1], so[0] -> scan_out
    .si    ({scan_in, so[4:0]}),
    .so    ({so[4:0], scan_out})
  );

endmodule


// any PARAMS parms go into naming of macro

// 1-bit register macro with scan.
// Wraps a single `dff` instance (instantiated with parameter value 1):
// din[0] is presented on the instance's d port and its q port drives
// dout[0].  With only one bit there is no internal scan link: scan_in and
// scan_out connect straight to the instance's si and so ports.
module fgu_fdc_ctl_msff_ctl_macro__width_1 (
  din,
  l1clk,
  scan_in,
  siclk,
  soclk,
  dout,
  scan_out);

  // ---- functional data and clock ----
  input  [0:0] din;
  input        l1clk;

  // ---- scan interface ----
  input        scan_in;
  input        siclk;
  input        soclk;

  // ---- outputs ----
  output [0:0] dout;
  output       scan_out;

  // Data presented to the flop; a plain copy of din.
  wire   [0:0] fdin = din[0:0];

  dff #(1) d0_0 (
    // data
    .d     (fdin[0:0]),
    .q     (dout[0:0]),
    // clocks
    .l1clk (l1clk),
    .siclk (siclk),
    .soclk (soclk),
    // scan
    .si    (scan_in),
    .so    (scan_out)
  );

endmodule