///////////////////////////////////////////////////////////////////////////////
//                                                                           //
// (c) Copyright OCP-IP 2008
// OCP-IP Confidential and Proprietary
//
//
//============================================================================
//      Project : OCP SLD WG
//       Author : James Aldis, Texas Instruments
//                Robert Guenzel (from TU of Braunschweig) for Greensocs Ltd.
//
//          $Id:
//
//  Description :  Splitter module for the TL1 timing example
//
//                                                                           //
///////////////////////////////////////////////////////////////////////////////


#ifndef __TIMING_SPLITTER_H__
#define __TIMING_SPLITTER_H__


#include "timing_common.h"

// Splitter module for the TL1 timing example.
//
// Requests arriving on the OCP TL1 slave socket (ocps) are held in a
// per-thread register and forwarded to one of the two OCP TL1 master sockets
// (ocpmA / ocpmB), selected by address bit 30. Responses arriving on either
// master socket are held in per-thread, per-port registers and arbitrated
// back onto the slave socket. Thread-busy signalling is exchanged on all
// three sockets using the THREAD_BUSY_CHANGE phase.
//
// NOTE(review): thread-busy masks are assembled in plain `int`s and
// response_arb() uses 2*threads bits of one, so the thread count is
// presumably expected to stay small (2*threads below the width of int) -
// confirm against the intended configurations.
class timing_splitter :
  public sc_core::sc_module
{
  SC_HAS_PROCESS(timing_splitter);
  public:
  // ports
  sc_core::sc_in<bool> clk;
  ocpip::ocp_slave_socket_tl1<> ocps;
  ocpip::ocp_master_socket_tl1<> ocpmA, ocpmB;

  // pool of local_thread_ID extensions attached to in-flight transactions
  ocpip::ocp_extension_pool<local_thread_ID> m_extPool;
  // accessor for the instance-specific extensions of a payload
  tlm_utils::instance_specific_extension_accessor acc;


    // Builds the splitter.
    //   name       - SystemC module name
    //   nr_threads - number of OCP threads; must be at least 2, otherwise an
    //                error is printed and the process exits with status 1
    timing_splitter(sc_core::sc_module_name name, int nr_threads)
      : sc_core::sc_module(name)
      , ocps("ocps", this, &timing_splitter::setOCPTL1MasterTiming)
      , ocpmA("ocpmA", this, &timing_splitter::setOCPTL1SlaveTiming)
      , ocpmB("ocpmB", this, &timing_splitter::setOCPTL1SlaveTiming)
      , m_extPool(10)
      , threads(nr_threads)
      // Thread-busy state is only written when a THREAD_BUSY_CHANGE arrives;
      // start from "no thread busy" so the first clock cycle never reads
      // indeterminate values.
      , stbA(0)
      , stbB(0)
      , mtb(0) {

      if (nr_threads<2){
        std::cerr<<sc_core::sc_module::name()<<" can only operate correctly with more than one thread"<<std::endl;
        exit(1);
      }

      ocpmA.register_nb_transport_bw(this, &timing_splitter::nb_transport_bwA);
      ocpmB.register_nb_transport_bw(this, &timing_splitter::nb_transport_bwB);
      ocps.register_nb_transport_fw(this, &timing_splitter::nb_transport_fw);

      SC_METHOD(clock_rising);
      sensitive<<clk.pos();
      dont_initialize();

      SC_METHOD(response_arb);
      sensitive<<arbiter_event;
      dont_initialize();

      SC_METHOD(request_split);
      sensitive<<splitter_event;
      dont_initialize();

      // one request register per thread
      typedef tlm::tlm_generic_payload* tlm_generic_payload_ptr;
      req_reg = new tlm_generic_payload_ptr[threads];
      for (int i=0; i<threads; i++) {
        req_reg[i]=NULL;
      }

      // outstanding-request counters: first half is port A, second half port B
      reqs_out = new int[2*threads];
      reqs_out_A = reqs_out;
      reqs_out_B = &(reqs_out[threads]);
      for (int i=0; i<2*threads; i++) {
        reqs_out[i] = 0;
      }

      // response registers: first half is port A, second half port B
      resp_reg = new tlm_generic_payload_ptr[2*threads];
      for (int i=0; i<2*threads; i++) {
        resp_reg[i]=NULL;
      }
      respA_reg = resp_reg;
      respB_reg = &(resp_reg[threads]);

      time_quantum = sc_core::sc_get_time_resolution();
      sthreadbusy_sample_time = time_quantum;  // initial guess
      request_sample_time = time_quantum;  // initial guess
      mthreadbusy_sample_time = time_quantum;  // initial guess
      response_sample_time = time_quantum;  // initial guess

      // transactions used to carry thread-busy information on each socket
      tb_txn_A=ocpmA.get_tb_transaction();
      tb_txn_B=ocpmB.get_tb_transaction();
      tb_txn_s=ocps.get_tb_transaction();
      tb_ph=ocpip::THREAD_BUSY_CHANGE;

      ocps.set_slave_timing(calc_s_timing());
      ocpmA.set_master_timing(calc_m_timing());
      ocpmB.set_master_timing(calc_m_timing());
      std::cout << "<<<< E-O-E >>>> " << sc_core::sc_module::name() << std::endl;

      // all three sockets share the same OCP configuration
      ocpip::map_string_type config_map=get_config_map(nr_threads,32);
      ocpip::ocp_parameters  config;
      config.set_ocp_configuration(ocpmA.name(), config_map);
      ocpmA.set_ocp_config(config);

      config.set_ocp_configuration(ocpmB.name(), config_map);
      ocpmB.set_ocp_config(config);

      config.set_ocp_configuration(ocps.name(), config_map);
      ocps.set_ocp_config(config);

      // master ports drive MThreadBusy, the slave port drives SThreadBusy
      tb_A=ocpmA.get_extension<ocpip::thread_busy>(*tb_txn_A);
      tb_A->value.type=ocpip::M_THREAD;
      tb_B=ocpmB.get_extension<ocpip::thread_busy>(*tb_txn_B);
      tb_B->value.type=ocpip::M_THREAD;
      tb_s=ocps.get_extension<ocpip::thread_busy>(*tb_txn_s);
      tb_s->value.type=ocpip::S_THREAD;

    }

    // Dumps the final register occupation, outstanding-request counters and
    // sample times, then releases the arrays allocated by the constructor.
    ~timing_splitter() {
      std::cout << "Deleting splitter:   " << name() << std::endl;

      for(int i=0; i<threads; i++) {
        std::cout << (req_reg[i] ? "R " : "- ");
      }
      std::cout << std::endl;
      for(int i=0; i<threads; i++) {
        std::cout << (respA_reg[i] ? "A " : "- ");
      }
      std::cout << std::endl;
      for(int i=0; i<threads; i++) {
        std::cout << (respB_reg[i] ? "B " : "- ");
      }
      std::cout << std::endl;
      for(int i=0; i<threads; i++) {
        std::cout << reqs_out_A[i] << " ";
      }
      std::cout << std::endl;
      for(int i=0; i<threads; i++) {
        std::cout << reqs_out_B[i] << " ";
      }
      std::cout << std::endl;
      std::cout << "  SThreadBusy sample time: " << sthreadbusy_sample_time << std::endl;
      std::cout << "  MThreadBusy sample time: " << mthreadbusy_sample_time << std::endl;
      std::cout << "  Request sample time:     " << request_sample_time << std::endl;
      std::cout << "  Response sample time:     " << response_sample_time << std::endl;

      // respA_reg/respB_reg and reqs_out_A/reqs_out_B alias into these
      // arrays, so only the three base pointers are freed. The payloads the
      // registers point to are not owned by this module.
      delete[] req_reg;
      delete[] reqs_out;
      delete[] resp_reg;
    }


    // processes

    // Runs on every rising clock edge: publishes the thread-busy state of
    // all three sockets and schedules the split and arbitration processes
    // for this cycle.
    void clock_rising() {

      // generate MThreadBusy for both master ports, based on
      // register occupation
      int mtbA_out = 0;
      int mtbB_out = 0;
      for(int i=0; i<threads; i++) {
        const int bit = 1 << i;
        if(respA_reg[i]) {
          mtbA_out |= bit;
        }
        if(respB_reg[i]) {
          mtbB_out |= bit;
        }
      }
      tb_A->value.mask=mtbA_out;
      tb_B->value.mask=mtbB_out;
      ocpmA->nb_transport_fw(*tb_txn_A, tb_ph, tb_time);
      ocpmB->nb_transport_fw(*tb_txn_B, tb_ph, tb_time);

      // generate SThreadBusy for slave port, based on request register
      // occupation (we prevent any thread from having more than one
      // target open at a time, to avoid response reordering)
      int stb_out = 0;
      for(int i=0; i<threads; i++) {
        if(req_reg[i]) {
          stb_out |= (1 << i);
        }
      }
      tb_s->value.mask=stb_out;
      ocps->nb_transport_bw(*tb_txn_s, tb_ph, tb_time);

      // notify events for the other methods
      sc_core::sc_time split_time = my_max(request_sample_time, sthreadbusy_sample_time);
      // note that arbitration of responses must occur later than splitting of
      // requests, in order that new responses do not distort the number of
      // requests outstanding and therefore change the arbitration result - also
      // potentially creating a combinatorial loop
      sc_core::sc_time arb_time = my_max(split_time + time_quantum,
                   my_max(response_sample_time, mthreadbusy_sample_time));
      arbiter_event.notify(arb_time);
      splitter_event.notify(split_time);
    }

    // Sends at most one buffered response per invocation back to the master
    // on the slave socket. Slots are scanned port A first, then port B,
    // lowest thread first; a slot is skipped when its thread is flagged in
    // MThreadBusy.
    void response_arb() {
      // sample MThreadBusy and replicate it to make the loop easy:
      // bits [0, threads) gate port A slots, bits [threads, 2*threads) gate
      // port B slots
      const int l_mtb =  (mtb & ((1 << threads) - 1)) | (mtb << threads);
      for(int slot=0; slot<(2*threads); slot++) {
        if(!resp_reg[slot] || ((1 << slot) & l_mtb)) {
          continue;
        }
        // grant this one

        // the response leaves this module, so give the local thread-ID
        // extension back to the pool
        local_thread_ID* threadID = NULL;
        acc(*resp_reg[slot]).get_extension(threadID);
        assert(threadID);

        acc(*resp_reg[slot]).clear_extension(threadID);
        m_extPool.recycle(threadID);

        txn_ph=tlm::BEGIN_RESP;
        txn_time=sc_core::SC_ZERO_TIME;
        switch (ocps->nb_transport_bw(*resp_reg[slot], txn_ph, txn_time)){
          case tlm::TLM_UPDATED:
            assert(txn_ph==tlm::END_RESP);
            // intentional fall-through: an updated END_RESP is as good as
            // a completed transaction here
          case tlm::TLM_COMPLETED:
            break;
          default:
            std::cerr<<"I expect to get updated or completed when using thread busy"<<std::endl;
            exit(1);
        }

        // free the register and retire the outstanding request; reqs_out is
        // laid out like resp_reg, so the same index applies
        resp_reg[slot]=NULL;
        reqs_out[slot]--;
        break;
      }
    }

    // Forwards at most one buffered request per invocation.
    void request_split() {
      // choose a request from one thread (lowest first, ignoring those
      // for which sthreadbusy is high or where the thread has requests
      // outstanding on the other port) and send it to the appropriate
      // slave
      for(int i=0; i<threads; i++) {
        if(!req_reg[i]) {
          continue;
        }
        // routing on bit[30]
        const bool dest_is_A = (req_reg[i]->get_address() & 0x40000000) != 0;
        const int *otherport_out = (dest_is_A ? reqs_out_B : reqs_out_A);
        // Test this thread's bit on a read-only copy. The latched stbA/stbB
        // members must not be shifted in place: they are only refreshed when
        // a THREAD_BUSY_CHANGE arrives, so modifying them here would corrupt
        // the state seen in later cycles.
        const int stb = (dest_is_A ? stbA : stbB);
        const bool thread_busy = ((stb >> i) & 1) != 0;
        if((otherport_out[i] != 0) || thread_busy) {
          continue;
        }

        // can send request.
        // rescale address to get balanced system-address map
        req_reg[i]->set_address(req_reg[i]->get_address() << 1);
        ocpip::ocp_master_socket_tl1<> *dest = (dest_is_A ? &ocpmA : &ocpmB);

        txn_ph=tlm::BEGIN_REQ;
        txn_time=sc_core::SC_ZERO_TIME;
        tlm::tlm_sync_enum retVal=(*dest)->nb_transport_fw(*req_reg[i], txn_ph, txn_time);

        // every request stored by nb_transport_fw carries our thread-ID
        // extension
        local_thread_ID* threadID = NULL;
        acc(*req_reg[i]).get_extension(threadID);
        assert(threadID);

        switch (retVal){
          case tlm::TLM_UPDATED:
            if (txn_ph!=tlm::END_REQ){
              // anything but an immediate accept is a protocol error
              std::cerr<<"Unexpected phase "<<txn_ph<<" on return path. In "<<name()<<" at "<<sc_core::sc_time_stamp()<<std::endl;
              exit(1);
            }
            break;
          case tlm::TLM_COMPLETED:
            break;
          default:
            std::cerr<<"When using thread busy exact, I expect the use of the return path. In "<<name()<<" at "<<sc_core::sc_time_stamp()<<std::endl;
            exit(1);
        }

        // free the register and count the request as outstanding on the
        // port it was sent to
        req_reg[i]=NULL;
        int *thisport_out = (dest_is_A ? reqs_out_A : reqs_out_B);
        thisport_out[i]++;
        break;
      }
    }

    // Timing callback of the two master sockets: when informed of the timing
    // of a downstream slave, the splitter must re-inform the OCP channels if
    // anything changed.
    void setOCPTL1SlaveTiming(ocpip::ocp_tl1_slave_timing slave_timing) {
      std::cout << "  << S-S-T >>   " << name() << std::endl;

      // increase in response group input time must be recorded
      // and may have caused an increase in response group output time
      // on slave port, which must be reported
      const sc_core::sc_time resp_sample = slave_timing.ResponseGrpStartTime + time_quantum;
      if(resp_sample > response_sample_time) {
        // response output time before and after storing the new sample time
        const sc_core::sc_time old_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        response_sample_time = resp_sample;
        const sc_core::sc_time new_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        if(new_resp_t_out > old_resp_t_out) {
          ocps.set_slave_timing(calc_s_timing());
        }
      }

      // increase in sthreadbusy input time must be recorded
      // may have caused an increase in request group output time
      // on master ports, which must be reported
      const sc_core::sc_time stb_sample = slave_timing.SThreadBusyStartTime + time_quantum;
      if(stb_sample > sthreadbusy_sample_time) {
        // request output time before and after storing the new sample time
        const sc_core::sc_time old_req_t_out = calc_m_timing().RequestGrpStartTime;
        sthreadbusy_sample_time = stb_sample;
        const sc_core::sc_time new_req_t_out = calc_m_timing().RequestGrpStartTime;
        if(new_req_t_out > old_req_t_out) {
          ocpmA.set_master_timing(calc_m_timing());
          ocpmB.set_master_timing(calc_m_timing());
        }
      }
    }

    // Timing callback of the slave socket: when informed of the timing of
    // the upstream master, the splitter must re-inform the OCP channels if
    // anything changed.
    void setOCPTL1MasterTiming(ocpip::ocp_tl1_master_timing master_timing) {
      std::cout << "  << S-M-T >>   " << name() << std::endl;

      // increase in request group input time must be recorded
      // may have caused an increase in request group output time on
      // master ports, which must be reported
      const sc_core::sc_time req_sample = master_timing.RequestGrpStartTime + time_quantum;
      if(req_sample > request_sample_time) {
        // request output time before and after storing the new sample time
        const sc_core::sc_time old_req_t_out = calc_m_timing().RequestGrpStartTime;
        request_sample_time = req_sample;
        const sc_core::sc_time new_req_t_out = calc_m_timing().RequestGrpStartTime;
        if(new_req_t_out > old_req_t_out) {
          ocpmA.set_master_timing(calc_m_timing());
          ocpmB.set_master_timing(calc_m_timing());
        }
      }

      // increase in mthreadbusy input time must be recorded
      // may have caused an increase in response group output time
      // on slave port, which must be reported
      const sc_core::sc_time mtb_sample = master_timing.MThreadBusyStartTime + time_quantum;
      if(mtb_sample > mthreadbusy_sample_time) {
        // response output time before and after storing the new sample time
        const sc_core::sc_time old_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        mthreadbusy_sample_time = mtb_sample;
        const sc_core::sc_time new_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        if(new_resp_t_out > old_resp_t_out) {
          ocps.set_slave_timing(calc_s_timing());
        }
      }
    }

    // Backward path of master socket A: latches SThreadBusy updates, and
    // stores a BEGIN_RESP in the port A response register of its thread.
    // Any other phase is fatal.
    tlm::tlm_sync_enum nb_transport_bwA(tlm::tlm_generic_payload& gp, tlm::tlm_phase& ph, sc_core::sc_time& tim){
      if (ph==ocpip::THREAD_BUSY_CHANGE){
        assert(ocpmA.get_extension<ocpip::thread_busy>(gp)->value.type==ocpip::S_THREAD);
        stbA=ocpmA.get_extension<ocpip::thread_busy>(gp)->value.mask;
        return tlm::TLM_ACCEPTED;
      }
      return latch_response(gp, ph, tb_A, respA_reg);
    }

    // Backward path of master socket B: latches SThreadBusy updates, and
    // stores a BEGIN_RESP in the port B response register of its thread.
    // Any other phase is fatal.
    tlm::tlm_sync_enum nb_transport_bwB(tlm::tlm_generic_payload& gp, tlm::tlm_phase& ph, sc_core::sc_time& tim){
      if (ph==ocpip::THREAD_BUSY_CHANGE){
        assert(ocpmB.get_extension<ocpip::thread_busy>(gp)->value.type==ocpip::S_THREAD);
        stbB=ocpmB.get_extension<ocpip::thread_busy>(gp)->value.mask;
        return tlm::TLM_ACCEPTED;
      }
      return latch_response(gp, ph, tb_B, respB_reg);
    }

    // Forward path of the slave socket: latches MThreadBusy updates, and
    // stores a BEGIN_REQ in the request register of its thread, tagging the
    // payload with a local_thread_ID extension. Any other phase is fatal.
    tlm::tlm_sync_enum nb_transport_fw(tlm::tlm_generic_payload& gp, tlm::tlm_phase& ph, sc_core::sc_time& tim){
      if (ph==ocpip::THREAD_BUSY_CHANGE){
        assert(ocps.get_extension<ocpip::thread_busy>(gp)->value.type==ocpip::M_THREAD);
        mtb=ocps.get_extension<ocpip::thread_busy>(gp)->value.mask;
        return tlm::TLM_ACCEPTED;
      }

      if (ph!=tlm::BEGIN_REQ){
        std::cerr<<"I only expect BEGIN_REQ on the forward path, but got "<<ph<<" In "<<name()<<" at "<<sc_core::sc_time_stamp()<<std::endl;
        exit(1);
      }

      ocpip::thread_id* tmp;
      bool thread_id_available=ocps.get_extension<ocpip::thread_id>(tmp, gp);
      assert(thread_id_available); //there has to be a valid thread ID
      (void)thread_id_available;   //only read by the assert above

      // the thread ID indexes the request register array
      assert(static_cast<int>(tmp->value)>=0 && static_cast<int>(tmp->value)<threads);
      assert(!(((tb_s->value.mask)>>(tmp->value))&1)); //make sure the thread is not busy

      local_thread_ID* threadID = NULL;
      acc(gp).get_extension(threadID);
      assert(!threadID); //make sure we did not see this thing before
      threadID=m_extPool.create();
      threadID->local_id=tmp->value;
      acc(gp).set_extension(threadID);
      req_reg[tmp->value]=&gp;

      ph=tlm::END_REQ;
      return tlm::TLM_UPDATED;
    }

  private:
    sc_core::sc_time time_quantum;
    sc_core::sc_time sthreadbusy_sample_time;
    sc_core::sc_time request_sample_time;
    sc_core::sc_time mthreadbusy_sample_time;
    sc_core::sc_time response_sample_time;

    // helper functions: only request and response groups are non-default
    // timing.
    // note that the timing on the two OCP master ports is the same in this
    // module
    ocpip::ocp_tl1_master_timing calc_m_timing() {
      ocpip::ocp_tl1_master_timing to;
      to.RequestGrpStartTime =
                my_max(sthreadbusy_sample_time, request_sample_time);
      return(to);
    }
    ocpip::ocp_tl1_slave_timing  calc_s_timing() {
      ocpip::ocp_tl1_slave_timing to;
      to.ResponseGrpStartTime =
                my_max(mthreadbusy_sample_time, response_sample_time);
      return(to);
    }

    // Shared BEGIN_RESP handling for both master sockets.
    //   gp        - response payload; must carry our local_thread_ID extension
    //   ph        - incoming phase; must be BEGIN_RESP, set to END_RESP on return
    //   port_tb   - MThreadBusy extension this module drives on that port
    //   port_regs - response registers of that port (respA_reg or respB_reg)
    // Returns TLM_UPDATED. Exits the process on any phase other than
    // BEGIN_RESP.
    tlm::tlm_sync_enum latch_response(tlm::tlm_generic_payload& gp, tlm::tlm_phase& ph,
                                      ocpip::thread_busy* port_tb,
                                      tlm::tlm_generic_payload** port_regs){
      if (ph!=tlm::BEGIN_RESP){
        std::cerr<<"I only expect BEGIN_RES on the backward path, but got "<<ph<<" In "<<name()<<" at "<<sc_core::sc_time_stamp()<<std::endl;
        exit(1);
      }

      local_thread_ID* threadID = NULL;
      acc(gp).get_extension(threadID);
      assert(threadID);

      assert(!(((port_tb->value.mask)>>(threadID->local_id))&1)); //make sure our thread is not busy
      (void)port_tb; //only read by the assert above

      // halve the first data word of the response; defensive guard in case
      // a payload carries no data pointer
      unsigned int* tmp_data=reinterpret_cast<unsigned int*>(gp.get_data_ptr());
      if (tmp_data){
        (*tmp_data)>>=1;
      }

      port_regs[threadID->local_id]=&gp;
      ph=tlm::END_RESP;
      return tlm::TLM_UPDATED;
    }

    int threads;                                      // number of OCP threads
    sc_core::sc_event arbiter_event, splitter_event;  // trigger response_arb / request_split
    // outstanding-request counters per thread: whole array, port A half, port B half
    int *reqs_out, *reqs_out_A, *reqs_out_B;
    // thread-busy carrier transactions, response registers (whole / A / B)
    // and request registers
    tlm::tlm_generic_payload *tb_txn_A, *tb_txn_B, *tb_txn_s, **resp_reg, **respA_reg, **respB_reg, **req_reg;
    tlm::tlm_phase tb_ph, txn_ph;
    // thread-busy extensions this module drives on each socket
    ocpip::thread_busy *tb_A, *tb_B, *tb_s;
    sc_core::sc_time tb_time, txn_time;
    // last thread-busy masks received: SThreadBusy from ports A and B,
    // MThreadBusy from the slave socket
    int stbA, stbB, mtb;
};


// end of multiple inclusion protection
#endif

