diff --git a/verilog/Makefile b/verilog/Makefile
new file mode 100644
index 0000000..6a5ed9c
--- /dev/null
+++ b/verilog/Makefile
@@ -0,0 +1,34 @@
+# Icarus Verilog helper targets; the recipes use relative paths (modules.txt), so run make from verilog/.
+# Every target is a command rather than a file, and .SILENT keeps make from echoing the recipes.
+.PHONY: compile sim clean test
+.SILENT: compile sim clean test
+# Build the vvp image `sim` from the sources listed in modules.txt, with rr_scheduling_kernel_tb as top module.
+compile:
+	iverilog -g2005-sv -o sim -c modules.txt -s rr_scheduling_kernel_tb
+# Run the compiled `sim` image, then open test.vcd; set GTKWAVE=<path> to use a viewer outside the macOS app bundle.
+sim:
+	echo "finish" | vvp sim -lxt2
+	"$${GTKWAVE:-/Applications/gtkwave.app/Contents/Resources/bin/gtkwave}" test.vcd
+# Compile and run each unit testbench in turn through one throw-away test_sim image; make stops at the first failing command. NOTE(review): confirm vvp exits non-zero when a testbench assertion ends the run.
+test:
+	iverilog -g2005-sv -o test_sim -c modules.txt -s memory_tb
+	vvp test_sim -lxt2
+	rm test_sim
+	
+	iverilog -g2005-sv -o test_sim -c modules.txt -s test_pivots_rr_scheduling_kernel_tb
+	vvp test_sim -lxt2
+	rm test_sim
+	
+	iverilog -g2005-sv -o test_sim -c modules.txt -s test_request_selection_rr_scheduling_kernel_tb
+	vvp test_sim -lxt2
+	rm test_sim
+	
+	iverilog -g2005-sv -o test_sim -c modules.txt -s test_request_mux_tb
+	vvp test_sim -lxt2
+	rm test_sim
+
+# Delete generated simulator images and waveform dumps; test_sim is what `test` leaves behind when a step fails.
+clean:
+	rm -f sim test_sim
+	rm -f test.vcd
+	rm -f memory_sim memory_tb.vcd
diff --git a/verilog/README.md b/verilog/README.md
new file mode 100644
index 0000000..0f1e8d1
--- /dev/null
+++ b/verilog/README.md
@@ -0,0 +1,46 @@
+# Optimized Arbiter Verilog Implementation
+
+## Quickstart
+
+### Install Icarus Verilog
+For macOS:
+```bash
+brew install icarus-verilog
+```
+
+### Install gtkwave
+
+For macOS:
+```bash
+
+# Install gtkwave
+brew tap homebrew/cask
+brew cask install gtkwave
+
+# Install the Perl Switch module and copy it into the system Perl library directory
+cpan install Switch
+perl -V:'installsitelib'
+sudo cp /usr/local/Cellar/perl/5.*/lib/perl5/site_perl/5.*/Switch.pm /Library/Perl/5.*/
+```
+
+### Run Testbench
+
+Run unit tests:
+```bash
+make test
+```
+
+Compile a single testbench (replace `<module_name>` with its top-level module, e.g. `memory_tb`):
+```bash
+iverilog -g2005-sv -o sim -c modules.txt -s <module_name>
+```
+
+Run the simulation:
+```bash
+vvp sim -lxt2
+```
+
+See the waveforms:
+```bash
+gtkwave test.vcd
+```
diff --git a/verilog/modules.txt b/verilog/modules.txt
new file mode 100644
index 0000000..55a15de
--- /dev/null
+++ b/verilog/modules.txt
@@ -0,0 +1,13 @@
+utils/utils.sv
+tb/memory_tb.sv
+tb/test_request_mux_tb.sv
+tb/rr_scheduling_kernel_tb.sv
+tb/test_request_selection_rr_scheduling_kernel_tb.sv
+tb/test_pivots_rr_scheduling_kernel_tb.sv
+modules/memory.sv
+modules/request_mux.sv
+modules/response_mux.sv
+modules/rr_scheduling_kernel.sv
+
+
+//
\ No newline at end of file
diff --git a/verilog/modules/memory.sv b/verilog/modules/memory.sv
new file mode 100644
index 0000000..c20f7f6
--- /dev/null
+++ b/verilog/modules/memory.sv
@@ -0,0 +1,39 @@
+module memory #(
+    parameter DATA = 8,    // word width in bits
+    parameter ADDR = 4     // address width; the array holds 2**ADDR words
+) (
+    input  wire            clk,
+    input  wire            rst_n,   // not referenced inside this module
+
+    // Port A: write enable, address, write data, registered read data
+    input  wire            a_wr,
+    input  wire [ADDR-1:0] a_addr,
+    input  wire [DATA-1:0] a_din,
+    output reg  [DATA-1:0] a_dout,
+    // Port B: same set of signals as port A
+    input  wire            b_wr,
+    input  wire [ADDR-1:0] b_addr,
+    input  wire [DATA-1:0] b_din,
+    output reg  [DATA-1:0] b_dout
+);
+
+    // Storage array shared by both ports
+    reg [DATA-1:0] mem [(2**ADDR)-1:0];
+
+    always @(posedge clk) begin
+        if (a_wr) begin                   // port A write: store the word and echo it on a_dout
+            mem[a_addr] <= a_din;
+            a_dout      <= a_din;
+        end else
+            a_dout      <= mem[a_addr];   // port A read of the addressed word
+        if (b_wr) begin                   // port B behaves the same way
+            mem[b_addr] <= b_din;
+            b_dout      <= b_din;
+        end else
+            b_dout      <= mem[b_addr];
+        if (a_wr && b_wr && (a_addr == b_addr)) begin  // both ports writing one address: report and stop
+            $display("conflicting write on addr: %h", a_addr);
+            $finish;
+        end
+    end
+endmodule
\ No newline at end of file
diff --git a/verilog/modules/request_mux.sv b/verilog/modules/request_mux.sv
new file mode 100644
index 0000000..3161699
--- /dev/null
+++ b/verilog/modules/request_mux.sv
@@ -0,0 +1,17 @@
+// Combinational selector: forwards requests[select] to selected_request.
+module request_mux #(
+    parameter REQ_WIDTH  = 10,
+    parameter REQ_NUMBER = 16
+) (
+    input  [REQ_WIDTH-1:0]          requests [REQ_NUMBER],
+    input  [$clog2(REQ_NUMBER)-1:0] select,
+    output [REQ_WIDTH-1:0]          selected_request
+);
+
+    /*
+     * Kept in a module of its own because multiplexers can be awkward to map onto FPGAs:
+     * https://www.doulos.com/knowhow/fpga/multiplexer-variations/
+     */
+
+    assign selected_request = requests[select];
+endmodule
\ No newline at end of file
diff --git a/verilog/modules/response_mux.sv b/verilog/modules/response_mux.sv
new file mode 100644
index 0000000..05a4e56
--- /dev/null
+++ b/verilog/modules/response_mux.sv
@@ -0,0 +1,35 @@
+module response_mux #(
+    parameter RES_WIDTH  = 20,
+    parameter NKERNELS   = 4,
+    parameter NCONSUMERS = 8,
+    parameter C_ID       = 0
+) (
+    input      [RES_WIDTH-1:0]          augumented_plm_outputs [NKERNELS],
+    input      [$clog2(NCONSUMERS)-1:0] response_pivots        [NKERNELS],
+    output reg [RES_WIDTH-1:0]          selected_response
+);
+
+    /*
+     * Bit k of kernel_hits is set when kernel k's response pivot names this consumer (C_ID).
+     */
+    wire [NKERNELS-1:0] kernel_hits;
+    genvar g_k;
+    generate
+        for (g_k = 0; g_k < NKERNELS; g_k = g_k + 1) begin
+            /* Expected to be set for at most one kernel, since a request is handled by a single port. */
+            assign kernel_hits[g_k] = (response_pivots[g_k] == C_ID);
+        end
+    endgenerate
+
+    /*
+     * One-hot mux: forward the response of the single hitting kernel; any other hit pattern yields zero.
+     * https://stackoverflow.com/questions/19875899/how-to-define-a-parameterized-multiplexer-using-systemverilog
+     */
+    always @(*) begin
+        selected_response = 0;
+        for (int idx = 0; idx < NKERNELS; idx++) begin
+            if (kernel_hits == (1 << idx))
+                selected_response = augumented_plm_outputs[idx];
+        end
+    end
+endmodule
\ No newline at end of file
diff --git a/verilog/modules/rr_scheduling_kernel.sv b/verilog/modules/rr_scheduling_kernel.sv
new file mode 100644
index 0000000..e07a25f
--- /dev/null
+++ b/verilog/modules/rr_scheduling_kernel.sv
@@ -0,0 +1,131 @@
+/* Round-robin request scheduler: each (bank, port) kernel inspects one consumer's request per clock. */
+module rr_scheduling_kernel #(
+  parameter ADDR_WIDTH = 4,   /* width of a request address, bank bits included */
+  parameter VALUE_WIDTH = 8,  /* width of a data value */
+  parameter NCONSUMERS = 2,   /* number of request/response slots */
+  parameter NBANKS = 1,       /* number of banks, selected by the top address bits */
+  parameter NPORTS = 1)       /* ports per bank; the initial block below accepts 1 or 2 */
+  (
+  input [REQ_WIDTH-1:0] requests [NCONSUMERS],          /* per consumer: {addr, value, wr, valid} */
+  output [PLM_INPUT_WIDTH-1:0] plm_inputs [NKERNELS],   /* per kernel: {bank-local addr, value, wr} */
+  input [PLM_OUTPUT_WIDTH-1:0] plm_outputs [NKERNELS],  /* per kernel: value to hand back to the consumer */
+  output reg [PLM_INPUT_WIDTH-1:0] out [NKERNELS],      /* declared but never driven in this module */
+  output [RES_WIDTH-1:0] responses [NCONSUMERS],        /* per consumer: {value, valid} */
+
+  input clk,
+  input reset);  /* active high; part of the pivot registers' sensitivity list */
+
+  localparam REQ_WIDTH = ADDR_WIDTH + VALUE_WIDTH + 1 + 1; /* addr, value, wr, valid */
+  localparam NUM_BANK_BITS = $clog2(NBANKS);
+  localparam PLM_INPUT_WIDTH = (ADDR_WIDTH - NUM_BANK_BITS) + VALUE_WIDTH + 1; /* bank-local addr, value, wr */
+  localparam PLM_OUTPUT_WIDTH = VALUE_WIDTH;
+  localparam RES_WIDTH = PLM_OUTPUT_WIDTH + 1; /* value, valid */
+
+  initial begin
+    `assert_true((NPORTS == 1 || NPORTS == 2), "unspuported number of PLM ports")
+  end
+
+  // NOTE: Must be > 1. Otherwise barfs up "failed assertion prts[0]->unpacked_dimensions()==0"
+  localparam NKERNELS = NBANKS * NPORTS;
+
+  localparam PIVOT_DIFF = NCONSUMERS / NPORTS; /* reset distance between the pivots of one bank's ports */
+
+  /* Registers that remember the round-robin pivots, one per kernel.
+     Index order (K_ID = bank * NPORTS + port):
+          bank_0_port_0
+          bank_0_port_1
+              ...
+          bank_1_port_0
+          bank_1_port_1
+  */
+  reg [$clog2(NCONSUMERS)-1:0] rr_pivots [NKERNELS];
+  reg rr_response_valid_bits [NKERNELS]; /* was the request examined in the previous cycle eligible? */
+
+  reg [$clog2(NCONSUMERS)-1:0] rr_response_pivots [NKERNELS]; /* consumer examined in the previous cycle */
+
+  wire [RES_WIDTH-1:0] augumented_plm_outputs[NKERNELS]; /* {plm_outputs, valid} per kernel */
+
+  /* rr_pivots control */
+  genvar g_port_i;
+  genvar g_bank_i;
+  generate
+
+    /* There is a rr_pivot register for each port of each bank */
+    for (g_bank_i = 0; g_bank_i < NBANKS; g_bank_i = g_bank_i + 1) begin
+      for (g_port_i = 0; g_port_i < NPORTS; g_port_i = g_port_i + 1) begin
+        localparam K_ID = g_bank_i * NPORTS + g_port_i; /* ID of the scheduling kernel */
+
+        /****************************** REQUEST ROUTING ******************************/
+
+        /* Determine validity of the candidate that the pivot is pointing to */
+        wire [REQ_WIDTH-1:0] lead_candidate; /* Bits of the request that pivot is pointing to */
+        wire [$clog2(NCONSUMERS)-1:0] sel_candidate; /* Candidate select */
+        wire [ADDR_WIDTH-1:0] bank_address; /* Bank index: the top NUM_BANK_BITS address bits, right-aligned */
+        wire is_candidate_addr_in_range; /* Is the address in range of the bank? */
+        wire is_valid_bit; /* Is the candidate valid bit set? */
+        wire is_eligible_request; /* Is this a request eligible for scheduling? */
+
+        assign sel_candidate = rr_pivots[K_ID];
+        request_mux #( .REQ_WIDTH(REQ_WIDTH), .REQ_NUMBER(NCONSUMERS)) req_mux (
+          .requests(requests), .select(sel_candidate), .selected_request(lead_candidate));
+
+        assign bank_address = lead_candidate[REQ_WIDTH-1 -: ADDR_WIDTH] >> (ADDR_WIDTH - NUM_BANK_BITS);
+        /* Shifting (not slicing) keeps NBANKS == 1 legal: NUM_BANK_BITS is 0 there and the index is always 0 */
+        assign is_candidate_addr_in_range = (bank_address == g_bank_i);
+        assign is_valid_bit = lead_candidate[0];
+        assign is_eligible_request = is_candidate_addr_in_range && is_valid_bit;
+
+        assign plm_inputs[K_ID] = (is_eligible_request) ?
+          lead_candidate[PLM_INPUT_WIDTH:1] /* bank-local addr, value, wr: no bank bits, no valid bit */
+          : {PLM_INPUT_WIDTH{1'b0}}; /* All 0s is wr=0, i.e. a read */
+
+        /****************************** PLM OUTPUT AUGUMENTATION ******************************/
+
+        /* Need to augument the outputs with the validity bits */
+        assign augumented_plm_outputs[K_ID] = {plm_outputs[K_ID], rr_response_valid_bits[K_ID]};
+
+        /****************************** PIVOT UPDATE ******************************/
+        always @(posedge clk or posedge reset) begin
+
+          /* Progress the pivot up, rely on wrapping */
+          rr_pivots[K_ID] <= rr_pivots[K_ID] + 1;
+
+          /* Tell the response logic for what consumer is the request being served */
+          rr_response_valid_bits[K_ID] <= is_eligible_request;
+          rr_response_pivots[K_ID] <= rr_pivots[K_ID];
+
+          if (reset) begin
+            /* Later assignments win, so reset overrides the updates above; maximize the spread of pivots */
+            rr_pivots[K_ID] <= g_bank_i + g_port_i * PIVOT_DIFF;
+            rr_response_valid_bits[K_ID] <= 1'b0;
+          end
+        end
+      end
+    end
+  endgenerate
+
+  /****************************** RESPONSE ROUTING ******************************/
+
+  genvar consumer_id;
+  generate
+    for(consumer_id = 0; consumer_id < NCONSUMERS; consumer_id = consumer_id + 1) begin
+
+      wire [RES_WIDTH-1:0] selected_response; /* response picked for this consumer, zero when none matches */
+
+      response_mux #(
+        .RES_WIDTH(RES_WIDTH),
+        .NKERNELS(NKERNELS),
+        .NCONSUMERS(NCONSUMERS),
+        .C_ID(consumer_id)
+      ) res_mux (
+        .augumented_plm_outputs(augumented_plm_outputs),
+        .response_pivots(rr_response_pivots),
+        .selected_response(selected_response)
+      );
+
+      assign responses[consumer_id] = selected_response;
+
+    end
+  endgenerate
+
+endmodule
\ No newline at end of file
diff --git a/verilog/tb/memory_tb.sv b/verilog/tb/memory_tb.sv
new file mode 100644
index 0000000..c756c78
--- /dev/null
+++ b/verilog/tb/memory_tb.sv
@@ -0,0 +1,103 @@
+
+`default_nettype none
+// Self-checking testbench for the dual-port memory module (ADDR = 4, DATA = 8).
+module memory_tb;
+reg clk;
+reg rst_n;
+
+localparam ADDR = 4;
+localparam DATA = 8;
+// Port A stimulus and read data
+reg t_a_wr;
+reg [ADDR-1:0] t_a_addr;
+reg [DATA-1:0] t_a_din;
+wire [DATA-1:0] t_a_dout;
+// Port B stimulus and read data (first driven in the dual-port test below)
+reg t_b_wr;
+reg [ADDR-1:0] t_b_addr;
+reg [DATA-1:0] t_b_din;
+wire [DATA-1:0] t_b_dout;
+
+memory #(.ADDR(ADDR), .DATA(DATA)) dut
+(
+    .rst_n (rst_n),
+    .clk (clk),
+    .a_wr(t_a_wr),
+    .a_addr(t_a_addr),
+    .a_din(t_a_din),
+    .a_dout(t_a_dout),
+    .b_wr(t_b_wr),
+    .b_addr(t_b_addr),
+    .b_din(t_b_din),
+    .b_dout(t_b_dout)
+);
+// Clock inverter; clk stays x until the stimulus block below drives it to 0.
+localparam CLK_PERIOD = 10;
+always #(CLK_PERIOD/2) clk=~clk;
+
+// initial begin
+//     $dumpfile("memory_tb.vcd");
+//     $dumpvars(0, memory_tb);
+// end
+
+// Stimulus: take rst_n through x -> 1 -> 0 -> 1, then run three write/read checks.
+initial begin
+    #1 rst_n<=1'bx;clk<=1'bx;
+    #(CLK_PERIOD*3) rst_n<=1;
+    #(CLK_PERIOD*3) rst_n<=0;clk<=0;
+    repeat(5) @(posedge clk);
+    rst_n<=1;
+    // Test 1: write 234 to addr 3 and 222 to addr 4 through port A, then read addr 3 back.
+    $write("================== TEST memory_tb ==================\n");
+    $write("TEST: basic write/read .... ");
+    @(posedge clk); 
+
+    t_a_wr <= 1; t_a_addr <= 3; t_a_din <= 234;
+    @(posedge clk); 
+
+    t_a_wr <= 1; t_a_addr <= 4; t_a_din <= 222;
+    @(posedge clk); 
+
+    t_a_wr <= 0; t_a_addr <= 3;
+
+    @(posedge clk);
+    @(posedge clk); 
+    
+    `assert(t_a_dout, 234)
+    $write("PASS\n");
+    // Test 2: both ports write different addresses in one cycle, then each value is read through the other port.
+    $write("TEST: dual-port legal write/read .... ");
+    @(posedge clk);
+    t_a_wr <= 1; t_a_addr <= 3; t_a_din <= 234;
+    t_b_wr <= 1; t_b_addr <= 15; t_b_din <= 255;
+    @(posedge clk);
+    t_a_wr <= 0; t_a_addr <= 15;
+    t_b_wr <= 0; t_b_addr <= 3;
+
+    @(posedge clk);
+    @(posedge clk);
+    `assert(t_a_dout, 255)
+    `assert(t_b_dout, 234)
+    $write("PASS\n");
+    // Test 3: write 1 to addr 0 through port A, then read addr 0 on both ports at the same time.
+    $write("TEST: dual-port conflicting read .... ");
+    @(posedge clk);
+    t_a_wr <= 1; t_a_addr <= 0; t_a_din <= 1;
+
+    @(posedge clk);
+    t_a_wr <= 0; t_a_addr <= 0;
+    t_b_wr <= 0; t_b_addr <= 0;
+
+    @(posedge clk);
+    @(posedge clk);
+    `assert(t_a_dout, 1)
+    `assert(t_b_dout, 1)
+    $write("PASS\n");
+    
+    $write("\n");
+    repeat(2) @(posedge clk);
+    $finish(2);
+end
+
+endmodule
+`default_nettype wire
\ No newline at end of file
diff --git a/verilog/tb/rr_scheduling_kernel_tb.sv b/verilog/tb/rr_scheduling_kernel_tb.sv
new file mode 100644
index 0000000..682d0b2
--- /dev/null
+++ b/verilog/tb/rr_scheduling_kernel_tb.sv
@@ -0,0 +1,80 @@
+module rr_scheduling_kernel_tb;
+  /* Waveform bench for rr_scheduling_kernel: pulses reset, dumps test.vcd and prints out[0]; it asserts nothing. */
+  /* Make a reset that pulses once. */
+  reg reset = 0;
+  /* Make a regular pulsing clock. */
+  reg clk = 0;
+  always #5 clk = !clk;
+
+
+  parameter COUNTER_WIDTH = 8;
+  parameter ADDR_WIDTH = 4;
+  parameter VALUE_WIDTH = 8;
+  parameter NCONSUMERS = 8;
+  parameter NBANKS = 4;
+  parameter NPORTS = 2;
+
+  localparam REQ_WIDTH = ADDR_WIDTH + VALUE_WIDTH + 1 + 1;
+  localparam PLM_INPUT_WIDTH = (ADDR_WIDTH - $clog2(NBANKS)) + VALUE_WIDTH + 1; /* same formula as the DUT */
+  localparam NKERNELS = NBANKS * NPORTS;
+
+  /* requests has one entry per consumer; value has one entry per kernel */
+  reg [REQ_WIDTH - 1:0] requests [NCONSUMERS];
+  wire [PLM_INPUT_WIDTH - 1:0] value [NKERNELS];
+
+  integer j = 0;
+  initial begin
+    for (j = 0; j < NCONSUMERS; j++) begin /* clear every request slot */
+      requests[j] = 0;
+    end
+  end
+
+  rr_scheduling_kernel #(
+    .ADDR_WIDTH(ADDR_WIDTH),
+    .VALUE_WIDTH(VALUE_WIDTH),
+    .NCONSUMERS(NCONSUMERS),
+    .NBANKS(NBANKS),
+    .NPORTS(NPORTS)
+  )
+  c1 (
+    .out(value),
+    .requests(requests),
+    .clk(clk),
+    .reset(reset));
+
+  initial begin
+    $monitor("At time %t, value = %h (%0d)", $time, value[0], value[0]);
+  end
+
+  integer i = 0;
+  initial begin
+    $dumpfile("test.vcd");
+    $dumpvars;
+    /*
+     * The pivot registers are added to the dump one element at a time,
+     * presumably because the plain $dumpvars above leaves array elements out.
+     */
+
+    for (i = 0; i < NKERNELS; i++) begin
+      $dumpvars(0, c1.rr_pivots[i]);
+    end
+  end
+
+  initial begin
+
+    # 5
+    reset = 1;
+    # 15
+    reset = 0;
+
+    # 5
+    // `assert(value[0], 0)
+    # 5
+    // `assert(value[0], 1)
+    $display("%h", value[0]);
+    # 5
+
+    # 100 $finish;
+
+  end
+endmodule // test
\ No newline at end of file
diff --git a/verilog/tb/test_pivots_rr_scheduling_kernel_tb.sv b/verilog/tb/test_pivots_rr_scheduling_kernel_tb.sv
new file mode 100644
index 0000000..c6d2395
--- /dev/null
+++ b/verilog/tb/test_pivots_rr_scheduling_kernel_tb.sv
@@ -0,0 +1,88 @@
+
+
+`default_nettype none
+// Checks the rr_pivots registers of rr_scheduling_kernel: their values under reset and their per-clock increment.
+module test_pivots_rr_scheduling_kernel_tb;
+    // 4 banks x 2 ports = 8 kernels; ADDR_WIDTH and VALUE_WIDTH keep the DUT defaults.
+    parameter NCONSUMERS = 8;
+    parameter NBANKS = 4;
+    parameter NPORTS = 2;
+    localparam NKERNELS = NBANKS * NPORTS;
+
+    reg clk;
+    reg rst;
+    // Clock generator; rst is held high for the first four half-periods.
+    localparam CLK_PERIOD = 10;
+    initial begin
+        clk = 1'b0;
+        rst = 1'b1;
+        repeat(4) #(CLK_PERIOD/2) clk = ~clk;
+        rst = 1'b0;
+        forever #(CLK_PERIOD/2) clk = ~clk; // generate a clock
+    end
+    // Only clk and reset are connected; the request/response ports are left open.
+    rr_scheduling_kernel #(
+        .NCONSUMERS(NCONSUMERS),
+        .NBANKS(NBANKS),
+        .NPORTS(NPORTS)
+    ) rsk (
+        .clk(clk),
+        .reset(rst));
+
+    initial begin
+        @(negedge rst); // wait for reset
+
+        $write("================== TEST pivots ==================\n");
+        $write("TEST: reset initializes pivots correctly .... ");
+        @(posedge clk);
+        rst <= 1;
+        @(posedge clk);
+        // Reset is still asserted: kernel (bank, port) is expected to hold bank + port * (NCONSUMERS / NPORTS).
+        @(posedge clk);
+        `assert(rsk.rr_pivots[0], 0);
+        `assert(rsk.rr_pivots[1], 4);
+        `assert(rsk.rr_pivots[2], 1);
+        `assert(rsk.rr_pivots[3], 5);
+        `assert(rsk.rr_pivots[4], 2);
+        `assert(rsk.rr_pivots[5], 6);
+        `assert(rsk.rr_pivots[6], 3);
+        `assert(rsk.rr_pivots[7], 7);
+        $write("PASS\n");
+        // With reset released each pivot is expected one higher than its reset value, wrapping from 7 to 0.
+        $write("TEST: pivots get incremented correctly .... ");
+        @(posedge clk);
+        rst <= 0;
+        @(posedge clk);
+        @(posedge clk);
+        `assert(rsk.rr_pivots[0], 1);
+        `assert(rsk.rr_pivots[1], 5);
+        `assert(rsk.rr_pivots[2], 2);
+        `assert(rsk.rr_pivots[3], 6);
+        `assert(rsk.rr_pivots[4], 3);
+        `assert(rsk.rr_pivots[5], 7);
+        `assert(rsk.rr_pivots[6], 4);
+        `assert(rsk.rr_pivots[7], 0);
+        // One clock later every pivot has advanced by one more.
+        @(posedge clk);
+        `assert(rsk.rr_pivots[0], 2);
+        `assert(rsk.rr_pivots[1], 6);
+        `assert(rsk.rr_pivots[2], 3);
+        `assert(rsk.rr_pivots[3], 7);
+        `assert(rsk.rr_pivots[4], 4);
+        `assert(rsk.rr_pivots[5], 0);
+        `assert(rsk.rr_pivots[6], 5);
+        `assert(rsk.rr_pivots[7], 1);
+
+        $write("PASS\n");
+
+        repeat(5) @(posedge clk);
+
+
+        $write("\n");
+
+        repeat(2) @(posedge clk);
+        $finish(2);
+    end
+
+endmodule
+`default_nettype wire
\ No newline at end of file
diff --git a/verilog/tb/test_request_mux_tb.sv b/verilog/tb/test_request_mux_tb.sv
new file mode 100644
index 0000000..ec1b74d
--- /dev/null
+++ b/verilog/tb/test_request_mux_tb.sv
@@ -0,0 +1,93 @@
+`default_nettype none
+// Testbench for request_mux: loads eight distinct request words and checks that select picks the right one.
+module test_request_mux_tb;
+    // clk and rst only pace the stimulus; neither is connected to the DUT.
+    reg clk;
+    reg rst;
+
+    localparam CLK_PERIOD = 10;
+    initial begin
+        clk = 1'b0;
+        rst = 1'b1;
+        repeat(4) #(CLK_PERIOD/2) clk = ~clk;
+        rst = 1'b0;
+        forever #(CLK_PERIOD/2) clk = ~clk; // generate a clock
+    end
+
+
+    localparam REQ_WIDTH = 32;
+    localparam NCONSUMERS = 8;
+
+    reg [REQ_WIDTH-1:0] requests [NCONSUMERS];
+    reg [$clog2(NCONSUMERS)-1:0] select;
+    wire [REQ_WIDTH-1:0] selected_request;
+    
+    request_mux #(
+        .REQ_NUMBER(NCONSUMERS),
+        .REQ_WIDTH(REQ_WIDTH)
+    ) rqm (
+        .requests(requests),
+        .select(select),
+        .selected_request(selected_request)
+        );
+
+    // initial begin
+    //     $dumpfile("test.vcd");
+    //     $dumpvars;
+    // end
+
+
+    initial begin
+        // requests[k] holds k * 1000, so every selection is recognisable from its value.
+        requests[0] = REQ_WIDTH'(0);
+        requests[1] = REQ_WIDTH'(1000);
+        requests[2] = REQ_WIDTH'(2000);
+        requests[3] = REQ_WIDTH'(3000);
+        requests[4] = REQ_WIDTH'(4000);
+        requests[5] = REQ_WIDTH'(5000);
+        requests[6] = REQ_WIDTH'(6000);
+        requests[7] = REQ_WIDTH'(7000);
+        
+        @(negedge rst); // wait for reset
+
+        $write("================== TEST request mux ==================\n");
+        $write("TEST: mux selects the request correctly .... ");
+        // Each step sets select after one clock edge and checks the output on the next one.
+        @(posedge clk);
+        select = 0;
+        @(posedge clk);
+        `assert(selected_request, 0)
+
+        @(posedge clk);
+        select = 3;
+        @(posedge clk);
+        `assert(selected_request, 3000)
+        
+        @(posedge clk);
+        select = 5;
+        @(posedge clk);
+        `assert(selected_request, 5000)
+        
+        @(posedge clk);
+        select = 7;
+        @(posedge clk);
+        `assert(selected_request, 7000)
+        
+        @(posedge clk);
+        select = 4;
+        @(posedge clk);
+        `assert(selected_request, 4000)
+
+        $write("PASS\n");
+
+        repeat(200) @(posedge clk);
+        repeat(1) @(posedge clk);
+
+        $write("\n");
+
+        repeat(2) @(posedge clk);
+        $finish(2);
+    end
+
+endmodule
+`default_nettype wire
\ No newline at end of file
diff --git a/verilog/tb/test_request_selection_rr_scheduling_kernel_tb.sv b/verilog/tb/test_request_selection_rr_scheduling_kernel_tb.sv
new file mode 100644
index 0000000..14c69e6
--- /dev/null
+++ b/verilog/tb/test_request_selection_rr_scheduling_kernel_tb.sv
@@ -0,0 +1,294 @@
+
+
+`default_nettype none
+// Checks which requests rr_scheduling_kernel forwards to plm_inputs, and on which cycle, for three scenarios.
+module test_request_selection_rr_scheduling_kernel_tb;
+    // Clock and reset generation: rst starts high and drops after four half-periods.
+    reg clk;
+    reg rst;
+
+    localparam CLK_PERIOD = 10;
+    initial begin
+        clk = 1'b0;
+        rst = 1'b1;
+        repeat(4) #(CLK_PERIOD/2) clk = ~clk;
+        rst = 1'b0;
+        forever #(CLK_PERIOD/2) clk = ~clk; // generate a clock
+    end
+    // DUT configuration: 4 banks x 2 ports = 8 kernels, 8 consumers, 10-bit addresses.
+    parameter ADDR_WIDTH = 10;
+    parameter VALUE_WIDTH = 8;
+    parameter NCONSUMERS = 8;
+    parameter NBANKS = 4;
+    parameter NPORTS = 2;
+    // Widths recomputed here with the same formulas the DUT uses for its request and PLM-input words.
+    localparam REQ_WIDTH = ADDR_WIDTH + VALUE_WIDTH + 1 + 1;
+    localparam PLM_ADDR_WIDTH = ADDR_WIDTH - $clog2(NBANKS);
+    localparam PLM_INPUT_WIDTH = PLM_ADDR_WIDTH + VALUE_WIDTH + 1;
+    localparam NKERNELS = NBANKS * NPORTS;
+
+    reg [REQ_WIDTH-1:0] requests [NCONSUMERS];
+    wire [PLM_INPUT_WIDTH-1:0] plm_inputs[NKERNELS];
+    
+    rr_scheduling_kernel #(
+        .NCONSUMERS(NCONSUMERS),
+        .NBANKS(NBANKS),
+        .NPORTS(NPORTS),
+        .ADDR_WIDTH(ADDR_WIDTH),
+        .VALUE_WIDTH(VALUE_WIDTH)
+    ) rsk (
+        .clk(clk),
+        .reset(rst),
+        .requests(requests),
+        .plm_inputs(plm_inputs)
+        );
+    // Waveform dump: each request and PLM-input array element, then everything else.
+    integer i = 0;
+    initial begin
+        $dumpfile("test.vcd");
+        for(i = 0; i < NCONSUMERS; i++) begin
+            $dumpvars(0, rsk.requests[i]);
+        end
+        
+        for(i = 0; i < NKERNELS; i++) begin
+            $dumpvars(0, rsk.plm_inputs[i]);
+        end
+        $dumpvars;
+    end
+    // plm_<k>_* split plm_inputs[k] into {addr, value, wr}; in the DUT k = bank * NPORTS + port.
+    wire [PLM_ADDR_WIDTH-1:0] plm_0_addr;
+    wire [VALUE_WIDTH-1:0] plm_0_val;
+    wire plm_0_wr;
+    wire [PLM_INPUT_WIDTH-1:0] plm_0;
+
+    assign plm_0 = plm_inputs[0];
+    assign plm_0_addr = plm_0[PLM_INPUT_WIDTH-1-: PLM_ADDR_WIDTH];
+    assign plm_0_val = plm_0[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-1 -: VALUE_WIDTH];
+    assign plm_0_wr = plm_0[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-VALUE_WIDTH-1 -: 1];
+
+    wire [PLM_ADDR_WIDTH-1:0] plm_1_addr;
+    wire [VALUE_WIDTH-1:0] plm_1_val;
+    wire plm_1_wr;
+    wire [PLM_INPUT_WIDTH-1:0] plm_1;
+
+    assign plm_1 = plm_inputs[1];
+    assign plm_1_addr = plm_1[PLM_INPUT_WIDTH-1-: PLM_ADDR_WIDTH];
+    assign plm_1_val = plm_1[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-1 -: VALUE_WIDTH];
+    assign plm_1_wr = plm_1[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-VALUE_WIDTH-1 -: 1];
+    
+    wire [PLM_ADDR_WIDTH-1:0] plm_2_addr;
+    wire [VALUE_WIDTH-1:0] plm_2_val;
+    wire plm_2_wr;
+    wire [PLM_INPUT_WIDTH-1:0] plm_2;
+    assign plm_2 = plm_inputs[2];
+    assign plm_2_addr = plm_2[PLM_INPUT_WIDTH-1-: PLM_ADDR_WIDTH];
+    assign plm_2_val = plm_2[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-1 -: VALUE_WIDTH];
+    assign plm_2_wr = plm_2[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-VALUE_WIDTH-1 -: 1];
+
+    wire [PLM_ADDR_WIDTH-1:0] plm_4_addr;
+    wire [VALUE_WIDTH-1:0] plm_4_val;
+    wire plm_4_wr;
+    wire [PLM_INPUT_WIDTH-1:0] plm_4;
+    assign plm_4 = plm_inputs[4];
+    assign plm_4_addr = plm_4[PLM_INPUT_WIDTH-1-: PLM_ADDR_WIDTH];
+    assign plm_4_val = plm_4[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-1 -: VALUE_WIDTH];
+    assign plm_4_wr = plm_4[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-VALUE_WIDTH-1 -: 1];
+
+    wire [PLM_ADDR_WIDTH-1:0] plm_6_addr;
+    wire [VALUE_WIDTH-1:0] plm_6_val;
+    wire plm_6_wr;
+    wire [PLM_INPUT_WIDTH-1:0] plm_6;
+    assign plm_6 = plm_inputs[6];
+    assign plm_6_addr = plm_6[PLM_INPUT_WIDTH-1-: PLM_ADDR_WIDTH];
+    assign plm_6_val = plm_6[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-1 -: VALUE_WIDTH];
+    assign plm_6_wr = plm_6[PLM_INPUT_WIDTH-PLM_ADDR_WIDTH-VALUE_WIDTH-1 -: 1];
+
+    initial begin
+        // Start with every request slot cleared (valid bit 0).
+        for(i = 0; i < NCONSUMERS; i++) begin
+            requests[i] = REQ_WIDTH'(0);
+        end
+
+        @(negedge rst); // wait for reset
+        // The "a-b" step comments below presumably give the pivots of kernels 0 and 1 (bank 0, ports 0 and 1).
+        $write("================== TEST request selection ==================\n");
+        $write("TEST: a valid requests is routed to all ports eventually .... ");
+        
+        @(posedge clk); // reset
+        rst = 1'b1;
+        @(posedge clk);
+        rst = 1'b0;
+        requests[0] = {ADDR_WIDTH'(2), VALUE_WIDTH'(25), 1'b1, 1'b1}; 
+
+        @(posedge clk); // pivots 1-5
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 2-6
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 3-7
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 4-0 +
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(2));
+        `assert(plm_1_val, ADDR_WIDTH'(25));
+        `assert(plm_1_wr, ADDR_WIDTH'(1));
+        @(posedge clk); // pivot 5-1 
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 6-2 
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 7-3 
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 0-4 
+        `assert(plm_0_addr, ADDR_WIDTH'(2));
+        `assert(plm_0_val, ADDR_WIDTH'(25));
+        `assert(plm_0_wr, ADDR_WIDTH'(1));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        $write("PASS\n");
+        repeat(20) @(posedge clk);
+
+        $write("TEST: two valid requests for bank0 are routed to respective ports .... ");
+        @(posedge clk); // reset
+        rst = 1'b1;
+        @(posedge clk);
+        rst = 1'b0;
+        requests[0] = {ADDR_WIDTH'(3), VALUE_WIDTH'(25), 1'b1, 1'b1};
+        requests[4] = {ADDR_WIDTH'(5), VALUE_WIDTH'(50), 1'b1, 1'b1};
+
+        @(posedge clk); // pivots 1-5
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 2-6
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 3-7
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 4-0 +
+        `assert(plm_0_addr, ADDR_WIDTH'(5));
+        `assert(plm_0_val, ADDR_WIDTH'(50));
+        `assert(plm_0_wr, ADDR_WIDTH'(1));
+        `assert(plm_1_addr, ADDR_WIDTH'(3));
+        `assert(plm_1_val, ADDR_WIDTH'(25));
+        `assert(plm_1_wr, ADDR_WIDTH'(1));
+        @(posedge clk); // pivot 5-1 
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 6-2 
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 7-3 
+        `assert(plm_0_addr, ADDR_WIDTH'(0));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(0));
+        `assert(plm_1_addr, ADDR_WIDTH'(0));
+        `assert(plm_1_val, ADDR_WIDTH'(0));
+        `assert(plm_1_wr, ADDR_WIDTH'(0));
+        @(posedge clk); // pivot 0-4 
+        `assert(plm_0_addr, ADDR_WIDTH'(3));
+        `assert(plm_0_val, ADDR_WIDTH'(25));
+        `assert(plm_0_wr, ADDR_WIDTH'(1));
+        `assert(plm_1_addr, ADDR_WIDTH'(5));
+        `assert(plm_1_val, ADDR_WIDTH'(50));
+        `assert(plm_1_wr, ADDR_WIDTH'(1));
+
+        $write("PASS\n");
+        // Third scenario: one valid request per bank, checked on port 0 of each bank (kernels 0, 2, 4, 6).
+        $write("TEST: requests are routed to different banks based on address .... ");
+        @(posedge clk); // reset
+        rst = 1'b1;
+        @(posedge clk);
+        rst = 1'b0;
+        requests[0] = {ADDR_WIDTH'(255), VALUE_WIDTH'(0), 1'b1, 1'b1};
+        requests[1] = {ADDR_WIDTH'(510), VALUE_WIDTH'(1), 1'b1, 1'b1};
+        requests[2] = {ADDR_WIDTH'(765), VALUE_WIDTH'(2), 1'b1, 1'b1};
+        requests[3] = {ADDR_WIDTH'(1020), VALUE_WIDTH'(3), 1'b1, 1'b1};
+        requests[4] = {ADDR_WIDTH'(0), VALUE_WIDTH'(0), 1'b0, 1'b0};
+        // Addresses 255/510/765/1020 carry 0..3 in their top two bits; their low eight bits are 255/254/253/252.
+        @(posedge clk); // pivot 1-5
+        @(posedge clk); // pivot 2-6
+        @(posedge clk); // pivot 3-7
+        @(posedge clk); // pivot 4-0
+        @(posedge clk); // pivot 5-1
+        @(posedge clk); // pivot 6-2
+        @(posedge clk); // pivot 7-3
+        @(posedge clk); // pivot 0-4
+        `assert(plm_0_addr, ADDR_WIDTH'(255));
+        `assert(plm_0_val, ADDR_WIDTH'(0));
+        `assert(plm_0_wr, ADDR_WIDTH'(1));
+        `assert(plm_2_addr, ADDR_WIDTH'(254));
+        `assert(plm_2_val, ADDR_WIDTH'(1));
+        `assert(plm_2_wr, ADDR_WIDTH'(1));
+        `assert(plm_4_addr, ADDR_WIDTH'(253));
+        `assert(plm_4_val, ADDR_WIDTH'(2));
+        `assert(plm_4_wr, ADDR_WIDTH'(1));
+        `assert(plm_6_addr, ADDR_WIDTH'(252));
+        `assert(plm_6_val, ADDR_WIDTH'(3));
+        `assert(plm_6_wr, ADDR_WIDTH'(1));
+        @(posedge clk); // pivot 1-5
+
+        $write("PASS\n");
+
+        repeat(20) @(posedge clk);
+        repeat(1) @(posedge clk);
+
+        $write("\n");
+
+        repeat(2) @(posedge clk);
+        $finish(2);
+    end
+
+endmodule
+`default_nettype wire
\ No newline at end of file
diff --git a/verilog/utils/utils.sv b/verilog/utils/utils.sv
new file mode 100644
index 0000000..12e06c1
--- /dev/null
+++ b/verilog/utils/utils.sv
@@ -0,0 +1,13 @@
+`define QUOTE(q) `"q`"
+// QUOTE turns its argument into a string. assert_true prints error_string and ends the run when condition is false.
+`define assert_true(condition, error_string) \
+    if (!(condition)) begin \
+        $display("ASSERTION FAILED: %s", error_string); \
+        $finish; \
+    end
+// assert compares with !==, so x/z mismatches fail too; both arguments are parenthesised so compound expressions work.
+`define assert(signal, value) \
+    if ((signal) !== (value)) begin \
+        $display("ASSERTION FAILED in %m on %s: actual: %h != expected: %h",`QUOTE(signal), signal, value); \
+        $finish(1); \
+    end
\ No newline at end of file