diff --git a/research_projects/NDP/README.md b/research_projects/NDP/README.md new file mode 100644 index 0000000..89653c8 --- /dev/null +++ b/research_projects/NDP/README.md @@ -0,0 +1,78 @@ +# NDP trimming switch + +This directory hosts the `ndp.p4` trimming switch +implementation targeting the TNA architecture. +For more details, see our paper [Re-architecting datacenter networks and stacks for low latency and high performance](http://nets.cs.pub.ro/~costin/files/ndp.pdf). + +The p4 code, table population scripts and instructions for building and running +NDP for the Tofino switch are in the `dev_root/` directory. + +# How it works + +To summarize, `ndp.p4` keeps an under-approximation of +the buffer occupancy for each port in ingress (by means of a +three-color meter). Whenever the meter turns red, it means +that the buffer is full and the packet undergoes trimming. To achieve +that, we mark the packet to be cloned to egress and set up +the clone session to truncate the packet in such a way as to +only keep the packet headers. + +Since Tofino keeps per-pipeline meter state, we may +end up in the situation where multiple ingress pipelines are +flooding a single output port without any of the meters turning +red. To solve this situation, we devise a three-level meter +strategy and make use of the Deflect-on-Drop capabilities +of the Tofino to make ingress meters trim more aggressively for +the port which is experiencing drops. After a pre-defined +interval, the meters switch to an intermediate level of trimming +and, after even more time, when the incast has passed, to their original trim rate. 
+ +A more detailed description of the implementation: + + * on ingress + + 0) the packet undergoes regular ipv4 forwarding with fwd decision to port egport + 1) if packet is ndp control => output packet to HIGH_PRIORITY_QUEUE + 2) if packet is ndp data => pass packet through meter[egport] + + 2.1) if meter color is GREEN => output packet to LOW_PRIORITY_QUEUE + + 2.2) if meter color != GREEN => clone packet to sid where (sid maps to egport, HIGH_PRIORITY_QUEUE, + packet length = 80B) + 3) if packet is not ndp => proceed with forwarding on OTHERS_QUEUE + + * on egress: + 1) if packet is ndp data and comes in from DoD port (dropped due to congestion) + 2) when trimmed or normal packets come in => do rewrites (mac src and dst addresses) and set ndp trim flags + 3) when clone packet back to egress to session esid (esid maps to recirculation port, HIGH_PRIORITY_QUEUE, packet length = 80B) + 4) when packet comes back from egress clone => forward as-is (i.e. recirculate back into ingress) and notify all pipelines + to transition into pessimistic mode + + ### NDP modes: + * Each egress port works in 3 modes: + - optimistic + - pessimistic + - "halftimistic" + +The mode decides what meter will be used for NDP packets going out on the given port + + * In optimistic, we use meter_optimistic (line-rate) + + * In pessimistic, we use meter_pessimistic (1/4 * line-rate) + + * In halftimistic, we use meter_halftimistic (1/2 * line-rate) + + Initially, the switch starts in optimistic mode for all ports. + + Whenever a DoD packet is received in egress => all ingress pipelines are notified to + trim more aggressively (i.e. transition into pessimistic mode). + + A port remains in pessimistic mode for T0 ns if no extra DoDs occur. + After T0 ns, the port transitions into halftimistic mode. + + A port remains in halftimistic mode for T1 ns if no other DoDs occur. + After T1 ns, the port transitions back into optimistic mode. 
+ + NB: T0 and T1 are hardcoded into ndp.p4 and are currently set + to 6us and 24us respectively. + diff --git a/research_projects/NDP/dev_root/Makefile b/research_projects/NDP/dev_root/Makefile new file mode 100755 index 0000000..5823e97 --- /dev/null +++ b/research_projects/NDP/dev_root/Makefile @@ -0,0 +1,20 @@ +P4C ?= $(SDE_INSTALL)/bin/bf-p4c + +all: .ndp.ts + +.ndp.ts: ndp.p4 common/util.p4 common/headers.p4 +ifndef SDE_INSTALL + $(error SDE_INSTALL is undefined) +endif + $(P4C) $(OPT) $< --bf-rt-schema ndp.tofino/bfrt.json + cp -r ndp.tofino $(SDE_INSTALL)/ + @touch $@ + +package: + tar -cvf ndp.tar -T package.txt + +clean: + rm -rf .ndp.ts ndp.tar + rm -rf .dodtest.ts + +.PHONY: clean package diff --git a/research_projects/NDP/dev_root/README.md b/research_projects/NDP/dev_root/README.md new file mode 100644 index 0000000..f1342c1 --- /dev/null +++ b/research_projects/NDP/dev_root/README.md @@ -0,0 +1,136 @@ +# ndp.p4 + +This repo contains TNA P4 code for running NDP with trimming. + +* ndp.p4 contains the P4 source code +* setup_ndp.py contains the control-plane code which populates +NDP's tables +* samples/ - contains configuration samples for Tofino + +## Compiling + +`ndp.p4` was tested against version `9.2.0` of the Intel SDE +(formerly `bf-sde`). We assume that the following environment variables are set prior to building and running: `$SDE`, `$SDE_INSTALL`. + +``` +make +``` +The output of this command is `$SDE_INSTALL/ndp.tofino/` + +## Running +Deploying the P4 switch on hardware: +``` +$SDE/run_switchd.sh -p ndp -c $SDE_INSTALL/ndp.tofino/ndp.conf +``` + +## Control plane +The control plane consists of the script `setup_ndp.py` +which takes as input a configuration file and populates +the entries for NDP. The current configuration is static +(i.e. no dynamic routing, no ARP etc.). 
+ +`setup_ndp.py` works in two modes: +* single-pipe: no extra CLI arguments - the input is a *single-pipe* json - it will set all tables as symmetric (see below) +* multi-pipe: requires `-multi-pipe` CLI option. Expects as input a +multi-pipe json file which consists of a dictionary where keys are +strings representing pipe_ids and values are objects with the sole +attribute "file" whose value points to the single-pipe input json +for the particular pipe_id. The *single-pipe* input format is +described below + +The *single-pipe* input to `setup_ndp.py` is a json file with the following contents: +- arp - the contents of the ARP table of the switch (maps IPv4 -> MAC) - a dictionary of **arp entry** objects (key: IPv4 address, value: MAC address) +- rates - a list of **rate** objects with the following attributes: + - eg_port - dev_port for which the following attributes apply + - rate_kbps - int - meter speed in kbps (required) + - burst_kbits - int - meter "buffer" (burst size) - in kBits (required) + - shaper_rate_kbps - int - meter speed in kbps (optional: defaults to rate_kbps) - NB: shaper_rate_kbps = 0 ==> shaper is disabled for the given port + - shaper_burst_kbits - int - burst size of shaper (optional: defaults to burst_kbits) + - port_speed - str - one of 10G, 25G, 40G, 50G, 100G (required) + - port_bufsize - int + - fec - str - one of NONE, RS (required) +- entries - a list of **entry** objects with the following attributes (all of them are required): + - smac - source MAC of outgoing port + - dip - destination IPv4 + - eg_port - dev_port - outgoing device port + - nhop - IPv4 of next hop or 0 if the destination IP subnet is directly connected +- allow_pessimism - bool - optional: default True. 
When set to False, disables the optimistic/pessimistic mode transitions and only the optimistic meter is used + +## Examples + +First of all, set up the PYTHONPATH +``` +PYTHONPATH=$SDE_INSTALL/lib/python2.7/site-packages/:$SDE_INSTALL/lib/python2.7/site-packages/tofino +``` +* multi-pipe +``` +python setup_ndp.py -multi-pipe samples/multi_pipe/r1.json +``` + +* single-pipe +``` + python setup_ndp.py samples/single_pipe/r0_config.json +``` + +* troubleshooting +If running the script fails with something like `google.protobuf.internal` +not found, run the following (assuming original python site-packages is in /usr/local/lib/python2.7/site-packages) +``` +cp -r /usr/local/lib/python2.7/site-packages/protobuf*/google/protobuf/ $SDE_INSTALL/lib/python2.7/site-packages/google/ +``` + +Changing the running mode (single-pipe vs multi-pipe) between two +consecutive runs may sometimes lead to errors. If this is the case, +re-deploying ndp.p4 on the switch should solve the issue. + +## How it works + +Check out the original [NDP SIGCOMM'17 paper](https://dl.acm.org/doi/10.1145/3098822.3098825). 
+ +### Current implementation + * on ingress + + 0) the packet undergoes regular ipv4 forwarding with fwd decision to port egport + 1) if packet is ndp control => output packet to HIGH_PRIORITY_QUEUE + 2) if packet is ndp data => pass packet through meter[egport] + + 2.1) if meter color is GREEN => output packet to LOW_PRIORITY_QUEUE + + 2.2) if meter color != GREEN => clone packet to sid where (sid maps to egport, HIGH_PRIORITY_QUEUE, + packet length = 80B) + 3) if packet is not ndp => proceed with forwarding on OTHERS_QUEUE + + * on egress: + 1) if packet is ndp data and comes in from DoD port (dropped due to congestion) + 2) when trimmed or normal packets come in => do rewrites (mac src and dst addresses) and set ndp trim flags + 3) when clone packet back to egress to session esid (esid maps to recirculation port, HIGH_PRIORITY_QUEUE, packet length = 80B) + 4) when packet comes back from egress clone => forward as-is (i.e. recirculate back into ingress) and notify all pipelines + to transition into pessimistic mode + + ### NDP modes: + * Each egress port works in 3 modes: + - optimistic + - pessimistic + - "halftimistic" + +The mode decides what meter will be used for NDP packets going out on the given port + + * In optimistic, we use meter_optimistic (line-rate) + + * In pessimistic, we use meter_pessimistic (1/4 * line-rate) + + * In halftimistic, we use meter_halftimistic (1/2 * line-rate) + + Initially, the switch starts in optimistic mode for all ports. + + Whenever a DoD packet is received in egress => all ingress pipelines are notified to + trim more aggressively (i.e. transition into pessimistic mode). + + A port remains in pessimistic mode for T0 ns if no extra DoDs occur. + After T0 ns, the port transitions into halftimistic mode. + + A port remains in halftimistic mode for T1 ns if no other DoDs occur. + After T1 ns, the port transitions back into optimistic mode. 
+ + NB: T0 and T1 are hardcoded into ndp.p4 and are currently set + to 6us and 24us respectively. \ No newline at end of file diff --git a/research_projects/NDP/dev_root/common/headers.p4 b/research_projects/NDP/dev_root/common/headers.p4 new file mode 100755 index 0000000..f1a0a75 --- /dev/null +++ b/research_projects/NDP/dev_root/common/headers.p4 @@ -0,0 +1,235 @@ +/******************************************************************************* + * BAREFOOT NETWORKS CONFIDENTIAL & PROPRIETARY + * + * Copyright (c) 2018-2019 Barefoot Networks, Inc. + + * All Rights Reserved. + * + * NOTICE: All information contained herein is, and remains the property of + * Barefoot Networks, Inc. and its suppliers, if any. The intellectual and + * technical concepts contained herein are proprietary to Barefoot Networks, + * Inc. + * and its suppliers and may be covered by U.S. and Foreign Patents, patents in + * process, and are protected by trade secret or copyright law. + * Dissemination of this information or reproduction of this material is + * strictly forbidden unless prior written permission is obtained from + * Barefoot Networks, Inc. + * + * No warranty, explicit or implicit is provided, unless granted under a + * written agreement with Barefoot Networks, Inc. 
+ * + * + ******************************************************************************/ + +#ifndef _HEADERS_ +#define _HEADERS_ + +typedef bit<48> mac_addr_t; +typedef bit<32> ipv4_addr_t; +typedef bit<128> ipv6_addr_t; +typedef bit<12> vlan_id_t; +typedef bit<16> ndp_seqno_t; + +typedef bit<16> ether_type_t; +const ether_type_t ETHERTYPE_IPV4 = 16w0x0800; +const ether_type_t ETHERTYPE_ARP = 16w0x0806; +const ether_type_t ETHERTYPE_IPV6 = 16w0x86dd; +const ether_type_t ETHERTYPE_DUMB = 16w0x599; + +typedef bit<8> ip_protocol_t; +const ip_protocol_t IP_PROTOCOLS_ICMP = 1; +const ip_protocol_t IP_PROTOCOLS_TCP = 6; +const ip_protocol_t IP_PROTOCOLS_IPIP = 4; +const ip_protocol_t IP_PROTOCOLS_UDP = 17; + +// size = 14 +header ethernet_h { + mac_addr_t dst_addr; + mac_addr_t src_addr; + bit<16> ether_type; +} +header empty_h {} + +header vlan_tag_h { + bit<3> pcp; + bit<1> cfi; + vlan_id_t vid; + bit<16> ether_type; +} + +header mpls_h { + bit<20> label; + bit<3> exp; + bit<1> bos; + bit<8> ttl; +} + +// size = 20 +header ipv4_h { + bit<4> version; + bit<4> ihl; + bit<8> diffserv; + bit<16> total_len; + bit<16> identification; + bit<3> flags; + bit<13> frag_offset; + bit<8> ttl; + bit<8> protocol; + bit<16> hdr_checksum; + ipv4_addr_t src_addr; + ipv4_addr_t dst_addr; +} + +header ipv6_h { + bit<4> version; + bit<8> traffic_class; + bit<20> flow_label; + bit<16> payload_len; + bit<8> next_hdr; + bit<8> hop_limit; + ipv6_addr_t src_addr; + ipv6_addr_t dst_addr; +} + +header tcp_h { + bit<16> src_port; + bit<16> dst_port; + bit<32> seq_no; + bit<32> ack_no; + bit<4> data_offset; + bit<4> res; + bit<8> flags; + bit<16> window; + bit<16> checksum; + bit<16> urgent_ptr; +} + +// size = 8 +header udp_h { + bit<16> src_port; + bit<16> dst_port; + bit<16> hdr_lenght; + bit<16> checksum; +} + +header icmp_h { + bit<8> type_; + bit<8> code; + bit<16> hdr_checksum; +} + +// Address Resolution Protocol -- RFC 6747 +header arp_h { + bit<16> hw_type; + bit<16> proto_type; + bit<8> 
hw_addr_len; + bit<8> proto_addr_len; + bit<16> opcode; + // ... +} + +// Segment Routing Extension (SRH) -- IETFv7 +header ipv6_srh_h { + bit<8> next_hdr; + bit<8> hdr_ext_len; + bit<8> routing_type; + bit<8> seg_left; + bit<8> last_entry; + bit<8> flags; + bit<16> tag; +} + +// VXLAN -- RFC 7348 +header vxlan_h { + bit<8> flags; + bit<24> reserved; + bit<24> vni; + bit<8> reserved2; +} + +// Generic Routing Encapsulation (GRE) -- RFC 1701 +header gre_h { + bit<1> C; + bit<1> R; + bit<1> K; + bit<1> S; + bit<1> s; + bit<3> recurse; + bit<5> flags; + bit<3> version; + bit<16> proto; +} + +typedef bit<8> ndp_flags_t; +header ndp_shim_data_h { + ndp_flags_t flags; + bit<8> next_proto; + ndp_seqno_t seqno; + bit<16> pull_target; + bit<16> rsvd; +} + +header ndp_shim_ctrl_h { + bit<8> flags; + bit<8> reserved; + bit<16> wsize; + ndp_seqno_t ackno; + ndp_seqno_t pullno; +} + +typedef bit<8> pkt_type_t; +const pkt_type_t PKT_TYPE_NORMAL = 1; +const pkt_type_t PKT_TYPE_MIRROR = 2; +const pkt_type_t PKT_TYPE_TRIM = 3; +const pkt_type_t PKT_TYPE_MIRROR_UPDATE = 4; +const pkt_type_t PKT_TYPE_NOTIFY = 5; +header bridged_h { + pkt_type_t pkt_type; +} + +header eg_metadata_t { + pkt_type_t pkt_type; + mac_addr_t dstAddr; + mac_addr_t srcAddr; + bit<24> truncated; +} + +header trim_metadata_t { + pkt_type_t pkt_type; + PortId_t egress_port; + @padding bit<7> __pad; +} + +header normal_metadata_t { + pkt_type_t pkt_type; + PortId_t egress_port; + @padding bit<7> __pad; +} +header update_packet_metadata_t { + pkt_type_t pkt_type; +} + +struct switch_lookup_fields_t { + bit<8> ip_proto; + bit<128> ip_src_addr; + bit<128> ip_dst_addr; + bit<16> l4_src_port; + bit<16> l4_dst_port; +} + +struct header_t { + eg_metadata_t egmeta; + trim_metadata_t trim_meta; + normal_metadata_t normal_meta; + update_packet_metadata_t update_meta; + ethernet_h ethernet; + vlan_tag_h vlan_tag; + ipv4_h ipv4; + ipv6_h ipv6; + tcp_h tcp; + udp_h udp; + ndp_shim_data_h ndp_s_data; + ndp_shim_data_h 
ndp_s_ctrl; +} + +#endif /* _HEADERS_ */ diff --git a/research_projects/NDP/dev_root/common/util.p4 b/research_projects/NDP/dev_root/common/util.p4 new file mode 100755 index 0000000..1c0a597 --- /dev/null +++ b/research_projects/NDP/dev_root/common/util.p4 @@ -0,0 +1,102 @@ +/******************************************************************************* + * BAREFOOT NETWORKS CONFIDENTIAL & PROPRIETARY + * + * Copyright (c) 2018-2019 Barefoot Networks, Inc. + * All Rights Reserved. + * + * NOTICE: All information contained herein is, and remains the property of + * Barefoot Networks, Inc. and its suppliers, if any. The intellectual and + * technical concepts contained herein are proprietary to Barefoot Networks, + * Inc. + * and its suppliers and may be covered by U.S. and Foreign Patents, patents in + * process, and are protected by trade secret or copyright law. + * Dissemination of this information or reproduction of this material is + * strictly forbidden unless prior written permission is obtained from + * Barefoot Networks, Inc. + * + * No warranty, explicit or implicit is provided, unless granted under a + * written agreement with Barefoot Networks, Inc. + * + * + ******************************************************************************/ +#ifndef _UTIL_ +#define _UTIL_ + +#include "headers.p4" + +parser TofinoIngressParser( + packet_in pkt, + out ingress_intrinsic_metadata_t ig_intr_md) { + state start { + pkt.extract(ig_intr_md); + transition select(ig_intr_md.resubmit_flag) { + 1 : parse_resubmit; + 0 : parse_port_metadata; + } + } + + state parse_resubmit { + // Parse resubmitted packet here. 
+ transition reject; + } + + state parse_port_metadata { +#if __TARGET_TOFINO__ == 2 + pkt.advance(192); +#else + pkt.advance(64); +#endif + transition accept; + } +} + +parser TofinoEgressParser( + packet_in pkt, + out egress_intrinsic_metadata_t eg_intr_md) { + state start { + pkt.extract(eg_intr_md); + transition accept; + } +} + +// Empty egress parser/control blocks +parser EmptyEgressParser( + packet_in pkt, + out H hdr, + out M eg_md, + out egress_intrinsic_metadata_t eg_intr_md) { + state start { + transition accept; + } +} + +control EmptyEgressDeparser( + packet_out pkt, + inout H hdr, + in M eg_md, + in egress_intrinsic_metadata_for_deparser_t ig_intr_dprs_md) { + apply {} +} + +control EmptyEgress( + inout H hdr, + inout M eg_md, + in egress_intrinsic_metadata_t eg_intr_md, + in egress_intrinsic_metadata_from_parser_t eg_intr_md_from_prsr, + inout egress_intrinsic_metadata_for_deparser_t ig_intr_dprs_md, + inout egress_intrinsic_metadata_for_output_port_t eg_intr_oport_md) { + apply {} +} + +Hash>(HashAlgorithm_t.CRC32) ip_hash; +Hash>(HashAlgorithm_t.CRC32) non_ip_hash; + +action compute_ip_hash(in switch_lookup_fields_t lkp, out bit<32> hash) { + hash = ip_hash.get({lkp.ip_src_addr, + lkp.ip_dst_addr, + lkp.ip_proto, + lkp.l4_dst_port, + lkp.l4_src_port}); +} + +#endif /* _UTIL */ diff --git a/research_projects/NDP/dev_root/ndp.p4 b/research_projects/NDP/dev_root/ndp.p4 new file mode 100755 index 0000000..2e91c80 --- /dev/null +++ b/research_projects/NDP/dev_root/ndp.p4 @@ -0,0 +1,820 @@ +/******************************************************************************* + * + * Copyright (c) 2020-2021 Correct Networks, Intel Corporation + * All Rights Reserved. 
+ * Authors: + * Dragos Dumitrescu (dragos@correctnetworks.io) + * Adrian Popa (adrian.popa@correctnetworks.io) + * + * NOTICE: TBD + * + * + ******************************************************************************/ + +/******************************************************************************* + * + * Here are the main steps: + * + * on ingress + * the packet undergoes regular ipv4 forwarding with fwd decision to port egport + * 1) if packet is ndp control => output packet to HIGH_PRIORITY_QUEUE + * 2) if packet is ndp data => pass packet through meter[egport] + 2.1) if meter color is GREEN => output packet to LOW_PRIORITY_QUEUE + 2.2) if meter color != GREEN => clone packet to sid where (sid maps to egport, HIGH_PRIORITY_QUEUE, + packet length = 80B) + 3) if packet is not ndp => proceed with forwarding on OTHERS_QUEUE + * + * on egress: + * 1) if packet is ndp data and comes in from DoD port (dropped due to congestion) + 2) when trimmed or normal packets come in => do rewrites (mac src and dst addresses) and set ndp trim flags + 3) when clone packet back to egress to sesssion esid (esid maps to recirculation port, HIGH_PRIORITY_QUEUE, packet length = 80B) + 4) when packet comes back from egress clone => forward as-is (i.e. recirculate back into ingress) and notify all pipelines + to transition into pessimistic mode + + * NDP modes: + * Each egress port works in 3 modes: + - optimistic + - pessimistic + - halftimistic + The mode decides what meter will be used for NDP packets going out on the given port + In optimistic, we use meter_optimistic (line-rate) + In pessimistic, we use meter_pessimistic (1/4 * line-rate) + In halftimistic, we use meter_halftimistic (1/2 * line-rate) + + Initially, the switch starts in optimistic mode for all ports + Whenever a DoD packet is received in egress => all ingress pipelines are notified to + trim more aggressively (i.e. transition into pessimistic mode). 
+ + A port remains in pessimistic mode for T0 ns if no extra DoDs occur. + After T0 ns, the port transitions into halftimistic mode. + + A port remains in halftimistic mode for T1 ns if no other DoDs occur. + After T1 ns, the port transitions back into optimistic mode. + ******************************************************************************/ + +#include +#if __TARGET_TOFINO__ == 2 +#include +#else +#include +#endif + +#include "common/util.p4" +#include "common/headers.p4" + + +#ifndef T0_NS +#define T0_NS 6 * 1000 +#endif + +#ifndef T1_NS +#define T1_NS 24 * 1000 +#endif + +typedef bit<2> pkt_color_t; +typedef bit<32> ndp_timestamp_t; + +const pkt_color_t SWITCH_METER_COLOR_GREEN = 0; +const pkt_color_t SWITCH_METER_COLOR_YELLOW = 1; +const pkt_color_t SWITCH_METER_COLOR_RED = 2; + +const bit<32> ecmp_selection_table_size = 16384; +const bit<32> ecmp_table_size = 1024; + +struct qos_metadata_a_t { + pkt_color_t color; +} + +struct ig_metadata_t { + pkt_type_t pkt_type; + PortId_t egress_port; + MirrorId_t mirror_session_id; + bool drop_ndp; + bool always_truncate; + ipv4_addr_t nhop; + switch_lookup_fields_t lkp; + bit<32> hash; + bit<32> nhop_idx; +} +struct egress_metadata_t { + MirrorId_t mirror_session_id; + bit<1> is_recirculate_port; +} + +// --------------------------------------------------------------------------- +// Ingress parser +// --------------------------------------------------------------------------- +parser SwitchIngressParser( + packet_in pkt, + out header_t hdr, + out ig_metadata_t ig_md, + out ingress_intrinsic_metadata_t ig_intr_md) { + + TofinoIngressParser() tofino_parser; + value_set>(4) is_recirc_port; + + state start { + tofino_parser.apply(pkt, ig_intr_md); + transition select(ig_intr_md.ingress_port) { + is_recirc_port : parse_update_command; + default : parse_ethernet; + } + } + state parse_update_command { + pkt_type_t pktype = pkt.lookahead(); + transition select(pktype) { + PKT_TYPE_TRIM : parse_trim; + 
PKT_TYPE_MIRROR_UPDATE : parse_update; + PKT_TYPE_NOTIFY : parse_trim; + } + } + state parse_update { + pkt.extract(hdr.update_meta); + transition parse_ethernet; + } + state parse_trim { + pkt.extract(hdr.trim_meta); + transition parse_ethernet; + } + + state parse_ethernet { + pkt.extract(hdr.ethernet); + transition select (hdr.ethernet.ether_type) { + ETHERTYPE_IPV4 : parse_ipv4; + default : reject; + } + } + + state parse_ipv4 { + pkt.extract(hdr.ipv4); + transition select(hdr.ipv4.protocol) { + IP_PROTOCOLS_UDP : parse_udp; + IP_PROTOCOLS_TCP : parse_tcp; + default : accept; + } + } + + state parse_tcp { + pkt.extract(hdr.tcp); + transition accept; + } + + state parse_udp { + pkt.extract(hdr.udp); + transition select(hdr.udp.dst_port) { + 666 : parse_ndp; + default : accept; + } + } + + state parse_ndp { + ndp_flags_t flags = pkt.lookahead(); + transition select(flags[3:1]) { + 3w0b100 &&& 3w0b100 : parse_ndp_s_ctrl; + 3w0b010 &&& 3w0b010 : parse_ndp_s_ctrl; + 3w0b001 &&& 3w0b001 : parse_ndp_s_ctrl; + default: parse_ndp_s_data; + } + } + + state parse_ndp_s_data { + pkt.extract(hdr.ndp_s_data); + transition accept; + } + + state parse_ndp_s_ctrl { + pkt.extract(hdr.ndp_s_ctrl); + transition accept; + } + +} + +// --------------------------------------------------------------------------- +// Egress parser +// --------------------------------------------------------------------------- +parser SwitchEgressParser( + packet_in pkt, + out header_t hdr, out egress_metadata_t eg_md, + out egress_intrinsic_metadata_t eg_intr_md) { + + TofinoEgressParser() tofino_parser; + + state start { + tofino_parser.apply(pkt, eg_intr_md); + transition parse_metas; + } + state parse_metas { + pkt_type_t pkttype = pkt.lookahead(); + // egress parser drops by default + transition select (pkttype) { + PKT_TYPE_MIRROR : parse_mirror_md; + PKT_TYPE_NORMAL : parse_normal_md; + PKT_TYPE_TRIM : parse_trim; + PKT_TYPE_MIRROR_UPDATE : parse_mirror_update; + PKT_TYPE_NOTIFY : parse_trim; + } 
+ } + state parse_trim { + pkt.extract(hdr.trim_meta); + transition select(hdr.trim_meta.pkt_type) { + PKT_TYPE_NOTIFY: parse_mirror_update; + default: parse_ethernet; + } + } + state parse_mirror_update { + pkt.extract(hdr.update_meta); + transition parse_ethernet; + } + state parse_normal_md { + pkt.extract(hdr.normal_meta); + transition parse_ethernet; + } + state parse_mirror_md { + pkt.extract(hdr.egmeta); + transition parse_ethernet; + } + state parse_ethernet { + pkt.extract(hdr.ethernet); + transition select (hdr.ethernet.ether_type) { + ETHERTYPE_IPV4 : parse_ipv4; + default : reject; + } + } + + state parse_ipv4 { + pkt.extract(hdr.ipv4); + transition select(hdr.ipv4.protocol) { + IP_PROTOCOLS_UDP : parse_udp; + default : accept; + } + } + + state parse_udp { + pkt.extract(hdr.udp); + transition select(hdr.udp.dst_port) { + 666 : parse_ndp; + default : accept; + } + } + + state parse_ndp { + ndp_flags_t flags = pkt.lookahead(); + transition select(flags[3:1]) { + 3w0b100 &&& 3w0b100 : parse_ndp_s_ctrl; + 3w0b010 &&& 3w0b010 : parse_ndp_s_ctrl; + 3w0b001 &&& 3w0b001 : parse_ndp_s_ctrl; + default: parse_ndp_s_data; + } + } + + state parse_ndp_s_data { + pkt.extract(hdr.ndp_s_data); + transition accept; + } + + state parse_ndp_s_ctrl { + pkt.extract(hdr.ndp_s_ctrl); + transition accept; + } +} + +header empty_hdr_t { } + +// --------------------------------------------------------------------------- +// Ingress Deparser +// --------------------------------------------------------------------------- +control SwitchIngressDeparser( + packet_out pkt, + inout header_t hdr, + in ig_metadata_t ig_md, + in ingress_intrinsic_metadata_for_deparser_t ig_intr_dprsr_md) { + Mirror() mirror; + Checksum() ipv4_checksum; + + apply { + hdr.ipv4.hdr_checksum = ipv4_checksum.update({ + hdr.ipv4.version, + hdr.ipv4.ihl, + hdr.ipv4.diffserv, + hdr.ipv4.total_len, + hdr.ipv4.identification, + hdr.ipv4.flags, + hdr.ipv4.frag_offset, + hdr.ipv4.ttl, + hdr.ipv4.protocol, + 
hdr.ipv4.src_addr, + hdr.ipv4.dst_addr}); + + if (ig_intr_dprsr_md.mirror_type == 3w1) { + // emit with no data + // does not work because of compiler bug in 8.9.1 + // mirror.emit(ig_md.mirror_session_id); + mirror.emit(ig_md.mirror_session_id, hdr.egmeta); + } + if (ig_intr_dprsr_md.mirror_type == 3w2) { + mirror.emit(ig_md.mirror_session_id, {ig_md.pkt_type, ig_md.egress_port, 0}); + } + pkt.emit(hdr); + } +} + +control SwitchEgressDeparser( + packet_out pkt, + inout header_t hdr, + in egress_metadata_t eg_md, + in egress_intrinsic_metadata_for_deparser_t eg_intr_dprsr_md) { + Checksum() ipv4_checksum; + Mirror() mirror; + + apply { + hdr.ipv4.hdr_checksum = ipv4_checksum.update({ + hdr.ipv4.version, + hdr.ipv4.ihl, + hdr.ipv4.diffserv, + hdr.ipv4.total_len, + hdr.ipv4.identification, + hdr.ipv4.flags, + hdr.ipv4.frag_offset, + hdr.ipv4.ttl, + hdr.ipv4.protocol, + hdr.ipv4.src_addr, + hdr.ipv4.dst_addr}); + pkt.emit(hdr); + if (eg_intr_dprsr_md.mirror_type == 3w1) { + mirror.emit(eg_md.mirror_session_id, {}); + } + } +} + +control SwitchIngress( + inout header_t hdr, + inout ig_metadata_t ig_md, + in ingress_intrinsic_metadata_t ig_intr_md, + in ingress_intrinsic_metadata_from_parser_t ig_intr_prsr_md, + inout ingress_intrinsic_metadata_for_deparser_t ig_intr_dprsr_md, + inout ingress_intrinsic_metadata_for_tm_t ig_intr_tm_md) { + + bit<1> allow_pessimism; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS_AND_BYTES) per_port_counter; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) meter_chop; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) return_chop; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) nr_arm; + + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) nr_opti; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) nr_pesi; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) nr_half; + + // NDP switch state machine + // for each egress port we maintain the following state machine: + // s0(optimistic) : meter is at line-rate + // 
s1(pessimistic) : meter is at 1/4 * line-rate + // s2(1/2 pessimistic): meter is at 1/2 * line-rate + // Transitions: + // e1: s0 ------DoD------------> s1 + // e2: s1 ------t0 ns elapsed since e1 ----> s2 + // e3: s2 ------t1 ns elapsed since e1 ----> s0 + // on e1: set downgrade_time[port] = now(); + // on e3: set downgrade_time[port] = 0; + // initially: port is in s0 + Register(32w256, 32w0) t0_reg; + RegisterAction>(t0_reg) arm0 = { + void apply(inout ndp_timestamp_t v) { + v = ig_intr_prsr_md.global_tstamp[31:0] + T0_NS; + } + }; + + Register(32w256, 32w0) t1_reg; + RegisterAction>(t1_reg) arm1 = { + void apply(inout ndp_timestamp_t v) { + v = ig_intr_prsr_md.global_tstamp[31:0] + T1_NS; + } + }; + + RegisterAction>(t0_reg) transition_0 = { + void apply(inout ndp_timestamp_t v, out bit<1> ret) { + if (ig_intr_prsr_md.global_tstamp[31:0] > v) { + v = 32w0; + ret = 1w0; + } else if (ig_intr_prsr_md.global_tstamp[31:0] + T0_NS < v) { + v = 32w0; + ret = 1w0; + } else { + ret = 1w1; + } + } + }; + RegisterAction>(t1_reg) transition_1 = { + void apply(inout ndp_timestamp_t v, out bit<1> ret) { + if (ig_intr_prsr_md.global_tstamp[31:0] > v) { + v = 32w0; + ret = 1w0; + } else if (ig_intr_prsr_md.global_tstamp[31:0] + T1_NS < v) { + v = 32w0; + ret = 1w0; + } else { + ret = 1w1; + } + } + }; + + qos_metadata_a_t qos_md; + Meter(512, MeterType_t.BYTES) meter_optimistic; + Meter(512, MeterType_t.BYTES) meter_pessimistic; + Meter(512, MeterType_t.BYTES) meter_halftimistic; + + action drop() { + ig_intr_dprsr_md.drop_ctl = 0x1; // Drop packet. 
+ } + + action ipv4_forward_direct_connect(mac_addr_t srcAddr, PortId_t port) { + ig_intr_tm_md.ucast_egress_port = port; + ig_md.egress_port = port; + ig_md.nhop = hdr.ipv4.dst_addr; + hdr.ethernet.src_addr = srcAddr; + hdr.ipv4.ttl = hdr.ipv4.ttl - 1; + } + action ipv4_forward(mac_addr_t srcAddr, ipv4_addr_t nhop, PortId_t port) { + ig_intr_tm_md.ucast_egress_port = port; + ig_md.egress_port = port; + ig_md.nhop = nhop; + hdr.ethernet.src_addr = srcAddr; + hdr.ipv4.ttl = hdr.ipv4.ttl - 1; + } + + action set_nhop_idx(bit<32> nhop_idx) { + ig_md.nhop_idx = nhop_idx; + } + + table ipv4_lpm { + key = { + hdr.ipv4.dst_addr : lpm; + } + actions = { + set_nhop_idx; + NoAction; + } + size = 1024; + default_action = NoAction(); + } + action set_dst(mac_addr_t dst_addr) { + hdr.ethernet.dst_addr = dst_addr; + } + table arp { + key = { + ig_md.nhop : exact; + } + actions = { + drop; + set_dst; + } + default_action = drop(); + size = 8192; + } + + action set_high_priority() { + ig_intr_tm_md.qid = 1; + } + + action setup_metas_for_egress() { + ig_intr_tm_md.qid = 0; + ig_intr_tm_md.bypass_egress = 0; + } + // send normal packet to egress for further processing. 
+ // this action is applied against an NDP data packet + // (regardless of whether it was trimmed or not) + action set_low_priority() { + hdr.normal_meta.setValid(); + hdr.normal_meta.pkt_type = PKT_TYPE_NORMAL; + hdr.normal_meta.egress_port = ig_intr_tm_md.ucast_egress_port; + // only DoD full ndp packets + ig_intr_tm_md.deflect_on_drop = 1w1; + setup_metas_for_egress(); + } + + action mirror_and_drop(MirrorId_t session_id) { + ig_intr_dprsr_md.mirror_type = 3w1; + ig_md.mirror_session_id = session_id; + hdr.egmeta.setValid(); + hdr.egmeta.pkt_type = PKT_TYPE_MIRROR; + hdr.egmeta.dstAddr = hdr.ethernet.dst_addr; + hdr.egmeta.srcAddr = hdr.ethernet.src_addr; + setup_metas_for_egress(); + drop(); + } + + table truncate { + key = { + ig_intr_tm_md.ucast_egress_port : exact; + } + actions = { + mirror_and_drop; + drop; + } + const default_action = drop(); + size = 512; + } + + action set_config(bit<1> p_allow_pessimism, bit<1> always_truncate, + bit<1> drop_ndp) { + allow_pessimism = p_allow_pessimism; + ig_md.always_truncate = (bool)always_truncate; + ig_md.drop_ndp = (bool)drop_ndp; + } + table configure_ndp { + actions = { + set_config; + } + } + + action set_lkp_ip() { + ig_md.lkp.ip_src_addr = (bit<128>)hdr.ipv4.src_addr; + ig_md.lkp.ip_dst_addr = (bit<128>)hdr.ipv4.dst_addr; + ig_md.lkp.ip_proto = hdr.ipv4.protocol; + } + action set_lkp_udp() { + set_lkp_ip(); + ig_md.lkp.l4_src_port = hdr.udp.src_port; + ig_md.lkp.l4_dst_port = hdr.udp.dst_port; + } + action set_lkp_ndp() { + set_lkp_ip(); + ig_md.lkp.l4_src_port = hdr.ndp_s_data.rsvd; + ig_md.lkp.l4_dst_port = hdr.udp.dst_port; + } + action set_lkp_tcp() { + set_lkp_ip(); + ig_md.lkp.l4_src_port = hdr.tcp.src_port; + ig_md.lkp.l4_dst_port = hdr.tcp.dst_port; + } + action set_lkp_ip_unknown() { + set_lkp_ip(); + ig_md.lkp.l4_src_port = hdr.udp.src_port; + ig_md.lkp.l4_dst_port = hdr.udp.dst_port; + } + Hash>(HashAlgorithm_t.IDENTITY) selector_hash; + ActionProfile(ecmp_selection_table_size) ecmp_selector; + 
ActionSelector( + ecmp_selector, + selector_hash, + SelectorMode_t.FAIR, + 16, + ecmp_selection_table_size + ) ecmp_selector_sel; + + table nexthop_resolve { + key = { + ig_md.nhop_idx : exact; + ig_md.hash : selector; + } + actions = { + drop; ipv4_forward; ipv4_forward_direct_connect; + } + size = ecmp_table_size; + const default_action = drop; + implementation = ecmp_selector_sel; + } + + table populate_lkp { + key = { + hdr.ipv4.isValid() : exact; + hdr.udp.isValid() : exact; + hdr.tcp.isValid() : exact; + hdr.ndp_s_data.isValid() : ternary; + } + actions = { + set_lkp_tcp; set_lkp_udp; set_lkp_ip_unknown; drop; NoAction; + set_lkp_ndp; + } + } + + action invalidate_update_and_send(MulticastGroupId_t grp) { + set_high_priority(); + ig_intr_tm_md.bypass_egress = 0; + hdr.update_meta.setInvalid(); + ig_intr_tm_md.mcast_grp_a = grp; + ig_intr_tm_md.level1_mcast_hash = 1; + ig_intr_tm_md.level2_mcast_hash = 1; + hdr.trim_meta.setValid(); + hdr.trim_meta.pkt_type = PKT_TYPE_NOTIFY; + hdr.trim_meta.egress_port = ig_intr_tm_md.ucast_egress_port; + } + + table act_on_update { + key = { + ig_intr_tm_md.ucast_egress_port : exact; + } + actions = { + invalidate_update_and_send; drop; + } + // say 2 is our mcast clone session + default_action = invalidate_update_and_send(0x1000); + } + + apply { + configure_ndp.apply(); + // always deflect on drop by tm + ig_intr_tm_md.bypass_egress = 1; + + // for the moment only accept ipv4 packets + if (!hdr.ipv4.isValid()) { + drop(); + } else { + populate_lkp.apply(); + compute_ip_hash(ig_md.lkp, ig_md.hash); + // IP forwarding and friends + ipv4_lpm.apply(); + nexthop_resolve.apply(); + arp.apply(); + if (hdr.trim_meta.isValid() && hdr.trim_meta.pkt_type == PKT_TYPE_NOTIFY) { + nr_arm.count(hdr.trim_meta.egress_port); + // command pipe to become pessimistic starting now (transition e1) + arm0.execute(hdr.trim_meta.egress_port); + arm1.execute(hdr.trim_meta.egress_port); + drop(); + } else if (hdr.update_meta.isValid()) { + 
act_on_update.apply(); + return_chop.count(ig_intr_tm_md.ucast_egress_port); + } else { + // start ndp logic + bit<1> r1 = transition_0.execute(ig_intr_tm_md.ucast_egress_port); + bit<1> r2 = transition_1.execute(ig_intr_tm_md.ucast_egress_port); + pkt_color_t pesicolor = (bit<2>)meter_pessimistic.execute(ig_intr_tm_md.ucast_egress_port); + pkt_color_t halfcolor = (bit<2>)meter_halftimistic.execute(ig_intr_tm_md.ucast_egress_port); + pkt_color_t opticolor = (bit<2>)meter_optimistic.execute(ig_intr_tm_md.ucast_egress_port); + if (allow_pessimism == 1w0) { + nr_opti.count(ig_intr_tm_md.ucast_egress_port); + qos_md.color = opticolor; + } else { + if (r1 == 1w1) { + nr_pesi.count(ig_intr_tm_md.ucast_egress_port); + qos_md.color = pesicolor; + } else if (r2 == 1w1) { + nr_half.count(ig_intr_tm_md.ucast_egress_port); + qos_md.color = halfcolor; + } else { + nr_opti.count(ig_intr_tm_md.ucast_egress_port); + qos_md.color = opticolor; + } + } + + if (!ig_md.drop_ndp && hdr.udp.isValid() && + (hdr.ndp_s_ctrl.isValid() || hdr.ndp_s_data.isValid())) { + per_port_counter.count(ig_intr_tm_md.ucast_egress_port); + // NDP special case + // INVARIANT: IsTruncated(p) ==> IsNDPData(p) + if (hdr.ndp_s_ctrl.isValid()) { + // If this is a control packet (ACK/NACK), which is + // indicated by the ndp.flags field, then we want + // to directly send these packets to the high + // priority queue. + set_high_priority(); + } else if (!ig_md.always_truncate && qos_md.color == SWITCH_METER_COLOR_GREEN) { + // For non-control packets, check the meter. 
+ // If its green, then go to the low-priority queue + // otherwise, mirror packet + set_low_priority(); + } else { + meter_chop.count(ig_intr_tm_md.ucast_egress_port); + truncate.apply(); + } + } else { + per_port_counter.count(ig_intr_tm_md.ucast_egress_port); + // any other kind of traffic gets mapped to QID == 2 + // in order not to interfere with ndp + // anything other than NDP simply passes through + ig_intr_tm_md.qid = 2; + } + } + } + } +} + +control SwitchEgress( + inout header_t hdr, + inout egress_metadata_t eg_md, + in egress_intrinsic_metadata_t eg_intr_md, + in egress_intrinsic_metadata_from_parser_t eg_intr_md_from_prsr, + inout egress_intrinsic_metadata_for_deparser_t eg_intr_dprs_md, + inout egress_intrinsic_metadata_for_output_port_t eg_intr_oport_md) { + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) reach_egress; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) dod_chop; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) ig_chopped; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) trim_metas; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) mcast_received; + Counter, PortId_t>(32w4096, CounterType_t.PACKETS) mcast_received_actual; + action drop() { + eg_intr_dprs_md.drop_ctl = 1; + } + action nop() {} + action set_ndp_data_flags() { + hdr.ndp_s_data.flags = hdr.ndp_s_data.flags | (8w1 << 7); + // TODO: is it right? 
46 == 64 - 14 (sizeof(ethernet)) - 4 (sizeof(ether_trailer)) + // TODO: please make it configurable + hdr.ipv4.total_len = 46; + hdr.udp.hdr_lenght = 26; + } + action do_invalidate_trim() { + hdr.trim_meta.setInvalid(); + } + action invalidate_trim() { + hdr.trim_meta.setInvalid(); + set_ndp_data_flags(); + } + action invalidate_trim_and_set_update(bit<8> also_add) { + hdr.trim_meta.setInvalid(); + hdr.ipv4.total_len = 46; + hdr.udp.hdr_lenght = 26; + hdr.ndp_s_data.flags = also_add; + hdr.update_meta.setValid(); + hdr.update_meta.pkt_type = PKT_TYPE_MIRROR_UPDATE; + } + table act_on_egress { + key = { + eg_intr_md.egress_port : exact; + } + actions = { + nop; drop; + invalidate_trim; invalidate_trim_and_set_update; + } + default_action = invalidate_trim(); + } + table act_on_notification { + key = { + eg_intr_md.egress_port : exact; + } + actions = { + nop; do_invalidate_trim; + } + const default_action = do_invalidate_trim(); + const entries = { + (0 << 7 | 68) : nop(); + (1 << 7 | 68) : nop(); + (2 << 7 | 68) : nop(); + (3 << 7 | 68) : nop(); + } + } + action set_eg_mirror(MirrorId_t session_id) { + eg_md.mirror_session_id = session_id; + } + + table eg_port2_egmirror { + key = { + hdr.normal_meta.egress_port : exact; + } + actions = { + drop; set_eg_mirror; + } + default_action = drop(); + } + apply { + reach_egress.count(eg_intr_md.egress_port); + // INVARIANT: hdr.ndp_s_data.isValid() == true + if (hdr.egmeta.isValid()) { + ig_chopped.count(eg_intr_md.egress_port); + // this is a chop ordered by ingress => rewrite and send out + hdr.ethernet.src_addr = hdr.egmeta.srcAddr; + hdr.ethernet.dst_addr = hdr.egmeta.dstAddr; + hdr.egmeta.setInvalid(); + set_ndp_data_flags(); + } else { + if (hdr.trim_meta.isValid()) { + if (hdr.trim_meta.pkt_type == PKT_TYPE_TRIM) { + // step 6.X where X is the current pipe_id => + // we are in DoDx port => send it out as it will + // be forwarded back to ingress (step 7.X) + trim_metas.count(hdr.trim_meta.egress_port); + 
act_on_egress.apply(); + } else { + mcast_received.count(eg_intr_md.egress_port); + act_on_notification.apply(); + } + // else if pkt_type == PKT_TYPE_NOTIFY => do nothing and let packet back into ingress + } else { + // normal ndp_data packet from ingress + if (eg_intr_md.deflection_flag == 1w1) { + // step 1: got DoD + dod_chop.count(hdr.normal_meta.egress_port); + eg_port2_egmirror.apply(); + hdr.trim_meta.setValid(); + hdr.trim_meta.pkt_type = PKT_TYPE_TRIM; + hdr.trim_meta.egress_port = hdr.normal_meta.egress_port; + eg_intr_dprs_md.mirror_type = 3w1; + hdr.normal_meta.setInvalid(); + drop(); + // step 2: forward packet with trim_meta which will arrive back in ingress (step 3) + // step 4: clone packet. It will get replicated to all DoD ports in all pipelines (step 5.*) + } else { + hdr.normal_meta.setInvalid(); + } + } + } + } +} +Pipeline(SwitchIngressParser(), + SwitchIngress(), + SwitchIngressDeparser(), + SwitchEgressParser(), + SwitchEgress(), + SwitchEgressDeparser()) pipe; + +Switch(pipe) main; diff --git a/research_projects/NDP/dev_root/samples/multi_pipe/r1.json b/research_projects/NDP/dev_root/samples/multi_pipe/r1.json new file mode 100644 index 0000000..de27866 --- /dev/null +++ b/research_projects/NDP/dev_root/samples/multi_pipe/r1.json @@ -0,0 +1,7 @@ +{ + "0" : { + "file" : "r10_config.json" + }, "1" : { + "file" : "r11_config.json" + } +} \ No newline at end of file diff --git a/research_projects/NDP/dev_root/samples/multi_pipe/r10_config.json b/research_projects/NDP/dev_root/samples/multi_pipe/r10_config.json new file mode 100644 index 0000000..a3b7e42 --- /dev/null +++ b/research_projects/NDP/dev_root/samples/multi_pipe/r10_config.json @@ -0,0 +1,155 @@ +{ + "arp": { + "192.168.1.12": "00:90:fb:65:d6:6b", + "192.168.1.4": "00:90:fb:65:d6:6b", + "192.168.1.8": "00:90:fb:65:d6:6b", + "192.168.1.0": "00:90:fb:65:d6:6b" + }, + "allow_pessimism": 0, + "rates": [ + { + "shaper_burst_kbits": "15000", + "eg_port": 0, + "shaper_rate_kbps": 
"27000000", + "rate_kbps": "25000000", + "burst_kbits": "1000", + "port_speed": "25G", + "port_bufsize": "4500000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "15000", + "eg_port": 4, + "shaper_rate_kbps": "27000000", + "rate_kbps": "25000000", + "burst_kbits": "1000", + "port_speed": "25G", + "port_bufsize": "4500000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "15000", + "eg_port": 8, + "shaper_rate_kbps": "27000000", + "rate_kbps": "25000000", + "burst_kbits": "1000", + "port_speed": "25G", + "port_bufsize": "4500000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "15000", + "eg_port": 12, + "shaper_rate_kbps": "27000000", + "rate_kbps": "25000000", + "burst_kbits": "1000", + "port_speed": "25G", + "port_bufsize": "4500000", + "fec": "NONE" + } + ], + "entries": [ + { + "smac": "0a:10:ad:be:ef:01", + "dip": "10.0.8.0/24", + "eg_port": 4, + "nhop": "192.168.1.4" + }, + { + "smac": "0a:10:ad:be:ef:01", + "dip": "10.0.19.0/24", + "eg_port": 4, + "nhop": "192.168.1.4" + }, + { + "smac": "0a:10:ad:be:ef:00", + "dip": "10.0.16.0/24", + "eg_port": 0, + "nhop": "192.168.1.0" + }, + { + "smac": "0a:10:ad:be:ef:00", + "dip": "10.0.12.0/24", + "eg_port": 0, + "nhop": "192.168.1.0" + }, + { + "smac": "0a:10:ad:be:ef:00", + "dip": "10.0.13.0/24", + "eg_port": 0, + "nhop": "192.168.1.0" + }, + { + "smac": "0a:10:ad:be:ef:01", + "dip": "10.0.10.0/24", + "eg_port": 4, + "nhop": "192.168.1.4" + }, + { + "smac": "0a:10:ad:be:ef:03", + "dip": "10.0.23.0/24", + "eg_port": 12, + "nhop": "192.168.1.12" + }, + { + "smac": "0a:10:ad:be:ef:00", + "dip": "10.0.17.0/24", + "eg_port": 0, + "nhop": "192.168.1.0" + }, + { + "smac": "0a:10:ad:be:ef:00", + "dip": "10.0.15.0/24", + "eg_port": 0, + "nhop": "192.168.1.0" + }, + { + "smac": "0a:10:ad:be:ef:00", + "dip": "10.0.14.0/24", + "eg_port": 0, + "nhop": "192.168.1.0" + }, + { + "smac": "0a:10:ad:be:ef:02", + "dip": "10.0.7.0/24", + "eg_port": 8, + "nhop": "192.168.1.8" + }, + { + "smac": "0a:10:ad:be:ef:01", + "dip": 
"10.0.18.0/24", + "eg_port": 4, + "nhop": "192.168.1.4" + }, + { + "smac": "0a:10:ad:be:ef:01", + "dip": "10.0.9.0/24", + "eg_port": 4, + "nhop": "192.168.1.4" + }, + { + "smac": "0a:10:ad:be:ef:02", + "dip": "10.0.20.0/24", + "eg_port": 8, + "nhop": "192.168.1.8" + }, + { + "smac": "0a:10:ad:be:ef:01", + "dip": "10.0.11.0/24", + "eg_port": 4, + "nhop": "192.168.1.4" + }, + { + "smac": "0a:10:ad:be:ef:03", + "dip": "10.0.22.0/24", + "eg_port": 12, + "nhop": "192.168.1.12" + }, + { + "smac": "0a:10:ad:be:ef:02", + "dip": "10.0.21.0/24", + "eg_port": 8, + "nhop": "192.168.1.8" + } + ] +} \ No newline at end of file diff --git a/research_projects/NDP/dev_root/samples/multi_pipe/r11_config.json b/research_projects/NDP/dev_root/samples/multi_pipe/r11_config.json new file mode 100644 index 0000000..393114b --- /dev/null +++ b/research_projects/NDP/dev_root/samples/multi_pipe/r11_config.json @@ -0,0 +1 @@ +{"arp": {"192.168.1.6": "00:90:fb:65:d6:6b", "192.168.1.10": "00:90:fb:65:d6:6b", "192.168.1.2": "00:90:fb:65:d6:6b", "192.168.1.14": "00:90:fb:65:d6:6b"}, "allow_pessimism": 0, "default_rate": {"burst_kbits": 6000, "rate_kbps": 25000000}, "rates": [{"shaper_burst_kbits": "15000", "eg_port": 128, "shaper_rate_kbps": "27000000", "rate_kbps": "25000000", "burst_kbits": "1000", "port_speed": "25G", "port_bufsize": "4500000", "fec": "NONE"}, {"shaper_burst_kbits": "15000", "eg_port": 132, "shaper_rate_kbps": "27000000", "rate_kbps": "25000000", "burst_kbits": "1000", "port_speed": "25G", "port_bufsize": "4500000", "fec": "NONE"}, {"shaper_burst_kbits": "15000", "eg_port": 136, "shaper_rate_kbps": "27000000", "rate_kbps": "25000000", "burst_kbits": "1000", "port_speed": "25G", "port_bufsize": "4500000", "fec": "NONE"}, {"shaper_burst_kbits": "15000", "eg_port": 140, "shaper_rate_kbps": "27000000", "rate_kbps": "25000000", "burst_kbits": "1000", "port_speed": "25G", "port_bufsize": "4500000", "fec": "NONE"}], "entries": [{"smac": "0a:11:ad:be:ef:01", "dip": "10.0.8.0/24", 
"eg_port": 132, "nhop": "192.168.1.6"}, {"smac": "0a:11:ad:be:ef:01", "dip": "10.0.19.0/24", "eg_port": 132, "nhop": "192.168.1.6"}, {"smac": "0a:11:ad:be:ef:00", "dip": "10.0.16.0/24", "eg_port": 128, "nhop": "192.168.1.2"}, {"smac": "0a:11:ad:be:ef:00", "dip": "10.0.12.0/24", "eg_port": 128, "nhop": "192.168.1.2"}, {"smac": "0a:11:ad:be:ef:00", "dip": "10.0.13.0/24", "eg_port": 128, "nhop": "192.168.1.2"}, {"smac": "0a:11:ad:be:ef:01", "dip": "10.0.10.0/24", "eg_port": 132, "nhop": "192.168.1.6"}, {"smac": "0a:11:ad:be:ef:03", "dip": "10.0.23.0/24", "eg_port": 140, "nhop": "192.168.1.14"}, {"smac": "0a:11:ad:be:ef:00", "dip": "10.0.17.0/24", "eg_port": 128, "nhop": "192.168.1.2"}, {"smac": "0a:11:ad:be:ef:00", "dip": "10.0.15.0/24", "eg_port": 128, "nhop": "192.168.1.2"}, {"smac": "0a:11:ad:be:ef:00", "dip": "10.0.14.0/24", "eg_port": 128, "nhop": "192.168.1.2"}, {"smac": "0a:11:ad:be:ef:02", "dip": "10.0.7.0/24", "eg_port": 136, "nhop": "192.168.1.10"}, {"smac": "0a:11:ad:be:ef:01", "dip": "10.0.18.0/24", "eg_port": 132, "nhop": "192.168.1.6"}, {"smac": "0a:11:ad:be:ef:01", "dip": "10.0.9.0/24", "eg_port": 132, "nhop": "192.168.1.6"}, {"smac": "0a:11:ad:be:ef:02", "dip": "10.0.20.0/24", "eg_port": 136, "nhop": "192.168.1.10"}, {"smac": "0a:11:ad:be:ef:01", "dip": "10.0.11.0/24", "eg_port": 132, "nhop": "192.168.1.6"}, {"smac": "0a:11:ad:be:ef:03", "dip": "10.0.22.0/24", "eg_port": 140, "nhop": "192.168.1.14"}, {"smac": "0a:11:ad:be:ef:02", "dip": "10.0.21.0/24", "eg_port": 136, "nhop": "192.168.1.10"}], "ports": []} \ No newline at end of file diff --git a/research_projects/NDP/dev_root/samples/single_pipe/r0_config.json b/research_projects/NDP/dev_root/samples/single_pipe/r0_config.json new file mode 100644 index 0000000..24515c7 --- /dev/null +++ b/research_projects/NDP/dev_root/samples/single_pipe/r0_config.json @@ -0,0 +1,439 @@ +{ + "arp": { + "10.0.14.13": "b0:26:28:82:ae:fa", + "10.0.10.13": "a0:36:9f:5f:dc:cc", + "10.0.20.13": "40:a6:b7:0a:7f:08", + 
"10.0.11.13": "a0:36:9f:5c:52:f1", + "10.0.16.13": "40:a6:b7:0a:7c:c0", + "10.0.9.13": "a0:36:9f:5c:52:f0", + "10.0.18.13": "40:a6:b7:0a:7f:f8", + "10.0.13.13": "b0:26:28:82:af:c3", + "10.0.22.13": "40:a6:b7:0a:7d:e8", + "10.0.8.13": "a0:36:9f:5c:5a:ac", + "10.0.21.13": "40:a6:b7:0a:7f:09", + "10.0.7.13": "a0:36:9f:5c:58:84", + "192.168.1.7": "0b:00:ad:be:ef:13", + "192.168.1.5": "0b:00:ad:be:ef:12", + "192.168.1.3": "0b:00:ad:be:ef:11", + "10.0.15.13": "b0:26:28:82:ae:fb", + "192.168.1.1": "0b:00:ad:be:ef:10", + "10.0.12.13": "b0:26:28:82:af:c2", + "192.168.1.9": "0b:00:ad:be:ef:14", + "10.0.23.13": "40:a6:b7:0a:7d:e9", + "192.168.1.11": "0b:00:ad:be:ef:15", + "192.168.1.13": "0b:00:ad:be:ef:16", + "192.168.1.15": "0b:00:ad:be:ef:17", + "10.0.19.13": "40:a6:b7:0a:7f:f9", + "10.0.17.13": "40:a6:b7:0a:7c:c1" + }, + "allow_pessimism": 1, + "default_rate": { + "burst_kbits": 6000, + "rate_kbps": 25000000 + }, + "rates": [ + { + "shaper_burst_kbits": "960", + "eg_port": 128, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 0, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 16, + "shaper_rate_kbps": "25000000", + "rate_kbps": "25000000", + "burst_kbits": "6000", + "port_speed": "25G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 19, + "shaper_rate_kbps": "25000000", + "rate_kbps": "25000000", + "burst_kbits": "6000", + "port_speed": "25G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 8, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + 
"shaper_burst_kbits": "960", + "eg_port": 12, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 18, + "shaper_rate_kbps": "25000000", + "rate_kbps": "25000000", + "burst_kbits": "6000", + "port_speed": "25G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 17, + "shaper_rate_kbps": "25000000", + "rate_kbps": "25000000", + "burst_kbits": "6000", + "port_speed": "25G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 272, + "shaper_rate_kbps": "10000000", + "rate_kbps": "10000000", + "burst_kbits": "6000", + "port_speed": "10G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 264, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 268, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 132, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 4, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 256, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 384, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + 
"burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 260, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 388, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 144, + "shaper_rate_kbps": "10000000", + "rate_kbps": "10000000", + "burst_kbits": "6000", + "port_speed": "10G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 145, + "shaper_rate_kbps": "10000000", + "rate_kbps": "10000000", + "burst_kbits": "6000", + "port_speed": "10G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 136, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 140, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 392, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "960", + "eg_port": 396, + "shaper_rate_kbps": "99000000", + "rate_kbps": "99000000", + "burst_kbits": "960", + "port_speed": "100G", + "port_bufsize": "120000", + "fec": "RS" + }, + { + "shaper_burst_kbits": "6000", + "eg_port": 147, + "shaper_rate_kbps": "10000000", + "rate_kbps": "10000000", + "burst_kbits": "6000", + "port_speed": "10G", + "port_bufsize": "120000", + "fec": "NONE" + }, + { + 
"shaper_burst_kbits": "6000", + "eg_port": 146, + "shaper_rate_kbps": "10000000", + "rate_kbps": "10000000", + "burst_kbits": "6000", + "port_speed": "10G", + "port_bufsize": "120000", + "fec": "NONE" + } + ], + "entries": [ + { + "smac": "0a:00:ad:be:ef:00", + "dip": "10.0.16.1/24", + "eg_port": 8, + "nhop": 0 + }, + { + "smac": "0a:00:ad:be:ef:01", + "dip": "10.0.17.1/24", + "eg_port": 12, + "nhop": 0 + }, + { + "smac": "0a:00:ad:be:ef:02", + "dip": "10.0.12.1/24", + "eg_port": 18, + "nhop": 0 + }, + { + "smac": "0a:00:ad:be:ef:03", + "dip": "10.0.13.1/24", + "eg_port": 17, + "nhop": 0 + }, + { + "smac": "0a:00:ad:be:ef:04", + "dip": "10.0.14.1/24", + "eg_port": 16, + "nhop": 0 + }, + { + "smac": "0a:00:ad:be:ef:05", + "dip": "10.0.15.1/24", + "eg_port": 19, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:00", + "dip": "10.0.18.1/24", + "eg_port": 140, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:01", + "dip": "10.0.19.1/24", + "eg_port": 136, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:02", + "dip": "10.0.8.1/24", + "eg_port": 145, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:03", + "dip": "10.0.9.1/24", + "eg_port": 144, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:04", + "dip": "10.0.10.1/24", + "eg_port": 146, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:05", + "dip": "10.0.11.1/24", + "eg_port": 147, + "nhop": 0 + }, + { + "smac": "0a:02:ad:be:ef:00", + "dip": "10.0.20.1/24", + "eg_port": 264, + "nhop": 0 + }, + { + "smac": "0a:02:ad:be:ef:01", + "dip": "10.0.21.1/24", + "eg_port": 268, + "nhop": 0 + }, + { + "smac": "0a:02:ad:be:ef:02", + "dip": "10.0.7.1/24", + "eg_port": 272, + "nhop": 0 + }, + { + "smac": "0a:03:ad:be:ef:00", + "dip": "10.0.22.1/24", + "eg_port": 396, + "nhop": 0 + }, + { + "smac": "0a:03:ad:be:ef:01", + "dip": "10.0.23.1/24", + "eg_port": 392, + "nhop": 0 + }, + { + "smac": "0a:00:ad:be:ef:06", + "dip": "192.168.1.0/31", + "eg_port": 0, + "nhop": 0 + }, + { + "smac": "0a:00:ad:be:ef:07", + "dip": "192.168.1.2/31", + "eg_port": 
128, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:06", + "dip": "192.168.1.4/31", + "eg_port": 256, + "nhop": 0 + }, + { + "smac": "0a:01:ad:be:ef:07", + "dip": "192.168.1.6/31", + "eg_port": 384, + "nhop": 0 + }, + { + "smac": "0a:02:ad:be:ef:03", + "dip": "192.168.1.8/31", + "eg_port": 4, + "nhop": 0 + }, + { + "smac": "0a:02:ad:be:ef:04", + "dip": "192.168.1.10/31", + "eg_port": 132, + "nhop": 0 + }, + { + "smac": "0a:03:ad:be:ef:02", + "dip": "192.168.1.12/31", + "eg_port": 260, + "nhop": 0 + }, + { + "smac": "0a:03:ad:be:ef:03", + "dip": "192.168.1.14/31", + "eg_port": 388, + "nhop": 0 + } + ], + "ports": [] +} \ No newline at end of file diff --git a/research_projects/NDP/dev_root/setup_ndp.py b/research_projects/NDP/dev_root/setup_ndp.py new file mode 100755 index 0000000..614407c --- /dev/null +++ b/research_projects/NDP/dev_root/setup_ndp.py @@ -0,0 +1,978 @@ +################################################################################ +# +# Copyright (c) 2020-2021 Correct Networks, Intel Corporation +# All Rights Reserved. 
# Authors:
#    Dragos Dumitrescu (dragos@correctnetworks.io)
#    Adrian Popa (adrian.popa@correctnetworks.io)
#
# NOTICE: TBD
#
###############################################################################

# --- Standard library --------------------------------------------------------
import argparse
import json
import logging
import math
import os
import struct
import sys
import time
import unittest
from collections import namedtuple, OrderedDict
from csv import reader, writer

# --- gRPC / BfRt client ------------------------------------------------------
import grpc
import bfrt_grpc.bfruntime_pb2 as bfruntime_pb2
import bfrt_grpc.client as gc

# --- SDE thrift fixed-function APIs ------------------------------------------
from thriftutils import *
from res_pd_rpc.ttypes import *
from devport_mgr_pd_rpc import devport_mgr
from devport_mgr_pd_rpc.ttypes import *
from pal_rpc import pal
from pal_rpc.ttypes import *
from tm_api_rpc import tm
from tm_api_rpc.ttypes import *
from mc_pd_rpc import mc
from mc_pd_rpc.ttypes import *
from mirror_pd_rpc import mirror
from mirror_pd_rpc.ttypes import *
from conn_mgr_pd_rpc import conn_mgr
from conn_mgr_pd_rpc.ttypes import *

from thrift.transport import TTransport
from thrift.transport import TSocket
from thrift.protocol import TBinaryProtocol, TMultiplexedProtocol

# Meter colors as encoded by the data plane (qos_md.color in ndp.p4).
SWITCH_METER_COLOR_GREEN = 0
SWITCH_METER_COLOR_YELLOW = 1
SWITCH_METER_COLOR_RED = 2

# Multicast group used to notify all ingress pipes of a Deflect-on-Drop event.
NDP_MCAST_GRP = 0x1000

# NOTE(review): module-level defaults; their use is not visible in this part
# of the file -- kept as-is.
speed = pal_port_speed_t.BF_SPEED_10G
fec = pal_fec_type_t.BF_FEC_TYP_NONE
speed_10g = 2
speed_25g = 4
speed_40g = 8
speed_40g_nb = 16
speed_50g = 32
speed_100g = 64

# Config-file speed strings -> pal enum values.
SPEEDSTR2NR = {
    '10g': pal_port_speed_t.BF_SPEED_10G,
    '25g': pal_port_speed_t.BF_SPEED_25G,
    '40g': pal_port_speed_t.BF_SPEED_40G,
    '40g_nb': pal_port_speed_t.BF_SPEED_40G_NB,
    '50g': pal_port_speed_t.BF_SPEED_50G,
    '100g': pal_port_speed_t.BF_SPEED_100G,
}


def speed_string_to_number(port_speed):
    """Translate a config speed string (e.g. '25G') into a pal_port_speed_t.

    Raises KeyError for unknown speed strings. (The original declared
    ``global SPEEDSTR2NR``, which is unnecessary for read-only access.)
    """
    return SPEEDSTR2NR[port_speed.lower()]


# Config-file FEC strings -> pal enum values.
FEC2NR = {
    'NONE': pal_fec_type_t.BF_FEC_TYP_NONE,
    'RS': pal_fec_type_t.BF_FEC_TYP_REED_SOLOMON,
}


def fec_string_to_number(fec):
    """Translate a config FEC string ('NONE' or 'RS') into a pal_fec_type_t."""
    return FEC2NR[fec.upper()]

# Module-level logger used by all setup helpers below.
logger = logging.getLogger('Test')
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)


def mirror_session(mir_type, mir_dir, sid, egr_port=0, egr_port_v=False,
                   egr_port_queue=0, packet_color=0, mcast_grp_a=0,
                   mcast_grp_a_v=False, mcast_grp_b=0, mcast_grp_b_v=False,
                   max_pkt_len=0, level1_mcast_hash=0, level2_mcast_hash=0,
                   cos=0, c2c=0, extract_len=0, timeout=0, int_hdr=None):
    """Build a MirrorSessionInfo_t with sensible defaults.

    Only ``mir_type``, ``mir_dir`` and ``sid`` are mandatory; the remaining
    arguments mirror the thrift struct fields and default to zero/disabled.
    """
    # Fix the shared-mutable-default pitfall: the original used int_hdr=[].
    # Behavior is unchanged for all existing callers.
    if int_hdr is None:
        int_hdr = []
    return MirrorSessionInfo_t(mir_type,
                               mir_dir,
                               sid,
                               egr_port,
                               egr_port_v,
                               egr_port_queue,
                               packet_color,
                               mcast_grp_a,
                               mcast_grp_a_v,
                               mcast_grp_b,
                               mcast_grp_b_v,
                               max_pkt_len,
                               level1_mcast_hash,
                               level2_mcast_hash,
                               cos,
                               c2c,
                               extract_len,
                               timeout,
                               int_hdr,
                               len(int_hdr))


# iCoS values / queue ids matching ndp.p4's set_high_priority/set_low_priority.
PRIORITY_LOW = 0
PRIORITY_LOW_QID = 0
PRIORITY_HIGH = 7
PRIORITY_HIGH_QID = 1

OTHERS_QID = 2

# Length in bytes that trimmed NDP packets are truncated to.
TRUNCATION_LENGTH = 64

Entry = namedtuple('Entry', ['dip', 'pLen', 'smac', 'eg_port', 'nhop'])
Rate = namedtuple('Rate', ['rate_kbps', 'burst_kbits'])


def get_bufsize(rate):
    """Default buffer size in bytes derived from a Rate's burst (kbits -> bytes)."""
    return rate.burst_kbits * 125


class PortProperties(object):
    """Per-port QoS configuration.

    meter_rate  -- Rate driving the ingress trimming meter.
    shaper_rate -- Rate for the egress queue shaper; defaults to meter_rate.
    bufsize     -- Port buffer size in bytes; derived from the shaper burst
                   when not given explicitly.
    """

    def __init__(self, meter_rate, shaper_rate=None, bufsize=None):
        self.meter_rate = meter_rate
        self.shaper_rate = meter_rate if shaper_rate is None else shaper_rate
        self.bufsize = get_bufsize(self.shaper_rate) if bufsize is None else bufsize
        # Free-form extras (port_speed, fec, ...) filled in by importConfig.
        self.other_properties = {}


class LooksLikeFixedObjectTrait:
    """Copies the fixed-function thrift handles off another object so that
    mixin users can access self.pal/self.tm/... directly."""

    def __init__(self, fixedObject):
        self.pal = fixedObject.pal
        self.tm = fixedObject.tm
        self.mirror = fixedObject.mirror
        self.mc = fixedObject.mc
        self.devport_mgr = fixedObject.devport_mgr
        self.conn_mgr = fixedObject.conn_mgr


class QueryGRPCTrait:
    """Thin convenience wrapper over the BfRt gRPC client for table CRUD."""

    def __init__(self, interface, p4name, bfrt_info=None):
        self.interface = interface
        self.p4name = p4name
        if bfrt_info is not None:
            self.bfrt_info = bfrt_info
        else:
            self.bfrt_info = self.interface.bfrt_info_get(self.p4name)

    def insert_table_entry(self, target, table, keys, action, data):
        """Add a single entry; ``action`` may be None (action-less data)."""
        table_obj = self.bfrt_info.table_get(table)
        key_list = table_obj.make_key(keys)
        # BUG FIX: the original wrapped the already-listed data a second time
        # when an action was given ([data_list] where data_list was a list),
        # sending a nested list to entry_add. Build the flat list once, as
        # insert_or_update_entry below does.
        if action is None:
            data_list = [table_obj.make_data(data)]
        else:
            data_list = [table_obj.make_data(data, action)]
        table_obj.entry_add(target, [key_list], data_list)

    def delete_table_entry(self, target, table, keys):
        """Delete a single entry by key."""
        table_obj = self.bfrt_info.table_get(table)
        key_list = table_obj.make_key(keys)
        table_obj.entry_del(target, [key_list])

    def insert_or_update_entry(self, target, table, keys, action, data):
        """Delete any existing entry for ``keys``, then (re-)add it.

        ``data`` may be None, in which case only the delete+keyed add happens.
        """
        table_obj = self.bfrt_info.table_get(table)
        key_list = table_obj.make_key(keys)
        try:
            table_obj.entry_del(target, [key_list])
        except Exception:
            # Entry may not exist yet; that is fine. (Was a bare except.)
            pass
        if data is not None:
            if action is None:
                data_list = [table_obj.make_data(data)]
            else:
                data_list = [table_obj.make_data(data, action)]
        else:
            data_list = None
        table_obj.entry_add(target, [key_list], data_list)

    def get_table_entry(self, target, table, keys, props):
        """Read entries; ``keys`` None means 'all entries'."""
        table_obj = self.bfrt_info.table_get(table)
        if keys is not None:
            key_list = [table_obj.make_key(keys)]
        else:
            key_list = None
        return table_obj.entry_get(target, key_list, props)

    def modify_table_default_entry(self, target, table, action, data):
        """Set the table's default action and action data."""
        table_obj = self.bfrt_info.table_get(table)
        data_list = table_obj.make_data(data, action)
        table_obj.default_entry_set(target, data_list)


# for each pipe have this state object.
# NOTE(review): this region was a whitespace-mangled patch hunk; it is restored
# here as properly formatted Python.  The trailing section belongs to the
# separate module dev_root/thriftutils.py added by the same patch (see marker).

# it can also setup and teardown itself
class PerPipeConfig(QueryGRPCTrait, LooksLikeFixedObjectTrait):
    """Per-pipe (or device-wide, when pipe_id == 0xffff) NDP configuration.

    Owns table programming (ECMP / LPM / ARP / meters), traffic-manager queue
    setup, mirror sessions (trim + deflect-on-drop) and multicast groups for
    one pipeline of the trimming switch.
    """

    def __init__(self, parent, pipe_id, config_json):
        QueryGRPCTrait.__init__(self, parent.interface, parent.p4_name,
                                bfrt_info=parent.bfrt_info)
        LooksLikeFixedObjectTrait.__init__(self, parent)
        self.parent = parent
        self.pipe = pipe_id
        self.shdl = self.parent.shdl
        self.nr_pipes = self.parent.nr_pipes
        self.device_id = self.parent.device_id
        self.config_json = config_json
        # gRPC target scoped to this pipe; a second one covering all pipes
        self.target = gc.Target(device_id=self.device_id, pipe_id=self.pipe)
        self.all_pipes_target = gc.Target(device_id=self.device_id, pipe_id=0xffff)
        # PD/thrift fixed-API target (0xffff wraps to -1 as expected)
        self.dev_target = DevTarget_t(self.device_id, hex_to_i16(self.pipe))
        self.entries = []           # forwarding entries parsed from config
        self.ports = set()          # ports owned by this config
        self.rates = {}             # port -> Rate (meter rate/burst)
        self.port_properties = {}   # port -> PortProperties
        self.arp = {}               # ip -> mac
        self.importConfig(self.config_json)

    def is_my_port(self, port):
        """True when `port` belongs to a pipe handled by this config."""
        pipe = portToPipe(port)
        if pipe >= self.nr_pipes:
            return False
        if self.pipe == 0xffff:
            return True
        return pipe == self.pipe

    def is_my_pipe(self, pipe):
        """True when `pipe` is handled by this config (0xffff == all)."""
        if self.pipe == 0xffff:
            return True
        return self.pipe == pipe

    def cleanup_ecmp(self):
        """Delete all nexthop / ECMP selector state owned by this pipe."""
        # step 1: get stuff to be erased from the table
        nh_entries = self.get_table_entry(
            self.target, 'SwitchIngress.nexthop_resolve', None, {"from_hw": False})
        for (data, keys) in nh_entries:
            key_dict = keys.to_dict()
            val = key_dict['ig_md.nhop_idx']['value']
            self.delete_table_entry(self.target, 'SwitchIngress.nexthop_resolve',
                                    [gc.KeyTuple('ig_md.nhop_idx', gc.to_bytes(val, 4))])
        sel_entries = self.get_table_entry(
            self.target, 'SwitchIngress.ecmp_selector_sel', None, {'from_hw': True})
        for (data, key) in sel_entries:
            key_dict = key.to_dict()
            gid = key_dict['$SELECTOR_GROUP_ID']['value']
            # low 3 bits of a global id encode the owning pipe
            if self.pipe == 0xffff or (gid & 7 == self.pipe):
                try:
                    self.delete_table_entry(
                        self.target, 'SwitchIngress.ecmp_selector_sel',
                        [gc.KeyTuple('$SELECTOR_GROUP_ID', gc.to_bytes(gid, 4))])
                except Exception:
                    logger.warning('failed to delete group id {}'.format(gid))
        ap_entries = self.get_table_entry(
            self.target, 'SwitchIngress.ecmp_selector', None, {"from_hw": False})
        for (data, key) in ap_entries:
            key_dict = key.to_dict()
            mid = key_dict['$ACTION_MEMBER_ID']['value']
            if self.pipe == 0xffff or (mid & 7 == self.pipe):
                try:
                    self.delete_table_entry(
                        self.target, 'SwitchIngress.ecmp_selector',
                        [gc.KeyTuple('$ACTION_MEMBER_ID', gc.to_bytes(mid, 4))])
                except Exception as ex:
                    logger.warning('failed to delete mid {} because {}'.format(mid, ex))

    def importConfig(self, config_json):
        """Parse the json config into entries, rates, ARP and port properties."""
        shaper_rates = {}
        port_speeds = {}
        port_fecs = {}
        bufsize = {}
        for e in config_json["entries"]:
            port = e["eg_port"]
            if not self.is_my_port(port):
                continue
            prefix, length = e["dip"].split('/')
            length = int(length)
            self.entries.append(Entry(prefix, length, e["smac"], port, e['nhop']))
        for r in config_json["rates"]:
            port = r["eg_port"]
            if not self.is_my_port(port):
                continue
            self.ports.add(port)
            if 'rate_kbps' in r and 'burst_kbits' in r:
                self.rates[port] = Rate(int(r["rate_kbps"]), int(r["burst_kbits"]))
            else:
                logger.warning('WARNING! Port {} has no rate_kbps or burst_kbits attribute'.format(port))
                continue
            if 'shaper_rate_kbps' in r and 'shaper_burst_kbits' in r:
                shaper_rates[port] = Rate(int(r['shaper_rate_kbps']),
                                          int(r['shaper_burst_kbits']))
            if 'port_bufsize' in r:
                bufsize[port] = int(r['port_bufsize'])
            if 'port_speed' in r:
                try:
                    port_speeds[port] = speed_string_to_number(r['port_speed'])
                except Exception:
                    logger.warning('invalid speed string {} for port {}'.format(r['port_speed'], port))
            if 'fec' in r:
                try:
                    port_fecs[port] = fec_string_to_number(r['fec'])
                except Exception:
                    logger.warning('invalid fec {} for port {}'.format(r['fec'], port))
        if 'arp' in config_json:
            for ip in config_json['arp']:
                self.arp[ip] = config_json['arp'][ip]
        self.force_trim = bool(config_json['force_trim']) if 'force_trim' in config_json else False
        self.allow_pessimism = bool(config_json['allow_pessimism']) if 'allow_pessimism' in config_json else True
        for port in self.ports:
            if self.is_my_port(port):
                self.port_properties[port] = PortProperties(self.rates[port],
                                                            shaper_rates.get(port, None),
                                                            bufsize.get(port, None))
                # default to 100G / no FEC when the config does not say
                self.port_properties[port].other_properties['port_speed'] = \
                    port_speeds.get(port, pal_port_speed_t.BF_SPEED_100G)
                self.port_properties[port].other_properties['fec'] = \
                    port_fecs.get(port, pal_fec_type_t.BF_FEC_TYP_NONE)

    def global_id_from_local_id(self, local_id):
        """Encode the owning pipe in the low 3 bits of an id (4 == global)."""
        nr = 4 if self.pipe == 0xffff else self.pipe
        return local_id << 3 | nr

    def handle_entries(self):
        """Program LPM, ECMP selector/group and nexthop_resolve tables."""
        lst = []        # unique (smac, eg_port, nhop) tuples, in first-seen order
        revindex = {}   # tuple -> local member index
        self.ip2acts = {}  # (dip, plen) -> list of member indices
        for e in self.entries:
            tu = (e.smac, e.eg_port, e.nhop)
            if tu not in revindex:
                lst.append(tu)
                revindex[tu] = len(revindex)
        for e in self.entries:
            tu = (e.smac, e.eg_port, e.nhop)
            dst = (e.dip, e.pLen)
            if dst not in self.ip2acts:
                self.ip2acts[dst] = [revindex[tu]]
            else:
                self.ip2acts[dst].append(revindex[tu])
        # TODO: CLEANUP: for the current pipeline, find all entries in table
        # nexthop_resolve
        #  for each e:
        #    if e is Member => mark e.action_member_id for deletion
        #    if e is Group => mark e.group_id for deletion
        #  for each group_id marked for deletion:
        #    for each action_member_id in members: => mark action_member_id for deletion
        # for each next hop - i.e. unique combination of fwd/nhop/srcmac
        # create an ACTION_MEMBER - index + 1 (because we map drop to 0)
        self.insert_or_update_entry(self.target,
                                    'SwitchIngress.ecmp_selector',
                                    [gc.KeyTuple('$ACTION_MEMBER_ID',
                                                 gc.to_bytes(self.global_id_from_local_id(0), 4))],
                                    'SwitchIngress.drop', [])
        groups = self.get_table_entry(
            self.target, 'SwitchIngress.ecmp_selector_sel', None, {'from_hw': True})
        groups = [(x[0].to_dict(), x[1].to_dict()) for x in groups]
        for idx, e in enumerate(lst):
            action = 'SwitchIngress.ipv4_forward'
            if isinstance(e[2], int):
                # integer nhop marks a directly connected destination
                action = 'SwitchIngress.ipv4_forward_direct_connect'
                aparms = [gc.DataTuple('srcAddr', bytearray(gc.mac_to_bytes(e[0]))),
                          gc.DataTuple('port', gc.to_bytes(e[1], 2))]
            else:
                aparms = [gc.DataTuple('srcAddr', bytearray(gc.mac_to_bytes(e[0]))),
                          gc.DataTuple('nhop', bytearray(gc.ipv4_to_bytes(e[2]))),
                          gc.DataTuple('port', gc.to_bytes(e[1], 2))]
            global_index = self.global_id_from_local_id(idx + 1)
            # a member cannot be rewritten while referenced by a group:
            # drop any group still holding it before updating the member
            matching = [g for g in groups if global_index in g[0]['$ACTION_MEMBER_ID']]
            if len(matching) > 0:
                selgrid = matching[0][1]['$SELECTOR_GROUP_ID']['value']
                self.delete_table_entry(
                    self.target, 'SwitchIngress.ecmp_selector_sel',
                    [gc.KeyTuple('$SELECTOR_GROUP_ID', gc.to_bytes(selgrid, 4))])
            self.insert_or_update_entry(
                self.target,
                'SwitchIngress.ecmp_selector',
                [gc.KeyTuple('$ACTION_MEMBER_ID', gc.to_bytes(global_index, 4))],
                action, aparms)
        groupindex = [0]
        r_groupindex = {0: 0}   # members-tuple -> local group id
        nhidx = 1
        for ip in self.ip2acts:
            (dip, plen) = ip
            gnhidx = self.global_id_from_local_id(nhidx)
            self.insert_or_update_entry(
                self.target,
                'SwitchIngress.ipv4_lpm',
                [gc.KeyTuple('hdr.ipv4.dst_addr', bytearray(gc.ipv4_to_bytes(dip)),
                             prefix_len=plen)],
                'SwitchIngress.set_nhop_idx',
                [gc.DataTuple('nhop_idx', gc.to_bytes(gnhidx, 4))])
            nhidx = nhidx + 1
        nhidx = 1
        for ip, actions in self.ip2acts.items():
            gnhidx = self.global_id_from_local_id(nhidx)
            (dip, plen) = ip
            datafield = None
            if len(actions) > 1:
                groupmembers = tuple(actions)
                if groupmembers not in r_groupindex:
                    gid = len(r_groupindex)
                    r_groupindex[groupmembers] = gid
                    groupindex.append(groupmembers)
                    # create group made up of all actions which correspond
                    # to this ip - this is the first time we perceive this group
                    self.insert_or_update_entry(
                        self.target,
                        'SwitchIngress.ecmp_selector_sel',
                        [gc.KeyTuple('$SELECTOR_GROUP_ID',
                                     gc.to_bytes(self.global_id_from_local_id(gid), 4))],
                        None,
                        [gc.DataTuple('$MAX_GROUP_SIZE', gc.to_bytes(8, 4)),
                         gc.DataTuple('$ACTION_MEMBER_STATUS',
                                      bool_arr_val=[True] * len(groupmembers)),
                         gc.DataTuple('$ACTION_MEMBER_ID',
                                      int_arr_val=[self.global_id_from_local_id(x + 1)
                                                   for x in groupmembers])])
                else:
                    gid = r_groupindex[groupmembers]
                datafield = [gc.DataTuple('$SELECTOR_GROUP_ID',
                                          gc.to_bytes(self.global_id_from_local_id(gid), 4))]
            else:
                datafield = [gc.DataTuple('$ACTION_MEMBER_ID',
                                          gc.to_bytes(self.global_id_from_local_id(actions[0] + 1), 4))]
            self.insert_or_update_entry(
                self.target,
                'SwitchIngress.nexthop_resolve',
                [gc.KeyTuple('ig_md.nhop_idx', gc.to_bytes(gnhidx, 4))],
                None,
                datafield)
            nhidx = nhidx + 1
        # nhop_idx 0 always resolves to the drop member
        self.insert_or_update_entry(
            self.target,
            'SwitchIngress.nexthop_resolve',
            [gc.KeyTuple('ig_md.nhop_idx', gc.to_bytes(0, 4))],
            None,
            [gc.DataTuple('$ACTION_MEMBER_ID',
                          gc.to_bytes(self.global_id_from_local_id(0), 4))])

    def set_meter_rate(self, port, rate, meter='SwitchIngress.meter'):
        """Program CIR=PIR / CBS=PBS for `port`'s entry in `meter` (all pipes)."""
        self.insert_table_entry(
            self.all_pipes_target,
            meter,
            [gc.KeyTuple('$METER_INDEX', gc.to_bytes(port, 4))],
            None,
            [gc.DataTuple('$METER_SPEC_CIR_KBPS', gc.to_bytes(rate.rate_kbps, 8)),
             gc.DataTuple('$METER_SPEC_PIR_KBPS', gc.to_bytes(rate.rate_kbps, 8)),
             gc.DataTuple('$METER_SPEC_CBS_KBITS', gc.to_bytes(rate.burst_kbits, 8)),
             gc.DataTuple('$METER_SPEC_PBS_KBITS', gc.to_bytes(rate.burst_kbits, 8))])

    def reset_meter_rate(self, port, meter):
        """Restore `port`'s entry in `meter` to max-rate (all-ones spec)."""
        self.insert_table_entry(
            self.all_pipes_target,
            meter,
            [gc.KeyTuple('$METER_INDEX', gc.to_bytes(port, 4))],
            None,
            [gc.DataTuple('$METER_SPEC_CIR_KBPS', bytearray(b"\xff" * 8)),
             gc.DataTuple('$METER_SPEC_PIR_KBPS', bytearray(b"\xff" * 8)),
             gc.DataTuple('$METER_SPEC_CBS_KBITS', bytearray(b"\xff" * 8)),
             gc.DataTuple('$METER_SPEC_PBS_KBITS', bytearray(b"\xff" * 8))])

    def setUp(self):
        # - configure the TM with 2 priority queues for each port, one high
        #   priority queue for trimmed headers and control packets, and one
        #   lower priority queue for data packets.  This can be done with the
        #   tm_set_q_sched_priority
        for port in self.ports:
            # each config state handles its own ports
            # if config is per-pipe => only configure ports in this pipe
            # if config is global => configure all ports
            if not self.is_my_port(port):
                continue
            # tm_pool_usage_t attributes:
            #   pool, base_use_limit, dynamic_baf, hysteresis
            props = self.port_properties.get(port, None)
            if props is None:
                logger.warning('no port props for {}'.format(port))
                continue
            if 'port_speed' in props.other_properties:
                speed = props.other_properties['port_speed']
            else:
                logger.warning('no speed provided, ignoring port {}'.format(port))
                continue
            if 'fec' in props.other_properties:
                fec = props.other_properties['fec']
            else:
                logger.warning('no fec provided, ignoring port {}'.format(port))
                continue
            try:
                self.pal.pal_port_add(self.device_id, port, speed, fec)
                self.pal.pal_port_an_set(
                    self.device_id, port, pal_autoneg_policy_t.BF_AN_FORCE_DISABLE)
                try:
                    # HACK: somehow the port doesn't start off if not carrying
                    # out this line
                    mu = self.pal.pal_port_mtu_get(self.device_id, port)
                    logger.info('port {} has mtu tx:{},rx:{}'.format(mu.tx_mtu, mu.rx_mtu))
                    self.pal.pal_port_mtu_set(self.device_id, port, mu.tx_mtu, mu.rx_mtu)
                except Exception:
                    pass
            except Exception:
                logger.warning('failed to set speed/fec ({}/{}) for port {}'.format(speed, fec, port))
            self.tm.tm_get_q_sched_priority(self.device_id, port, PRIORITY_LOW_QID)
            try:
                # ndp data should be serviced with lower prio than ndp control
                self.tm.tm_set_q_sched_priority(
                    self.device_id, port, PRIORITY_LOW_QID, PRIORITY_LOW)
                # others should be serviced with low prio wrt ndp control
                self.tm.tm_set_q_sched_priority(
                    self.device_id, port, OTHERS_QID, PRIORITY_LOW)
                # highest prio goes to ndp control
                self.tm.tm_set_q_sched_priority(
                    self.device_id, port, PRIORITY_HIGH_QID, PRIORITY_HIGH)
                high_usage = self.tm.tm_get_q_app_pool_usage(self.device_id, port, PRIORITY_HIGH_QID)
                low_usage = self.tm.tm_get_q_app_pool_usage(self.device_id, port, PRIORITY_LOW_QID)
                others_usage = self.tm.tm_get_q_app_pool_usage(self.device_id, port, OTHERS_QID)
                # set up q for port low priority
                # hold on to N = 32 (#packets) * 9000 (MTU)
                # this means #cells: N / 80 (cell size) == 3600
                # magic number: 9 == no BAF i.e. static queue
                rate = self.rates[port]
                qsize = props.bufsize
                # integer division preserved from the original (Python 2) code
                qsize_in_cells = qsize // 80 + 3
                self.tm.tm_set_q_app_pool_usage(self.device_id, port, PRIORITY_LOW_QID,
                                                low_usage.pool, qsize_in_cells, 9,
                                                low_usage.hysteresis)
                self.tm.tm_set_q_app_pool_usage(self.device_id, port, OTHERS_QID,
                                                others_usage.pool, qsize_in_cells, 9,
                                                others_usage.hysteresis)
                self.tm.tm_set_q_guaranteed_min_limit(self.device_id, port,
                                                      PRIORITY_HIGH_QID, 400)
                high_usage = self.tm.tm_get_q_app_pool_usage(self.device_id, port, PRIORITY_HIGH_QID)
                low_usage = self.tm.tm_get_q_app_pool_usage(self.device_id, port, PRIORITY_LOW_QID)
                others_usage = self.tm.tm_get_q_app_pool_usage(self.device_id, port, OTHERS_QID)
                if props.shaper_rate.rate_kbps != 0:
                    port_shaper_msg = 'shaping buffer:{}kB, speed:{}Gbps'.format(
                        get_bufsize(props.shaper_rate) / 8.0,
                        props.shaper_rate.rate_kbps / 1e6)
                    self.tm.tm_set_port_shaping_rate(self.device_id, port, False,
                                                     get_bufsize(props.shaper_rate),
                                                     props.shaper_rate.rate_kbps)
                    self.tm.tm_enable_port_shaping(self.device_id, port)
                else:
                    port_shaper_msg = 'shaping disabled'
                    self.tm.tm_disable_port_shaping(self.device_id, port)
                logger.info('port {} speed {} fec {} {}'.format(
                    port, speed, fec, port_shaper_msg))
                logger.info('port {} high prio ({}) queue size:{}kB'.format(
                    port, PRIORITY_HIGH_QID, high_usage.base_use_limit * 80 / 1000))
                logger.info('port {} low prio ({}) queue size:{}kB'.format(
                    port, PRIORITY_LOW_QID, low_usage.base_use_limit * 80 / 1000))
                logger.info('port {} others prio ({}) queue size:{}kB'.format(
                    port, OTHERS_QID, others_usage.base_use_limit * 80 / 1000))
            except Exception as ex:
                logger.info('failed to do tm stuff with {}, because {}'.format(port, ex))
        self.pal.pal_port_enable_all(self.device_id)
        for pipe_id in range(0, self.nr_pipes):
            if self.is_my_pipe(pipe_id):
                # deflect-on-drop destination: recirc port 68 of this pipe
                dod_port = (pipe_id << 7) | 68
                self.tm.tm_set_negative_mirror_dest(
                    self.device_id, pipe_id, dod_port, 1)
                print('eg mirror {} -> {}'.format(pipe_id, dod_port))
                info = mirror_session(MirrorType_e.PD_MIRROR_TYPE_NORM,
                                      Direction_e.PD_DIR_EGRESS,
                                      2 * dod_port + 2,
                                      egr_port_v=True,
                                      egr_port=dod_port,
                                      max_pkt_len=TRUNCATION_LENGTH + 3)
                try:
                    # BUGFIX: delete the session about to be recreated
                    # (2*dod_port+2); the original referenced the undefined
                    # name 'sid' here, so the NameError was swallowed and
                    # stale sessions were never removed.
                    self.mirror.mirror_session_delete(
                        self.shdl, self.dev_target, 2 * dod_port + 2)
                except Exception:
                    pass
                self.mirror.mirror_session_create(self.shdl, self.dev_target, info)
        # configure one mirroring session per egress port (i2e). This can be
        # done with the mirror_session_create PDFixed Thrift API
        logger.info("Configuring mirroring sessions")
        for port in self.ports:
            if not self.is_my_port(port):
                continue
            # session id 0 is reserved on Tofino (not Tofino2)
            sid = 2 * port + 1
            self.insert_or_update_entry(
                self.target,
                'SwitchIngress.truncate',
                [gc.KeyTuple('ig_intr_tm_md.ucast_egress_port', gc.to_bytes(port, 2))],
                'SwitchIngress.mirror_and_drop',
                [gc.DataTuple('session_id', gc.to_bytes(sid, 2))])
            info = mirror_session(MirrorType_e.PD_MIRROR_TYPE_NORM,
                                  Direction_e.PD_DIR_INGRESS,
                                  sid,
                                  egr_port=port,
                                  egr_port_v=True,
                                  egr_port_queue=PRIORITY_HIGH_QID,
                                  max_pkt_len=TRUNCATION_LENGTH + 3)
            self.mirror.mirror_session_create(self.shdl, self.dev_target, info)

        NDP_MCAST_GRP = 0x1000
        info = mirror_session(MirrorType_e.PD_MIRROR_TYPE_NORM,
                              Direction_e.PD_DIR_INGRESS,
                              2,
                              mcast_grp_a=NDP_MCAST_GRP,
                              mcast_grp_a_v=True,
                              egr_port_queue=PRIORITY_HIGH_QID)
        self.mirror.mirror_session_create(self.shdl, self.dev_target, info)
        for port in self.ports:
            pipe = portToPipe(port)
            dod_port = (pipe << 7) | 68
            self.insert_or_update_entry(
                self.all_pipes_target,
                'SwitchEgress.eg_port2_egmirror',
                [gc.KeyTuple('hdr.normal_meta.egress_port', gc.to_bytes(port, 2))],
                'SwitchEgress.set_eg_mirror',
                [gc.DataTuple('session_id', gc.to_bytes(2 * dod_port + 2, 2))])
        self.devport_mgr.devport_mgr_set_copy_to_cpu(self.device_id, 1, 192)
        self.conn_mgr.complete_operations(self.shdl)
        logger.info('Populating multicast tables')
        repl_port = 68
        ports = [192]  # CPU port is always part of the notification group
        try:
            self.set_entry_scope_table_attribute(
                self.target, 'SwitchIngressParser.is_recirc_port',
                config_gress_scope=True,
                predefined_gress_scope_val=bfruntime_pb2.Mode.ALL,
                config_pipe_scope=True,
                predefined_pipe_scope=True,
                predefined_pipe_scope_val=bfruntime_pb2.Mode.SINGLE,
                pipe_scope_args=0xff,
                config_prsr_scope=True,
                predefined_prsr_scope_val=bfruntime_pb2.Mode.ALL,
                prsr_scope_args=0xff)
        except Exception:
            pass
        for pipe_id in range(0, self.nr_pipes):
            if not self.is_my_pipe(pipe_id):
                continue
            pipe_repl_port = (pipe_id << 7) | repl_port
            ports.append(pipe_repl_port)
            # program value set SwitchIngressParser.is_recirc_port to be
            # (pipe_id << 7) | repl_port
            try:
                self.delete_table_entry(
                    self.all_pipes_target, 'SwitchIngressParser.is_recirc_port',
                    [gc.KeyTuple('f1', gc.to_bytes(pipe_repl_port, 2),
                                 mask=gc.to_bytes(0xffff, 2))])
            except Exception:
                pass
            self.insert_or_update_entry(
                self.all_pipes_target, 'SwitchIngressParser.is_recirc_port',
                [gc.KeyTuple('f1', gc.to_bytes(pipe_repl_port, 2),
                             mask=gc.to_bytes(0xffff, 2))],
                None, None)
            try:
                self.insert_or_update_entry(
                    self.all_pipes_target,
                    'SwitchEgress.act_on_egress',
                    [gc.KeyTuple('eg_intr_md.egress_port', gc.to_bytes(pipe_repl_port, 2))],
                    'SwitchEgress.invalidate_trim_and_set_update',
                    [gc.DataTuple('also_add', gc.to_bytes((1 << 4) | (1 << 7), 1))])
            except Exception:
                pass
        self.mc.mc_init()
        mcsession = self.mc.mc_create_session()
        cnt = self.mc.mc_mgrp_get_count(mcsession, self.device_id)
        mcgrps = {}
        if cnt > 0:
            # walk existing groups in small batches to learn their handles
            batch = 4
            at = self.mc.mc_mgrp_get_first(mcsession, self.device_id)
            mgid = self.mc.mc_mgrp_get_attr(mcsession, self.device_id, at).mgid
            mcgrps[mgid] = at
            cnt = cnt - 1
            while cnt > 0:
                crtbat = batch if cnt >= batch else cnt
                bat = self.mc.mc_mgrp_get_next_i(mcsession, self.device_id, at, crtbat)
                at = bat[len(bat) - 1]
                for x in bat:
                    mgid = self.mc.mc_mgrp_get_attr(mcsession, self.device_id, x).mgid
                    mcgrps[mgid] = x
                cnt = cnt - crtbat
        try:
            if NDP_MCAST_GRP not in mcgrps:
                mc_hdl = self.mc.mc_mgrp_create(mcsession, self.device_id, NDP_MCAST_GRP)
            else:
                mc_hdl = mcgrps[NDP_MCAST_GRP]
            node_hdl = self.mc.mc_node_create(
                mcsession, self.device_id, 0, set_port_map(ports), set_lag_map([]))
            self.mc.mc_associate_node(mcsession, self.device_id, mc_hdl, node_hdl, 0, False)
            self.mc.mc_complete_operations(mcsession)
        finally:
            self.mc.mc_destroy_session(mcsession)

    def postRun(self):
        """Undo what runTest programmed (tables and meters)."""
        self.cleanup_ecmp()
        for e in self.ip2acts:
            (dip, plen) = e
            self.delete_table_entry(
                self.target,
                'SwitchIngress.ipv4_lpm',
                [gc.KeyTuple('hdr.ipv4.dst_addr', bytearray(gc.ipv4_to_bytes(dip)),
                             prefix_len=plen)])
        for ip in self.arp:
            self.delete_table_entry(
                self.target,
                'SwitchIngress.arp',
                [gc.KeyTuple('ig_md.nhop', bytearray(gc.ipv4_to_bytes(ip)))])
        for port in self.ports:
            self.reset_meter_rate(port, meter='SwitchIngress.meter_optimistic')
            self.reset_meter_rate(port, meter='SwitchIngress.meter_pessimistic')
            # BUGFIX: runTest programs all three meters, so reset all three
            self.reset_meter_rate(port, meter='SwitchIngress.meter_halftimistic')

    def runTest(self):
        print('cleanup {}'.format(self.pipe))
        self.cleanup_ecmp()
        print('handle_entries {}'.format(self.pipe))
        self.handle_entries()
        print('arp {}'.format(self.pipe))
        for ip in self.arp:
            mac = self.arp[ip]
            self.insert_or_update_entry(
                self.target,
                'SwitchIngress.arp',
                [gc.KeyTuple('ig_md.nhop', bytearray(gc.ipv4_to_bytes(ip)))],
                'SwitchIngress.set_dst',
                [gc.DataTuple('dst_addr', bytearray(gc.mac_to_bytes(mac)))])
        self.modify_table_default_entry(
            self.target, 'SwitchIngress.configure_ndp',
            'SwitchIngress.set_config',
            # BUGFIX: honour the parsed allow_pessimism flag (was hardcoded 1)
            [gc.DataTuple('p_allow_pessimism', gc.to_bytes(self.allow_pessimism, 1)),
             gc.DataTuple('always_truncate', gc.to_bytes(self.force_trim, 1)),
             gc.DataTuple('drop_ndp', gc.to_bytes(0, 1))])
        for meter_name in ['SwitchIngress.meter_optimistic',
                           'SwitchIngress.meter_pessimistic',
                           'SwitchIngress.meter_halftimistic']:
            meter_obj = self.bfrt_info.table_get(meter_name)
            # normal_header, IPG + preamble + CRC
            meter_obj.attribute_meter_bytecount_adjust_set(
                self.all_pipes_target, 1 + 12 + 8 + 4)
        logger.info('Setting up meter entries')
        for port in self.ports:
            rate = self.rates[port]
            logger.info("port {} meter rate={}Gbps, buffer={}kB".format(
                port, rate.rate_kbps / 1e6, rate.burst_kbits * 125 / 1000))
            self.set_meter_rate(port, rate, meter='SwitchIngress.meter_optimistic')
            self.set_meter_rate(port,
                                Rate(int(math.floor(rate.rate_kbps / 4.0)), rate.burst_kbits),
                                meter='SwitchIngress.meter_pessimistic')
            self.set_meter_rate(port,
                                Rate(int(math.floor(rate.rate_kbps / 2.0)), rate.burst_kbits),
                                meter='SwitchIngress.meter_halftimistic')

    def tearDown(self):
        for port in self.ports:
            self.delete_table_entry(
                self.target,
                'SwitchIngress.truncate',
                [gc.KeyTuple('ig_intr_tm_md.ucast_egress_port', gc.to_bytes(port, 2))])
            sid = 2 * port + 1
            self.mirror.mirror_session_delete(self.shdl, self.dev_target, sid)
            props = self.port_properties.get(port, None)
            if props is not None:
                self.tm.tm_disable_port_shaping(self.device_id, port)


def portToPipe(port):
    """Pipe id is carried in the upper bits of a devport."""
    return port >> 7


def portToPipeLocalId(port):
    """Port index within its pipe."""
    return port & 0x7F


def portToBitIdx(port):
    """Flat bit index of a devport in the 72-ports-per-pipe bitmap."""
    pipe = portToPipe(port)
    index = portToPipeLocalId(port)
    return 72 * pipe + index


def set_port_map(indicies):
    """Build the thrift port bitmap string for the given devports."""
    # BUGFIX: use floor division for list indexing (Py2 '/' floored ints; on
    # Py3 it would produce a float index and raise TypeError)
    bit_map = [0] * ((288 + 7) // 8)
    for i in indicies:
        index = portToBitIdx(i)
        bit_map[index // 8] = (bit_map[index // 8] | (1 << (index % 8))) & 0xFF
    return bytes_to_string(bit_map)


def set_lag_map(indicies):
    """Build the thrift LAG bitmap string for the given lag indices."""
    bit_map = [0] * ((256 + 7) // 8)  # BUGFIX: floor division (see set_port_map)
    for i in indicies:
        bit_map[i // 8] = (bit_map[i // 8] | (1 << (i % 8))) & 0xFF
    return bytes_to_string(bit_map)


class FixedInterface:
    """Thrift clients for the fixed-function (PD) APIs on port 9090."""

    def __init__(self, addr):
        self.pd_transport = TSocket.TSocket(addr, 9090)
        self.pd_transport = TTransport.TBufferedTransport(self.pd_transport)
        self.pd_transport.open()
        self.binary_protocol = TBinaryProtocol.TBinaryProtocol(self.pd_transport)
        self.conn_mgr = conn_mgr.Client(TMultiplexedProtocol.TMultiplexedProtocol(
            self.binary_protocol, "conn_mgr"))
        self.devport_mgr = devport_mgr.Client(TMultiplexedProtocol.TMultiplexedProtocol(
            self.binary_protocol, "devport_mgr"))
        self.pal = pal.Client(TMultiplexedProtocol.TMultiplexedProtocol(
            self.binary_protocol, "pal"))
        self.tm = tm.Client(TMultiplexedProtocol.TMultiplexedProtocol(
            self.binary_protocol, "tm"))
        self.mirror = mirror.Client(TMultiplexedProtocol.TMultiplexedProtocol(
            self.binary_protocol, "mirror"))
        self.mc = mc.Client(TMultiplexedProtocol.TMultiplexedProtocol(
            self.binary_protocol, "mc"))

    def __del__(self):
        self.pd_transport.close()


class NDPLive(QueryGRPCTrait, LooksLikeFixedObjectTrait):
    """Top-level driver: loads the config, builds one PerPipeConfig per pipe
    (or a single global one) and runs setup / teardown as a context manager."""

    def __init__(self, p4_name, interface, fixedObject, config_file, multi_pipe):
        QueryGRPCTrait.__init__(self, interface, p4_name)
        LooksLikeFixedObjectTrait.__init__(self, fixedObject)
        self.p4_name = p4_name
        self.device_id = 0
        self.pipe_cfgs = {}
        self.target = gc.Target(device_id=self.device_id, pipe_id=0xffff)
        self.config_file = config_file
        self.multi_pipe = multi_pipe

    def __enter__(self):
        self.setUp(self.config_file, self.multi_pipe)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.tearDown()

    def tearDown(self):
        logger.info('Tearing down...')
        self.pal.pal_port_del_all(self.device_id)
        try:
            for cfg in self.pipe_cfgs.values():
                cfg.tearDown()
        except Exception:
            pass
        self.conn_mgr.complete_operations(self.shdl)
        self.conn_mgr.client_cleanup(self.shdl)

    def scope_setup(self):
        """Switch table entry scope to per-pipe or all-pipes as appropriate."""
        pipe_scope = bfruntime_pb2.Mode.SINGLE if len(self.pipe_cfgs) != 1 \
            else bfruntime_pb2.Mode.ALL
        for t in ['SwitchIngress.truncate',
                  'SwitchIngress.ipv4_lpm',
                  'SwitchIngress.arp',
                  'SwitchIngress.configure_ndp',
                  'SwitchIngress.nexthop_resolve']:
            t_obj = self.bfrt_info.table_get(t)
            # only reprogram when the current scope differs
            if next(t_obj.attribute_get(self.target, 'EntryScope'))['pipe_scope']['predef'] != pipe_scope:
                t_obj.default_entry_reset(self.target)
                t_obj.attribute_entry_scope_set(
                    self.target,
                    config_gress_scope=True,
                    predefined_gress_scope_val=bfruntime_pb2.Mode.ALL,
                    config_pipe_scope=True,
                    predefined_pipe_scope=True,
                    predefined_pipe_scope_val=pipe_scope,
                    pipe_scope_args=0xff,
                    config_prsr_scope=True,
                    predefined_prsr_scope_val=bfruntime_pb2.Mode.ALL,
                    prsr_scope_args=0xff)

    def setup_lkp(self):
        """Program the protocol classification (populate_lkp) ternary table."""
        self.insert_or_update_entry(
            self.target, 'SwitchIngress.populate_lkp',
            [gc.KeyTuple('hdr.ipv4.$valid', gc.to_bytes(1, 1)),
             gc.KeyTuple('hdr.udp.$valid', gc.to_bytes(0, 1)),
             gc.KeyTuple('hdr.tcp.$valid', gc.to_bytes(1, 1)),
             gc.KeyTuple('hdr.ndp_s_data.$valid', gc.to_bytes(0, 1), mask=gc.to_bytes(0, 1)),
             gc.KeyTuple('$MATCH_PRIORITY', gc.to_bytes(100, 1))],
            'SwitchIngress.set_lkp_tcp',
            [])
        self.insert_or_update_entry(
            self.target, 'SwitchIngress.populate_lkp',
            [gc.KeyTuple('hdr.ipv4.$valid', gc.to_bytes(1, 1)),
             gc.KeyTuple('hdr.udp.$valid', gc.to_bytes(1, 1)),
             gc.KeyTuple('hdr.tcp.$valid', gc.to_bytes(0, 1)),
             gc.KeyTuple('hdr.ndp_s_data.$valid', gc.to_bytes(0, 1), mask=gc.to_bytes(1, 1)),
             gc.KeyTuple('$MATCH_PRIORITY', gc.to_bytes(50, 1))],
            'SwitchIngress.set_lkp_udp',
            [])
        self.insert_or_update_entry(
            self.target, 'SwitchIngress.populate_lkp',
            [gc.KeyTuple('hdr.ipv4.$valid', gc.to_bytes(1, 1)),
             gc.KeyTuple('hdr.udp.$valid', gc.to_bytes(1, 1)),
             gc.KeyTuple('hdr.tcp.$valid', gc.to_bytes(0, 1)),
             gc.KeyTuple('hdr.ndp_s_data.$valid', gc.to_bytes(1, 1), mask=gc.to_bytes(1, 1)),
             gc.KeyTuple('$MATCH_PRIORITY', gc.to_bytes(50, 1))],
            'SwitchIngress.set_lkp_ndp',
            [])
        self.insert_or_update_entry(
            self.target, 'SwitchIngress.populate_lkp',
            [gc.KeyTuple('hdr.ipv4.$valid', gc.to_bytes(0, 1)),
             gc.KeyTuple('hdr.udp.$valid', gc.to_bytes(0, 1)),
             gc.KeyTuple('hdr.tcp.$valid', gc.to_bytes(0, 1)),
             gc.KeyTuple('hdr.ndp_s_data.$valid', gc.to_bytes(0, 1), mask=gc.to_bytes(0, 1)),
             gc.KeyTuple('$MATCH_PRIORITY', gc.to_bytes(100, 1))],
            'SwitchIngress.set_lkp_ip_unknown',
            [])
        self.modify_table_default_entry(self.target, 'SwitchIngress.populate_lkp',
                                        'NoAction', [])

    def setUp(self, config_file, multi_pipe):
        self.target = gc.Target(device_id=0, pipe_id=0xffff)
        self.nr_pipes = self.pal.pal_num_pipes_get(self.device_id)
        self.pal.pal_port_del_all(self.device_id)
        # Get bfrt_info and set it as part of the test
        self.shdl = self.conn_mgr.client_init()
        # config file should have a single top-level dictionary
        # mapping pipe -> config
        with open(config_file, 'r') as fconfig:
            job = json.load(fconfig)
        if multi_pipe:
            for pipe, config in job.items():
                if int(pipe) >= self.nr_pipes:
                    logger.warning('WARNING! requested pipe {} >= #nr pipes: {}'.format(pipe, self.nr_pipes))
                    continue
                pipe_config_obj = config
                if 'file' in config:
                    # per-pipe config may live in its own file, relative to
                    # the top-level config file
                    actual_file = config['file']
                    if not os.path.isabs(config['file']):
                        actual_file = os.path.join(os.path.dirname(config_file), actual_file)
                    logger.info('configuring {} from {}'.format(pipe, actual_file))
                    with open(actual_file, 'r') as fcfg:
                        pipe_config_obj = json.load(fcfg)
                self.pipe_cfgs[pipe] = PerPipeConfig(self, int(pipe), pipe_config_obj)
        else:
            self.pipe_cfgs[0xffff] = PerPipeConfig(self, 0xffff, job)
        # global configs
        self.scope_setup()
        self.setup_lkp()
        for pipe, cfg in self.pipe_cfgs.items():
            logger.info('setting up pipe {}'.format(pipe))
            cfg.setUp()

    def runTest(self):
        for cfg in self.pipe_cfgs.values():
            cfg.runTest()
        raw_input("Configuration done. Press Enter to cleanup...")
        for cfg in self.pipe_cfgs.values():
            cfg.postRun()


class ConnectionManager:
    """Owns the gRPC client interface plus the thrift fixed-API transport."""

    def __init__(self, addr):
        self.interface = gc.ClientInterface(addr + ':50052', client_id=0,
                                            device_id=0, is_master=True)
        self.fixedObject = FixedInterface(addr)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.interface._tear_down_stream()
        self.fixedObject.pd_transport.close()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-a', dest='grpc_addr',
                        default='localhost', required=False, help='ip of grpc')
    parser.add_argument('-multi-pipe', dest='multi_pipe',
                        action='store_true', help='set to use one instance per pipeline')
    parser.add_argument(
        'input_json', help='input json file to read configs from')
    args = parser.parse_args()
    with ConnectionManager(args.grpc_addr) as conn_mgr:
        conn_mgr.interface.bind_pipeline_config('ndp')
        with NDPLive('ndp', conn_mgr.interface, conn_mgr.fixedObject,
                     args.input_json, args.multi_pipe) as ndplive:
            ndplive.runTest()


if __name__ == '__main__':
    main()


# ======================================================================
# research_projects/NDP/dev_root/thriftutils.py
# (separate module in the original patch: signed/unsigned conversion and
#  packing helpers for the PD thrift APIs)
# ======================================================================
import socket
import struct


def hex_to_i16(h):
    """Reinterpret a 16-bit unsigned value as thrift's signed i16."""
    x = int(h)
    if x > 0x7FFF:
        x -= 0x10000
    return x


def i16_to_hex(h):
    x = int(h)
    if x & 0x8000:
        x += 0x10000
    return x


def hex_to_i32(h):
    """Reinterpret a 32-bit unsigned value as thrift's signed i32."""
    x = int(h)
    if x > 0x7FFFFFFF:
        x -= 0x100000000
    return x


def i32_to_hex(h):
    x = int(h)
    if x & 0x80000000:
        x += 0x100000000
    return x


def hex_to_byte(h):
    """Reinterpret an 8-bit unsigned value as a signed byte."""
    x = int(h)
    if x > 0x7F:
        x -= 0x100
    return x


def byte_to_hex(h):
    x = int(h)
    if x & 0x80:
        x += 0x100
    return x


def uint_to_i32(u):
    if u > 0x7FFFFFFF:
        u -= 0x100000000
    return u


def i32_to_uint(u):
    if u & 0x80000000:
        u += 0x100000000
    return u


def char_to_uchar(x):
    if x >= 0:
        return x
    return 256 + x


def bytes_to_string(byte_array):
    """Pack a list of ints (0..255) into a thrift binary string."""
    form = 'B' * len(byte_array)
    return struct.pack(form, *byte_array)


def string_to_bytes(string):
    form = 'B' * len(string)
    return list(struct.unpack(form, string))


def macAddr_to_string(addr):
    """'aa:bb:cc:dd:ee:ff' -> packed 6-byte string."""
    byte_array = [int(b, 16) for b in addr.split(':')]
    return bytes_to_string(byte_array)


def ipv4Addr_to_i32(addr):
    """Dotted-quad string -> signed i32 (thrift wire representation)."""
    byte_array = [int(b) for b in addr.split('.')]
    res = 0
    for b in byte_array:
        res = res * 256 + b
    return uint_to_i32(res)


def stringify_macAddr(addr):
    return ':'.join('%02x' % char_to_uchar(x) for x in addr)


def i32_to_ipv4Addr(addr):
    return socket.inet_ntoa(struct.pack("!i", addr))


def ipv6Addr_to_string(addr):
    return bytes(socket.inet_pton(socket.AF_INET6, addr))