From a13bf2648116f352ebc2cb6aaa32b4f35345d950 Mon Sep 17 00:00:00 2001
From: Chul-Woong Yang
Date: Fri, 28 Mar 2025 12:06:51 +0900
Subject: [PATCH] DPDK: Add support for PCI device hotplug attach and detach

- Implemented hotplug functionality to dynamically attach and detach PCI devices.
- Updated device management to handle hotplug events for PCI devices.

Example:
DBGvpp# detach dpdk 1
DBGvpp# attach dpdk 0000:00:04.0
---
 src/plugins/dpdk/device/cli.c    |  92 ++++++++++++++++++++++
 src/plugins/dpdk/device/common.c |  88 ++++++++++++++++++++++++
 src/plugins/dpdk/device/init.c   | 114 ++++++++++++++++++-------------
 3 files changed, 248 insertions(+), 46 deletions(-)

diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c
index 77f9a27f97b6..1f512e809601 100644
--- a/src/plugins/dpdk/device/cli.c
+++ b/src/plugins/dpdk/device/cli.c
@@ -373,6 +373,98 @@ VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
   .function = show_dpdk_version_command_fn,
 };
 
+extern clib_error_t *dpdk_detach_device (vlib_main_t *vm, dpdk_main_t *dm,
+                                         const u32 hw_if_index,
+                                         u16 *port_id);
+
+/*
+ * CLI handler for "detach dpdk": resolves the argument to a hardware
+ * interface index (by name, or as a bare number) and detaches the DPDK
+ * device behind it.
+ */
+static clib_error_t *
+dpdk_detach_cli_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                            vlib_cli_command_t *cmd)
+{
+  clib_error_t *error = 0;
+  dpdk_main_t *dm = &dpdk_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 hw_if_index = ~0;
+  u16 port_id = ~0;
+
+  if (unformat (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+    ;
+  else if (unformat (input, "%u", &hw_if_index))
+    ;
+  else
+    return clib_error_return (
+      0, "Please specify valid hw-interface name or index");
+
+  /* A bare number is taken as typed, so make sure it names a live
+   * interface before handing it on. */
+  if (!vnet_get_hw_interface_or_null (vnm, hw_if_index))
+    return clib_error_return (0, "hw interface index %u does not exist",
+                              hw_if_index);
+
+  error = dpdk_detach_device (vm, dm, hw_if_index, &port_id);
+  if (!error)
+    vlib_cli_output (vm, "Successfully detached hw_if %u (port_id %u)\n",
+                     hw_if_index, port_id);
+  return error;
+}
+
+/*?
+ * Detach a DPDK interface, given its hardware interface name or index.
+ *
+ * @cliexpar
+ * Example of how to detach the DPDK interface:
+ * @cliexcmd{detach dpdk 1}
+?*/
+VLIB_CLI_COMMAND (detach_dpdk_command, static) = {
+  .path = "detach dpdk",
+  .short_help = "detach dpdk <interface-name|hw-if-index>",
+  .function = dpdk_detach_cli_command_fn,
+};
+
+extern clib_error_t *dpdk_attach_device (vlib_main_t *vm, dpdk_main_t *dm,
+                                         const vlib_pci_addr_t pci_addr,
+                                         u16 *port_id);
+
+/*
+ * CLI handler for "attach dpdk": parses a PCI address and hot-plugs the
+ * device at that address.
+ */
+static clib_error_t *
+dpdk_attach_cli_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                            vlib_cli_command_t *cmd)
+{
+  clib_error_t *error = 0;
+  dpdk_main_t *dm = &dpdk_main;
+  vlib_pci_addr_t pci_addr = { 0 };
+  u16 port_id = ~0;
+
+  if (!unformat (input, "%U", unformat_vlib_pci_addr, &pci_addr))
+    return clib_error_return (0, "Please specify valid pci-addr");
+
+  error = dpdk_attach_device (vm, dm, pci_addr, &port_id);
+  if (!error)
+    vlib_cli_output (vm, "Successfully attached port %u\n", port_id);
+  return error;
+}
+
+/*?
+ * Attach the PCI device at the given address as a DPDK interface.
+ *
+ * @cliexpar
+ * Example of how to attach the DPDK interface:
+ * @cliexcmd{attach dpdk 0000:00:04.0}
+?*/
+VLIB_CLI_COMMAND (attach_dpdk_command, static) = {
+  .path = "attach dpdk",
+  .short_help = "attach dpdk <pci-address>",
+  .function = dpdk_attach_cli_command_fn,
+};
+
 /* Dummy function to get us linked in.
*/
 void
 dpdk_cli_reference (void)
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index d6eed5441b49..0559077549a1 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -514,6 +514,164 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
 }
 #endif /* __linux__ */
 
+/*
+ * Detach the DPDK port behind a VPP hardware interface.
+ *
+ * Brings the interface down and deletes it, closes the ethdev port, removes
+ * the underlying rte_device and drops the entry from dm->devices.
+ *
+ * vm          - vlib main (not used here)
+ * dm          - DPDK plugin main
+ * hw_if_index - hardware interface index of the device to detach
+ * port_id     - [out] DPDK port id of the device, set as soon as the
+ *               device has been resolved
+ *
+ * Returns 0 on success, or an error the caller has to report or free.
+ */
+clib_error_t *
+dpdk_detach_device (vlib_main_t *vm, dpdk_main_t *dm, const u32 hw_if_index,
+                    u16 *port_id)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw;
+  dpdk_device_t *xd;
+  struct rte_eth_dev_info dev_info;
+  clib_error_t *error = 0;
+  u32 dev_instance, last;
+  u16 port;
+  int ret;
+
+  /* The index may be a bare number typed on the CLI, so check it before it
+   * is used to index anything. */
+  hw = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+  if (!hw)
+    return clib_error_return (0, "Unknown hw-if-index %u", hw_if_index);
+
+  /* dev_instance only indexes dm->devices for interfaces owned by DPDK. */
+  if (hw->dev_class_index != dpdk_device_class.index ||
+      hw->dev_instance >= vec_len (dm->devices))
+    return clib_error_return (0, "DPDK device not found for hw-if-index %u",
+                              hw_if_index);
+
+  dev_instance = hw->dev_instance;
+  xd = vec_elt_at_index (dm->devices, dev_instance);
+  port = xd->port_id;
+  *port_id = port;
+
+  if (!rte_eth_dev_is_valid_port (port))
+    return clib_error_return (
+      0, "DPDK device has invalid port. hw-if-index %u portid: %u",
+      hw_if_index, port);
+
+  /* Resolve the rte_device before anything is torn down, so that a failure
+   * here leaves the interface untouched. */
+  if ((ret = rte_eth_dev_info_get (port, &dev_info)))
+    return clib_error_return (0, "rte_eth_dev_info_get(), err=%d", ret);
+
+  if (dev_info.device == NULL)
+    return clib_error_return (0,
+                              "rte_eth_dev_info_get() returns null device");
+
+  /* Point of no return: once the interface is deleted the device entry is
+   * always dropped below, even if DPDK reports an error. */
+  vnet_hw_interface_set_flags (vnm, hw_if_index, 0);
+  /* NOTE(review): the author notes that this also ends up in
+   * dpdk_device_stop(); that cannot be confirmed from this file. */
+  vnet_delete_hw_interface (vnm, hw_if_index);
+
+  if ((ret = rte_eth_dev_close (port)))
+    error = clib_error_return (0, "rte_eth_dev_close(), err=%d", ret);
+  else if ((ret = rte_dev_remove (dev_info.device)))
+    error = clib_error_return (
+      0, "Cannot remove DPDK device for hw-if-index %u (port=%u, err=%d)",
+      hw_if_index, port, ret);
+
+  /* Drop the entry by moving the last device into its slot.
+   * NOTE(review): the moved device's interface still carries its old
+   * dev_instance, i.e. its old index into dm->devices, so later lookups
+   * through hw->dev_instance no longer land on it after a non-tail detach.
+   * That needs a follow-up outside this function (stable slots, or
+   * re-pointing every cached index). */
+  last = vec_len (dm->devices) - 1;
+  if (dev_instance != last)
+    dm->devices[dev_instance] = dm->devices[last];
+  vec_set_len (dm->devices, last);
+
+  return error;
+}
+
+extern void dpdk_bind_device_to_uio (dpdk_config_main_t *conf,
+                                     vlib_pci_addr_t *addr);
+extern clib_error_t *dpdk_init_port (dpdk_main_t *dm, u16 port_id);
+
+/*
+ * Hot-plug the PCI device at pci_addr and set it up as a DPDK interface.
+ *
+ * vm       - vlib main (not used here)
+ * dm       - DPDK plugin main
+ * pci_addr - PCI address of the device to attach
+ * port_id  - [out] DPDK port id of the new device
+ *
+ * Returns 0 on success, or an error the caller has to report or free.
+ */
+clib_error_t *
+dpdk_attach_device (vlib_main_t *vm, dpdk_main_t *dm, vlib_pci_addr_t pci_addr,
+                    u16 *port_id)
+{
+  clib_error_t *error = 0;
+  vnet_main_t *vnm = vnet_get_main ();
+  dpdk_config_main_t *conf = &dpdk_config_main;
+  u8 *dev_name = 0;
+  int plugged = 0;
+  int rv;
+
+  /* DPDK wants a C string and format() only terminates on request. */
+  dev_name = format (dev_name, "%U%c", format_vlib_pci_addr, &pci_addr, 0);
+
+  /* Refuse a device that already has a port, so the rollback at the end can
+   * never unplug a port that is in use. */
+  if (rte_eth_dev_get_port_by_name ((const char *) dev_name, port_id) == 0)
+    {
+      error = clib_error_return (0, "%s is already attached (port %u)",
+                                 dev_name, *port_id);
+      goto done;
+    }
+
+  dpdk_bind_device_to_uio (conf, &pci_addr);
+  rv = rte_eal_hotplug_add ("pci", (const char *) dev_name, "");
+  if (rv != 0)
+    {
+      error =
+        clib_error_return (0, "rte_eal_hotplug_add(%s), err=%d", dev_name, rv);
+      goto done;
+    }
+  plugged = 1;
+
+  rv = rte_eth_dev_get_port_by_name ((const char *) dev_name, port_id);
+  if (rv != 0)
+    {
+      error = clib_error_return (
+        0, "rte_eth_dev_get_port_by_name(%s), err=%d", dev_name, rv);
+      goto done;
+    }
+
+  error = dpdk_init_port (dm, *port_id);
+  if (error)
+    goto done;
+
+  for (int i = 0; i < vec_len (dm->devices); i++)
+    vnet_hw_if_update_runtime_data (vnm, dm->devices[i].hw_if_index);
+
+done:
+  /* Undo the hotplug when the port never made it into VPP.
+   * NOTE(review): assumes dpdk_init_port() only fails before it has
+   * registered anything for the port - confirm. */
+  if (error && plugged)
+    rte_eal_hotplug_remove ("pci", (const char *) dev_name);
+  vec_free (dev_name);
+  return error;
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index aaa2c1f4a68b..30a5ff2ffd79 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -295,45 +295,17 @@ dpdk_counters_xstats_init (dpdk_device_t *xd)
   vec_free (xstats_names);
 }
 
-static clib_error_t *
-dpdk_lib_init (dpdk_main_t * dm)
+static char *if_num_fmt;
+clib_error_t *
+dpdk_init_port(dpdk_main_t *dm, u16 port_id)
 {
   vnet_main_t *vnm = vnet_get_main ();
-  u16 port_id;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
   vnet_device_main_t *vdm = &vnet_device_main;
+  dpdk_device_t *xd;
   vnet_sw_interface_t *sw;
   vnet_hw_interface_t *hi;
-  dpdk_device_t *xd;
-  char *if_num_fmt;
-
-  /* vlib_buffer_t template */
-  vec_validate_aligned (dm->per_thread_data, tm->n_vlib_mains - 1,
-                        CLIB_CACHE_LINE_BYTES);
-  for (int i = 0; i < tm->n_vlib_mains; i++)
-    {
-      dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
i); - clib_memset (&ptd->buffer_template, 0, sizeof (vlib_buffer_t)); - vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0; - } - - if_num_fmt = - dm->conf->interface_name_format_decimal ? "%d/%d/%d" : "%x/%x/%x"; - - /* device config defaults */ - dm->default_port_conf.n_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - dm->default_port_conf.n_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - dm->default_port_conf.n_rx_queues = 1; - dm->default_port_conf.n_tx_queues = tm->n_vlib_mains; - dm->default_port_conf.rss_hf = - RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP; - dm->default_port_conf.max_lro_pkt_size = DPDK_MAX_LRO_SIZE_DEFAULT; - - if ((clib_mem_get_default_hugepage_size () == 2 << 20) && - check_l3cache () == 0) - dm->default_port_conf.n_rx_desc = dm->default_port_conf.n_tx_desc = 512; - - RTE_ETH_FOREACH_DEV (port_id) + do { u8 addr[6]; int rv, q; @@ -343,21 +315,21 @@ dpdk_lib_init (dpdk_main_t * dm) dpdk_driver_t *dr; i8 numa_node; - if (!rte_eth_dev_is_valid_port (port_id)) - continue; + if (!rte_eth_dev_is_valid_port (port_id)) + return clib_error_return (0, "rte_eth_dev_is_valid_port(%u)", port_id); if ((rv = rte_eth_dev_info_get (port_id, &di)) != 0) { dpdk_log_warn ("[%u] failed to get device info. skipping device.", port_id); - continue; + return clib_error_return (0, "rte_eth_dev_info_get(%u)", port_id); } if (di.device == 0) { dpdk_log_warn ("[%u] missing device info. Skipping '%s' device", port_id, di.driver_name); - continue; + return clib_error_return (0, "rte_eth_dev_info_get(%u) missing device", port_id); } devconf = dpdk_find_startup_config (&di); @@ -367,7 +339,7 @@ dpdk_lib_init (dpdk_main_t * dm) { dpdk_log_notice ("[%d] Device %s blacklisted. 
Skipping...",
                            port_id, di.driver_name);
-          continue;
+          return clib_error_return (0, "device(%u) is blacklisted", port_id);
         }
 
       vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
@@ -401,6 +373,7 @@ dpdk_lib_init (dpdk_main_t * dm)
       else
         {
           struct rte_pci_device *pci_dev;
+          vec_reset_length (xd->name);
           if (dr && dr->interface_name_prefix)
             {
               /* prefix override by driver */
@@ -614,6 +587,55 @@ dpdk_lib_init (dpdk_main_t * dm)
                                format_dpdk_device_errors, xd);
       dpdk_counters_xstats_init (xd);
     }
+  while (0);
+  return 0;
+}
+
+/*
+ * Set up the per-thread buffer templates and the default port configuration,
+ * then initialise every port that RTE_ETH_FOREACH_DEV yields.
+ */
+static clib_error_t *
+dpdk_lib_init (dpdk_main_t * dm)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  u16 port_id;
+
+  /* vlib_buffer_t template */
+  vec_validate_aligned (dm->per_thread_data, tm->n_vlib_mains - 1,
+                        CLIB_CACHE_LINE_BYTES);
+  for (int i = 0; i < tm->n_vlib_mains; i++)
+    {
+      dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, i);
+      clib_memset (&ptd->buffer_template, 0, sizeof (vlib_buffer_t));
+      vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+    }
+
+  if_num_fmt =
+    dm->conf->interface_name_format_decimal ? "%d/%d/%d" : "%x/%x/%x";
+
+  /* device config defaults */
+  dm->default_port_conf.n_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
+  dm->default_port_conf.n_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
+  dm->default_port_conf.n_rx_queues = 1;
+  dm->default_port_conf.n_tx_queues = tm->n_vlib_mains;
+  dm->default_port_conf.rss_hf =
+    RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP;
+  dm->default_port_conf.max_lro_pkt_size = DPDK_MAX_LRO_SIZE_DEFAULT;
+
+  if ((clib_mem_get_default_hugepage_size () == 2 << 20) &&
+      check_l3cache () == 0)
+    dm->default_port_conf.n_rx_desc = dm->default_port_conf.n_tx_desc = 512;
+
+  RTE_ETH_FOREACH_DEV (port_id)
+    {
+      /* A port that cannot be set up is skipped so the remaining ports
+       * still come up; free the error so that it is not leaked. */
+      clib_error_t *error = dpdk_init_port (dm, port_id);
+      if (error)
+        clib_error_free (error);
+    }
 
   for (int i = 0; i < vec_len (dm->devices); i++)
     vnet_hw_if_update_runtime_data (vnm, dm->devices[i].hw_if_index);
@@ -621,20 +643,16 @@ dpdk_lib_init (dpdk_main_t * dm)
   return 0;
 }
 
-static void
-dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
+void
+dpdk_bind_device_to_uio(dpdk_config_main_t *conf, vlib_pci_addr_t *addr)
 {
   vlib_main_t *vm = vlib_get_main ();
   clib_error_t *error;
   u8 *pci_addr = 0;
   int num_whitelisted = vec_len (conf->dev_confs);
   vlib_pci_device_info_t *d = 0;
-  vlib_pci_addr_t *addr = 0, *addrs;
   int i;
-
-  addrs = vlib_pci_get_all_dev_addrs ();
-  vec_foreach (addr, addrs)
-    {
+  do {
     dpdk_device_config_t * devconf = 0;
     vec_reset_length (pci_addr);
     pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, addr, 0);
@@ -828,11 +846,25 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
             devconf->is_blacklisted = 1;
             clib_error_report (error);
           }
-    }
+    } while (0);
   vec_free (pci_addr);
   vlib_pci_free_device_info (d);
 }
 
+/* Run dpdk_bind_device_to_uio() on every address returned by
+ * vlib_pci_get_all_dev_addrs(). */
+static void
+dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
+{
+  vlib_pci_addr_t *addr = 0, *addrs;
+
+  addrs = vlib_pci_get_all_dev_addrs ();
+  vec_foreach (addr, addrs)
+    {
+      dpdk_bind_device_to_uio(conf, addr);
+    }
+}
+
 static void
 dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
 {