Skip to content

Commit 42475e2

Browse files
committed
lib/cpuinfo: Increase the file descriptors limit to handle more CPUs
The pqos tool fails with the following errors on systems with 300 or more CPU cores:

$ pqos
NOTE: Mixed use of MSR and kernel interfaces to manage CAT or CMT & MBM may lead to unexpected behavior.
ERROR: Could not open /sys/fs/resctrl directory
ERROR: Failed to stop resctrl events
ERROR: Failed to start all selected OS monitoring events
Monitoring start error on core(s) 339, status 1

By default, the file descriptor limit for a session is 1024. pqos monitoring uses 3 descriptors per CPU for perf monitoring, so it exhausts the 1024-descriptor limit on systems with 300 or more CPUs. Fix the issue by detecting the number of CPUs in the system, reading the current descriptor limit with the getrlimit system call, and raising it with the setrlimit system call. Increase the limit to 4 times the number of CPUs to stay within the open-files limit (the code in this commit budgets MAX_FD_PER_CORE = 5 descriptors per CPU plus MAX_PQOS_FD = 100 extra). Signed-off-by: Babu Moger <[email protected]>
1 parent 14e3840 commit 42475e2

File tree

5 files changed

+54
-1
lines changed

5 files changed

+54
-1
lines changed

lib/common.c

100755100644
+31
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,14 @@
4343
#include <stdlib.h>
4444
#include <string.h>
4545
#include <sys/mman.h>
46+
#include <sys/resource.h>
4647
#include <unistd.h>
4748

49+
/*
 * Maximum required file descriptors per core.
 * Multiplied by the core count in pqos_set_no_files_limit() to size the
 * RLIMIT_NOFILE request.
 */
50+
#define MAX_FD_PER_CORE 5
51+
/*
 * pqos tool opens some file descriptors while using msr interface.
 * This fixed amount is added on top of the per-core budget in
 * pqos_set_no_files_limit().
 */
52+
#define MAX_PQOS_FD 100
53+
4854
FILE *
4955
pqos_fopen(const char *name, const char *mode)
5056
{
@@ -392,3 +398,28 @@ pqos_read(int fd, void *buf, size_t count)
392398

393399
return count;
394400
}
401+
402+
/*
 * Ensure the process may open enough file descriptors for the given
 * number of cores.
 *
 * The required count is (max_core_count * MAX_FD_PER_CORE) + MAX_PQOS_FD.
 * The current RLIMIT_NOFILE limits are queried and, when either the hard
 * or the soft limit is below the required count, raised via setrlimit().
 *
 * Returns PQOS_RETVAL_OK when the limits already suffice or were raised,
 * PQOS_RETVAL_ERROR when getrlimit() or setrlimit() fails.
 */
int
403+
pqos_set_no_files_limit(unsigned long max_core_count)
404+
{
405+
struct rlimit files_limit;
406+
/* Per-core descriptor budget plus a fixed allowance (MAX_PQOS_FD) */
const rlim_t required_fd =
407+
(max_core_count * MAX_FD_PER_CORE) + MAX_PQOS_FD;
408+
409+
/* Fetch the current soft (rlim_cur) and hard (rlim_max) limits */
if (getrlimit(RLIMIT_NOFILE, &files_limit))
410+
return PQOS_RETVAL_ERROR;
411+
412+
/*
 * Check whether the current limits allow opening the required number of
 * file descriptors; only touch them when one of them is too low.
 */
if (files_limit.rlim_max < required_fd ||
414+
files_limit.rlim_cur < required_fd) {
415+
/*
 * The hard limit is raised only when it is too low.
 * NOTE(review): raising rlim_max normally needs extra privilege
 * (e.g. CAP_SYS_RESOURCE on Linux) - confirm callers run with it,
 * otherwise setrlimit() below fails and this returns an error.
 */
if (files_limit.rlim_max < required_fd)
416+
files_limit.rlim_max = required_fd;
417+
418+
/* The soft limit is set to exactly the required count */
files_limit.rlim_cur = required_fd;
419+
420+
if (setrlimit(RLIMIT_NOFILE, &files_limit))
421+
return PQOS_RETVAL_ERROR;
422+
}
423+
424+
return PQOS_RETVAL_OK;
425+
}

lib/common.h

+11
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,17 @@ PQOS_LOCAL void pqos_munmap(void *mem, const uint64_t size);
200200
*/
201201
PQOS_LOCAL ssize_t pqos_read(int fd, void *buf, size_t count);
202202

203+
/**
204+
* @brief Raises the limit on the number of open files so that more
205+
* than 256 CPUs can be handled.
206+
*
207+
* @param [in] max_core_count Max CPUs on the system
208+
*
209+
* @return Operation status
* @retval PQOS_RETVAL_OK on success
210+
* @retval PQOS_RETVAL_ERROR on failure
211+
*/
212+
PQOS_LOCAL int pqos_set_no_files_limit(unsigned long max_core_count);
213+
203214
#ifdef __cplusplus
204215
}
205216
#endif

lib/cpuinfo.c

+6
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
#include "allocation.h"
4141
#include "cap.h"
42+
#include "common.h"
4243
#include "cpu_registers.h"
4344
#include "log.h"
4445
#include "machine.h"
@@ -452,6 +453,11 @@ cpuinfo_build_topo(struct apic_info *apic)
452453
return NULL;
453454
}
454455

456+
if (pqos_set_no_files_limit(max_core_count)) {
457+
LOG_ERROR("Open files limit not sufficient!\n");
458+
return NULL;
459+
}
460+
455461
const size_t mem_sz =
456462
sizeof(*l_cpu) + (max_core_count * sizeof(struct pqos_coreinfo));
457463

lib/os_cpuinfo.c

+5
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,11 @@ os_cpuinfo_topology(void)
267267
return NULL;
268268
}
269269

270+
if (pqos_set_no_files_limit(max_core_count)) {
271+
LOG_ERROR("Open files limit not sufficient!\n");
272+
return NULL;
273+
}
274+
270275
const size_t mem_sz =
271276
sizeof(*cpu) + (max_core_count * sizeof(struct pqos_coreinfo));
272277

pqos/alloc.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ set_allocation_assoc(const struct pqos_devinfo *dev)
840840
static void
841841
fill_core_tab(char *str)
842842
{
843-
unsigned max_cores_count = 128;
843+
unsigned max_cores_count = sysconf(_SC_NPROCESSORS_CONF);
844844
uint64_t *cores = calloc(max_cores_count, sizeof(uint64_t));
845845
unsigned i = 0, n = 0, cos = 0;
846846
char *p = NULL;

0 commit comments

Comments
 (0)