From d34db68a5786843c7db3e3ffc7478c2bbc07fc39 Mon Sep 17 00:00:00 2001
From: Harish
Date: Thu, 16 Jan 2020 11:35:42 +0530
Subject: [PATCH 1/2] Add test to validate PFN before and after softoffline

Add a test that records the PFN of every page before and after
MADV_SOFT_OFFLINE and fails when any PFN is unchanged.

Signed-off-by: Harish
---
 memory/numa_test.py                    |  18 ++++-
 memory/numa_test.py.data/Makefile      |   5 +-
 memory/numa_test.py.data/softoffline.c | 111 +++++++++++++++++++++++++
 3 files changed, 130 insertions(+), 4 deletions(-)
 create mode 100644 memory/numa_test.py.data/softoffline.c

diff --git a/memory/numa_test.py b/memory/numa_test.py
index 5d75997db..7ff5ac7df 100644
--- a/memory/numa_test.py
+++ b/memory/numa_test.py
@@ -81,12 +81,13 @@ def setUp(self):
             if not smm.check_installed(package) and not smm.install(package):
                 self.cancel('%s is needed for the test to be run' % package)
 
-        for file_name in ['util.c', 'numa_test.c', 'Makefile']:
+        for file_name in ['util.c', 'numa_test.c', 'softoffline.c',
+                          'Makefile']:
             self.copyutil(file_name)
 
         build.make(self.teststmpdir)
 
-    def test(self):
+    def test_movepages(self):
         os.chdir(self.teststmpdir)
         self.log.info("Starting test...")
         cmd = './numa_test -m %s -n %s' % (self.map_type, self.nr_pages)
@@ -98,6 +99,19 @@ def test(self):
         elif ret != 0:
             self.fail('Please check the logs for failure')
 
+    def test_softoffline(self):
+        """
+        Verify that the PFN of every page changes after soft offlining
+        """
+        self.nr_pages = self.params.get(
+            'nr_pages', default=50)
+        os.chdir(self.teststmpdir)
+        self.log.info("Starting test...")
+        cmd = './softoffline -m %s -n %s' % (self.map_type, self.nr_pages)
+        ret = process.system(cmd, shell=True, sudo=True, ignore_status=True)
+        if ret != 0:
+            self.fail('Please check the logs for failure')
+
 
 if __name__ == "__main__":
     main()
diff --git a/memory/numa_test.py.data/Makefile b/memory/numa_test.py.data/Makefile
index 5c36f1f68..001cdeb4c 100644
--- a/memory/numa_test.py.data/Makefile
+++ b/memory/numa_test.py.data/Makefile
@@ -1,8 +1,9 @@
 BIN=numa_test
-all: ${BIN}
+OFFL=softoffline
+all: ${BIN} ${OFFL}
 
 %: %.c util.c
 	cc -o $@ $^ -lpthread -lnuma -lhugetlbfs
 
 clean:
-	rm ${BIN}
+	rm ${BIN} ${OFFL}
diff --git a/memory/numa_test.py.data/softoffline.c b/memory/numa_test.py.data/softoffline.c
new file mode 100644
index 000000000..dabf101fe
--- /dev/null
+++ b/memory/numa_test.py.data/softoffline.c
@@ -0,0 +1,111 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE for more details.
+ * Copyright: 2020 IBM
+ * Author: Aneesh Kumar K.V
+ * Author: Harish
+ */
+
+/*
+ * Fault in nr_pages pages, soft-offline them with MADV_SOFT_OFFLINE and
+ * verify that every page is backed by a different PFN afterwards.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <hugetlbfs.h>
+
+#define errmsg(x, ...) fprintf(stderr, x, ##__VA_ARGS__),exit(1)
+
+/* Defined in util.c, where the address parameter is an unsigned long. */
+extern unsigned long get_pfn(unsigned long addr);
+
+int main(int argc, char *argv[])
+{
+	char *p;
+	int c, i, nr_pages = 3;
+	int page_size = getpagesize();
+	int mapflag = MAP_ANONYMOUS;
+	int protflag = PROT_READ|PROT_WRITE;
+	size_t len;
+	/* PFNs need the full width of unsigned long; a char would truncate them */
+	unsigned long *old_pfn, *new_pfn;
+
+	while ((c = getopt(argc, argv, "m:n:hH")) != -1) {
+		switch(c) {
+		case 'm':
+			if (!strcmp(optarg, "private"))
+				mapflag |= MAP_PRIVATE;
+			else if (!strcmp(optarg, "shared"))
+				mapflag |= MAP_SHARED;
+			else
+				errmsg("invalid optarg for -m\n");
+			break;
+		case 'n':
+			nr_pages = strtoul(optarg, NULL, 10);
+			break;
+		case 'h':
+			mapflag |= MAP_HUGETLB;
+			page_size = gethugepagesize();
+			break;
+		case 'H':
+			errmsg("%s -m [private|shared] -h -n <nr_pages>\n", argv[0]);
+			break;
+		default:
+			errmsg("invalid option\n");
+			break;
+		}
+	}
+
+	if (!(mapflag & (MAP_SHARED | MAP_PRIVATE)))
+		errmsg("Specify shared or private using -m flag\n");
+	if (nr_pages <= 0)
+		errmsg("Number of pages must be positive\n");
+
+	len = (size_t)nr_pages * page_size;
+	old_pfn = malloc(nr_pages * sizeof(*old_pfn));
+	new_pfn = malloc(nr_pages * sizeof(*new_pfn));
+	if (!old_pfn || !new_pfn)
+		errmsg("Failed to allocate PFN arrays\n");
+
+	p = mmap(NULL, len, protflag, mapflag, -1, 0);
+	if (p == MAP_FAILED)
+		errmsg("Failed mmap\n");
+
+	/* fault in */
+	memset(p, 'a', len);
+	for (i = 0; i < nr_pages; i++) {
+		old_pfn[i] = get_pfn((unsigned long)(p + ((size_t)i * page_size)));
+		printf("pfn before soft offline 0x%lx\n", old_pfn[i]);
+	}
+
+	if (madvise(p, len, MADV_SOFT_OFFLINE) == -1)
+		errmsg("madvise failed\n");
+
+	/* touch the pages again before re-reading their PFNs */
+	memset(p, 'a', len);
+	for (i = 0; i < nr_pages; i++) {
+		new_pfn[i] = get_pfn((unsigned long)(p + ((size_t)i * page_size)));
+		printf("pfn after soft offline 0x%lx\n", new_pfn[i]);
+	}
+
+	/* an unchanged PFN is reported as a soft-offline failure */
+	for (i = 0; i < nr_pages; i++) {
+		if (old_pfn[i] == new_pfn[i]) {
+			printf("pfn matches, softoffline failed\n");
+			return -1;
+		}
+	}
+	printf("Softoffline succeeded!\n");
+	return 0;
+}

From 80968148d46d6912fe347aaaa8c2ab80f6a969fc Mon Sep 17 00:00:00 2001
From: Harish
Date: Fri, 17 Jan 2020 09:53:12 +0530
Subject: [PATCH 2/2] Add test to validate migration time between
THP and base page

Add a test that times numa_move_pages() on a THP-advised buffer and on a
base-page buffer of the same size and compares the two durations.

Signed-off-by: Harish
---
 memory/numa_test.py                        |  15 +-
 memory/numa_test.py.data/Makefile          |   5 +-
 memory/numa_test.py.data/bench_movepages.c | 189 +++++++++++++++++++++
 memory/numa_test.py.data/util.c            |  63 ++++++++
 4 files changed, 269 insertions(+), 3 deletions(-)
 create mode 100644 memory/numa_test.py.data/bench_movepages.c

diff --git a/memory/numa_test.py b/memory/numa_test.py
index 7ff5ac7df..9f2f693c4 100644
--- a/memory/numa_test.py
+++ b/memory/numa_test.py
@@ -82,7 +82,7 @@ def setUp(self):
                 self.cancel('%s is needed for the test to be run' % package)
 
         for file_name in ['util.c', 'numa_test.c', 'softoffline.c',
-                          'Makefile']:
+                          'bench_movepages.c', 'Makefile']:
             self.copyutil(file_name)
 
         build.make(self.teststmpdir)
@@ -112,6 +112,19 @@ def test_softoffline(self):
         if ret != 0:
             self.fail('Please check the logs for failure')
 
+    def test_thp_compare(self):
+        """
+        Compare migration time of THP-advised memory against base pages
+        """
+        self.nr_pages = self.params.get(
+            'nr_pages', default=100)
+        os.chdir(self.teststmpdir)
+        self.log.info("Starting test...")
+        cmd = './bench_movepages -n %s' % self.nr_pages
+        ret = process.system(cmd, shell=True, sudo=True, ignore_status=True)
+        if ret != 0:
+            self.fail('Please check the logs for failure')
+
 
 if __name__ == "__main__":
     main()
diff --git a/memory/numa_test.py.data/Makefile b/memory/numa_test.py.data/Makefile
index 001cdeb4c..ffe61c3e4 100644
--- a/memory/numa_test.py.data/Makefile
+++ b/memory/numa_test.py.data/Makefile
@@ -1,9 +1,10 @@
 BIN=numa_test
 OFFL=softoffline
-all: ${BIN} ${OFFL}
+BENCH=bench_movepages
+all: ${BIN} ${OFFL} ${BENCH}
 
 %: %.c util.c
 	cc -o $@ $^ -lpthread -lnuma -lhugetlbfs
 
 clean:
-	rm ${BIN} ${OFFL}
+	rm -f ${BIN} ${OFFL} ${BENCH}
diff --git a/memory/numa_test.py.data/bench_movepages.c b/memory/numa_test.py.data/bench_movepages.c
new file mode 100644
index 000000000..2d715a0e8
--- /dev/null
+++ b/memory/numa_test.py.data/bench_movepages.c
@@ -0,0 +1,189 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See LICENSE for more details.
+ * Copyright: 2020 IBM
+ * Author: Aneesh Kumar K.V
+ * Author: Harish
+ */
+
+/*
+ * Time numa_move_pages() on a THP-advised buffer and on a base-page buffer
+ * of the same size, and fail when the base-page run is not the faster one.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <numa.h>
+#include <numaif.h>
+#include <hugetlbfs.h>
+
+#define errmsg(x, ...) fprintf(stderr, x, ##__VA_ARGS__),exit(1)
+
+extern int is_thp(unsigned long pfn);
+extern unsigned long get_pfn(unsigned long addr);
+extern unsigned long get_first_mem_node(void);
+extern unsigned long get_next_mem_node(unsigned long node);
+
+int verbose;
+void **addrs;
+int nr_pages; /* number of pages in page size */
+int page_size;
+int hpage_size;
+unsigned long dest_node;
+int *status, *nodes;
+
+/*
+ * Move the nr_pages base pages starting at @p to dest_node and return the
+ * seconds spent inside numa_move_pages(); @msg labels the output.
+ */
+double test_migration(void *p, char *msg)
+{
+	int non_thp = 0;
+	int i, ret;
+	unsigned long pfn;
+	double elapsed;
+	char *base = p;
+	struct timespec ts_start, ts_end;
+
+	if (verbose)
+		fprintf(stderr, "%s\n", msg);
+	for (i = 0; i < nr_pages; i++) {
+		addrs[i] = base + ((size_t)i * page_size);
+		nodes[i] = dest_node;
+		status[i] = 0;
+		pfn = get_pfn((unsigned long)addrs[i]);
+		if (pfn) {
+			if (!non_thp && !is_thp(pfn))
+				non_thp = 1;
+			if (verbose)
+				fprintf(stderr, "pfn before move_pages 0x%lx is_thp %d\n",
+					pfn, is_thp(pfn));
+		}
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+	ret = numa_move_pages(0, nr_pages, addrs, nodes, status, MPOL_MF_MOVE_ALL);
+	/* stop the clock before any error handling so only the call is timed */
+	clock_gettime(CLOCK_MONOTONIC, &ts_end);
+	if (ret == -1)
+		errmsg("Failed move_pages\n");
+
+	for (i = 0; i < nr_pages; i++) {
+		pfn = get_pfn((unsigned long)addrs[i]);
+		if (pfn && verbose)
+			fprintf(stderr, "pfn after move_pages 0x%lx is_thp %d\n",
+				pfn, is_thp(pfn));
+	}
+	elapsed = ts_end.tv_sec - ts_start.tv_sec +
+		  (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9;
+	printf("%s time(seconds) (Non THP = %d) = %.6f\n", msg, non_thp, elapsed);
+	return elapsed;
+}
+
+int main(int argc, char *argv[])
+{
+	int c;
+	void *hp, *p;
+	size_t len;
+	unsigned long nr_nodes;
+	struct bitmask *all_nodes, *old_nodes;
+	unsigned long src_node;
+	double thp_time, bp_time;
+
+	/* libnuma calls are undefined unless numa_available() succeeds first */
+	if (numa_available() == -1)
+		errmsg("NUMA is not available on this system\n");
+	nr_nodes = numa_max_node() + 1;
+
+	page_size = getpagesize();
+	hpage_size = gethugepagesize();
+	if (hpage_size <= 0)
+		errmsg("Failed to get huge page size\n");
+
+	while ((c = getopt(argc, argv, "n:vh")) != -1) {
+		switch(c) {
+		case 'n':
+			nr_pages = strtoul(optarg, NULL, 10);
+			/* Now update nr_pages using system page size */
+			nr_pages = nr_pages * (hpage_size / page_size);
+			break;
+		case 'h':
+			errmsg("%s -n <nr_hugepages> [-v]\n", argv[0]);
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			errmsg("invalid option\n");
+			break;
+		}
+	}
+
+	if (nr_pages <= 0)
+		errmsg("Specify a positive number of huge pages using -n\n");
+	if (nr_nodes < 2)
+		errmsg("A minimum of 2 nodes is required for this test.\n");
+
+	all_nodes = numa_bitmask_alloc(nr_nodes);
+	old_nodes = numa_bitmask_alloc(nr_nodes);
+	src_node = get_first_mem_node();
+	dest_node = get_next_mem_node(src_node);
+	/* get_next_mem_node() wraps around, so one memory node yields src == dest */
+	if (src_node == dest_node)
+		errmsg("A minimum of 2 nodes with memory is required for this test.\n");
+	printf("src node = %lu and dest node = %lu\n", src_node, dest_node);
+
+	numa_bitmask_setbit(all_nodes, src_node);
+	numa_bitmask_setbit(all_nodes, dest_node);
+	numa_bitmask_setbit(old_nodes, src_node);
+
+	/*
+	 * NOTE(review): numa_sched_setaffinity() takes a CPU mask, but these
+	 * bitmasks are filled with node ids - confirm whether
+	 * numa_run_on_node_mask() was intended.
+	 */
+	numa_sched_setaffinity(0, old_nodes);
+	addrs = malloc(sizeof(*addrs) * nr_pages);
+	status = malloc(sizeof(*status) * nr_pages);
+	nodes = malloc(sizeof(*nodes) * nr_pages);
+	if (!addrs || !status || !nodes)
+		errmsg("Failed to allocate move_pages arrays\n");
+
+	len = (size_t)nr_pages * page_size;
+	p = aligned_alloc(page_size, len);
+	if (p == NULL)
+		errmsg("Failed aligned_alloc\n");
+
+	hp = aligned_alloc(hpage_size, len);
+	if (hp == NULL)
+		errmsg("Failed aligned_alloc\n");
+
+	/* report but do not abort: test_migration() prints whether non-THP pages were seen */
+	if (madvise(hp, len, MADV_HUGEPAGE) == -1)
+		perror("madvise(MADV_HUGEPAGE)");
+	if (madvise(p, len, MADV_NOHUGEPAGE) == -1)
+		perror("madvise(MADV_NOHUGEPAGE)");
+
+	memset(p, 'a', len);
+	memset(hp, 'a', len);
+
+	numa_sched_setaffinity(0, all_nodes);
+
+	thp_time = test_migration(hp, "THP migration");
+	bp_time = test_migration(p, "Base migration");
+
+	if (bp_time >= thp_time)
+		errmsg("Base page migration took more time\n");
+	return 0;
+}
diff --git a/memory/numa_test.py.data/util.c b/memory/numa_test.py.data/util.c
index 0bcf777b0..017e13973 100644
--- a/memory/numa_test.py.data/util.c
+++ b/memory/numa_test.py.data/util.c
@@ -22,7 +22,14 @@
+#include <fcntl.h>
+#include <numa.h>
 
 #define PMAP_ENTRY_SIZE sizeof(unsigned long)
 #define PM_PFRAME_MASK 0x007FFFFFFFFFFFFFUL
 #define PM_PRESENT 0x8000000000000000UL
+#define KPFLAGS_ENTRY_SIZE sizeof(unsigned long)
+#define KPF_THP_FLAG (1UL<<22)
+
+/* opened on the first is_thp() call and reused by later calls */
+static int kpageflags_fd = -1;
 
 unsigned long get_pfn(unsigned long addr)
 {
@@ -85,3 +92,59 @@ int *get_numa_nodes_to_use(int max_node, unsigned long memory_to_use)
 	}
 	return nodes_to_use;
 }
+
+/*
+ * Return 1 when the /proc/kpageflags entry for @pfn has KPF_THP_FLAG set,
+ * 0 otherwise (including when the entry cannot be read).
+ */
+int is_thp(unsigned long pfn)
+{
+	unsigned long page_flags_entry;
+	unsigned long page_flags_offset;
+
+	if (kpageflags_fd == -1) {
+		kpageflags_fd = open("/proc/kpageflags", O_RDONLY);
+		if (kpageflags_fd == -1)
+			return 0;
+	}
+
+	page_flags_offset = pfn * KPFLAGS_ENTRY_SIZE;
+
+	/* a short read leaves the entry partly uninitialised, so treat it as failure */
+	if (pread(kpageflags_fd, &page_flags_entry, KPFLAGS_ENTRY_SIZE,
+		  page_flags_offset) != (ssize_t)KPFLAGS_ENTRY_SIZE) {
+		printf("%s Failed to read\n", __func__);
+		return 0;
+	}
+	return !!(page_flags_entry & KPF_THP_FLAG);
+}
+
+/*
+ * Return the first node after @node that reports memory, wrapping around
+ * to node 0 once; return 0 when no node reports any memory.
+ */
+unsigned long get_next_mem_node(unsigned long node)
+{
+	unsigned long i;
+	unsigned long max_node = numa_max_node();
+
+	for (;;) {
+		/* start from node and find the next memory node */
+		for (i = node + 1; i <= max_node; i++) {
+			if (numa_node_size(i, NULL) > 0)
+				return i;
+		}
+		/* But how can we run without memory? */
+		if (node == (unsigned long)-1)
+			return 0;
+
+		/* (unsigned long)-1 + 1 wraps to 0, restarting the scan at node 0 */
+		node = (unsigned long)-1;
+	}
+}
+
+/* Return the lowest-numbered node that reports memory (0 when none does). */
+unsigned long get_first_mem_node(void)
+{
+	return get_next_mem_node((unsigned long)-1);
+}