Commit acc11ea
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Avoid creating superfluous NUMA domains on non-NUMA systems
  sched: Allow for overlapping sched_domain spans
  sched: Break out cpu_power from the sched_group structure

2 parents 919d25a + d110235
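The first patch in this pull ("sched: Avoid creating superfluous NUMA domains on non-NUMA systems") makes build_sched_domains() stop stacking topology levels once a level's span already covers the whole cpu_map (see the last two hunks of kernel/sched.c below). A minimal standalone C sketch of that idea, with made-up level names and toy bitmask spans rather than real cpumasks (not kernel code):

/* Toy model of the early break added to build_sched_domains() (not kernel code). */
#include <stdio.h>

int main(void)
{
	const char *level_name[] = { "SMT", "MC", "CPU", "NODE", "ALLNODES" };
	unsigned int level_span[] = { 0x03, 0x0f, 0xff, 0xff, 0xff };
	unsigned int cpu_map = 0xff;	/* all 8 CPUs of a non-NUMA box */
	int i;

	for (i = 0; i < 5; i++) {
		printf("built domain level %s\n", level_name[i]);
		if (level_span[i] == cpu_map)
			break;		/* this level already spans everything */
	}
	return 0;
}

On this toy 8-CPU box the loop stops after the CPU level, so a non-NUMA machine never grows redundant NODE/ALLNODES domains on top.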

4 files changed: +190 -61 lines

include/linux/sched.h (+10 -4)

@@ -844,6 +844,7 @@ enum cpu_idle_type {
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800	/* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 
 enum powersavings_balance_level {
 	POWERSAVINGS_BALANCE_NONE = 0,	/* No power saving load balance */
@@ -893,16 +894,21 @@ static inline int sd_power_saving_flags(void)
 	return 0;
 }
 
-struct sched_group {
-	struct sched_group *next;	/* Must be a circular list */
+struct sched_group_power {
 	atomic_t ref;
-
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
 	 */
-	unsigned int cpu_power, cpu_power_orig;
+	unsigned int power, power_orig;
+};
+
+struct sched_group {
+	struct sched_group *next;	/* Must be a circular list */
+	atomic_t ref;
+
 	unsigned int group_weight;
+	struct sched_group_power *sgp;
 
 	/*
 	 * The CPUs this group covers.

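The include/linux/sched.h hunk above breaks cpu_power out of struct sched_group into a separate, refcounted struct sched_group_power reached through sg->sgp, so that several groups can share one power structure. A minimal userspace sketch of that sharing pattern, using C11 atomics in place of the kernel's atomic_t; the struct and function names here are illustrative only, not the kernel API:

/* Standalone model of refcounted, shared group power (not kernel code). */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct group_power {
	atomic_int ref;
	unsigned int power;		/* was sched_group::cpu_power */
};

struct group {
	struct group *next;		/* circular list, as in sched_group */
	struct group_power *sgp;	/* shared, refcounted */
};

static struct group *group_alloc(struct group_power *sgp)
{
	struct group *g = calloc(1, sizeof(*g));

	g->sgp = sgp;
	atomic_fetch_add(&sgp->ref, 1);	/* mirrors atomic_inc(&sg->sgp->ref) */
	return g;
}

static void group_free(struct group *g)
{
	/* free the shared power struct only when the last reference drops */
	if (atomic_fetch_sub(&g->sgp->ref, 1) == 1)
		free(g->sgp);
	free(g);
}

int main(void)
{
	struct group_power *sgp = calloc(1, sizeof(*sgp));
	struct group *a, *b;

	atomic_init(&sgp->ref, 0);
	sgp->power = 1024;		/* SCHED_POWER_SCALE-style value */

	a = group_alloc(sgp);
	b = group_alloc(sgp);		/* second group sharing the same sgp */
	a->next = b;
	b->next = a;			/* two-element circular list */

	printf("shared power %u, refs %d\n", a->sgp->power, atomic_load(&sgp->ref));

	group_free(a);
	group_free(b);			/* last put frees sgp */
	return 0;
}

This is the lifetime rule the free_sched_groups()/free_sched_domain() hunks below implement with atomic_dec_and_test(): overlapping groups built for different CPUs point at the same sched_group_power, and the last group to drop its reference frees it.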
kernel/sched.c (+155 -34)

@@ -6557,7 +6557,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!group->cpu_power) {
+		if (!group->sgp->power) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: domain->cpu_power not "
 					"set\n");
@@ -6581,9 +6581,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->cpu_power != SCHED_POWER_SCALE) {
+		if (group->sgp->power != SCHED_POWER_SCALE) {
 			printk(KERN_CONT " (cpu_power = %d)",
-				group->cpu_power);
+				group->sgp->power);
 		}
 
 		group = group->next;
@@ -6774,11 +6774,39 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{
+	struct sched_group *tmp, *first;
+
+	if (!sg)
+		return;
+
+	first = sg;
+	do {
+		tmp = sg->next;
+
+		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+			kfree(sg->sgp);
+
+		kfree(sg);
+		sg = tmp;
+	} while (sg != first);
+}
+
 static void free_sched_domain(struct rcu_head *rcu)
 {
 	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-	if (atomic_dec_and_test(&sd->groups->ref))
+
+	/*
+	 * If its an overlapping domain it has private groups, iterate and
+	 * nuke them all.
+	 */
+	if (sd->flags & SD_OVERLAP) {
+		free_sched_groups(sd->groups, 1);
+	} else if (atomic_dec_and_test(&sd->groups->ref)) {
+		kfree(sd->groups->sgp);
 		kfree(sd->groups);
+	}
 	kfree(sd);
 }
 
@@ -6945,6 +6973,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
+	struct sched_group_power **__percpu sgp;
 };
 
 struct s_data {
@@ -6964,15 +6993,73 @@ struct sched_domain_topology_level;
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 
+#define SDTL_OVERLAP	0x01
+
 struct sched_domain_topology_level {
 	sched_domain_init_f init;
 	sched_domain_mask_f mask;
+	int		    flags;
 	struct sd_data      data;
 };
 
-/*
- * Assumes the sched_domain tree is fully constructed
- */
+static int
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
+{
+	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+	const struct cpumask *span = sched_domain_span(sd);
+	struct cpumask *covered = sched_domains_tmpmask;
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *child;
+	int i;
+
+	cpumask_clear(covered);
+
+	for_each_cpu(i, span) {
+		struct cpumask *sg_span;
+
+		if (cpumask_test_cpu(i, covered))
+			continue;
+
+		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				GFP_KERNEL, cpu_to_node(i));
+
+		if (!sg)
+			goto fail;
+
+		sg_span = sched_group_cpus(sg);
+
+		child = *per_cpu_ptr(sdd->sd, i);
+		if (child->child) {
+			child = child->child;
+			cpumask_copy(sg_span, sched_domain_span(child));
+		} else
+			cpumask_set_cpu(i, sg_span);
+
+		cpumask_or(covered, covered, sg_span);
+
+		sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+		atomic_inc(&sg->sgp->ref);
+
+		if (cpumask_test_cpu(cpu, sg_span))
+			groups = sg;
+
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
+		last->next = first;
+	}
+	sd->groups = groups;
+
+	return 0;
+
+fail:
+	free_sched_groups(first, 0);
+
+	return -ENOMEM;
+}
+
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6981,31 +7068,37 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg)
+	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
+		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+	}
 
 	return cpu;
 }
 
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
  * build_sched_groups will build a circular linked list of the groups
 * covered by the given span, and will set each group's ->cpumask correctly,
 * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
 */
-static void
-build_sched_groups(struct sched_domain *sd)
+static int
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	struct sd_data *sdd = sd->private;
 	const struct cpumask *span = sched_domain_span(sd);
 	struct cpumask *covered;
 	int i;
 
+	get_group(cpu, sdd, &sd->groups);
+	atomic_inc(&sd->groups->ref);
+
+	if (cpu != cpumask_first(sched_domain_span(sd)))
+		return 0;
+
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
@@ -7020,7 +7113,7 @@ build_sched_groups(struct sched_domain *sd)
 			continue;
 
 		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
+		sg->sgp->power = 0;
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -7037,6 +7130,8 @@ build_sched_groups(struct sched_domain *sd)
 		last = sg;
 	}
 	last->next = first;
+
+	return 0;
 }
 
 /*
@@ -7051,12 +7146,17 @@ build_sched_groups(struct sched_domain *sd)
  */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-	WARN_ON(!sd || !sd->groups);
+	struct sched_group *sg = sd->groups;
 
-	if (cpu != group_first_cpu(sd->groups))
-		return;
+	WARN_ON(!sd || !sg);
 
-	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+	do {
+		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+		sg = sg->next;
+	} while (sg != sd->groups);
+
+	if (cpu != group_first_cpu(sg))
+		return;
 
 	update_group_power(sd, cpu);
 }
@@ -7177,15 +7277,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
 	struct sd_data *sdd = sd->private;
-	struct sched_group *sg = sd->groups;
 
 	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
 	*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-	if (cpu == cpumask_first(sched_group_cpus(sg))) {
-		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
-	}
+
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
+		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -7210,7 +7310,7 @@ static struct sched_domain_topology_level default_topology[] = {
 #endif
 	{ sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-	{ sd_init_NODE, cpu_node_mask, },
+	{ sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
 	{ sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
 	{ NULL, },
@@ -7234,9 +7334,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		if (!sdd->sg)
 			return -ENOMEM;
 
+		sdd->sgp = alloc_percpu(struct sched_group_power *);
+		if (!sdd->sgp)
+			return -ENOMEM;
+
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
+			struct sched_group_power *sgp;
 
 			sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7356,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 				return -ENOMEM;
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
+
+			sgp = kzalloc_node(sizeof(struct sched_group_power),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sgp)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sgp, j) = sgp;
 		}
 	}
 
@@ -7266,11 +7378,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
 		struct sd_data *sdd = &tl->data;
 
 		for_each_cpu(j, cpu_map) {
-			kfree(*per_cpu_ptr(sdd->sd, j));
+			struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+			if (sd && (sd->flags & SD_OVERLAP))
+				free_sched_groups(sd->groups, 0);
 			kfree(*per_cpu_ptr(sdd->sg, j));
+			kfree(*per_cpu_ptr(sdd->sgp, j));
 		}
 		free_percpu(sdd->sd);
 		free_percpu(sdd->sg);
+		free_percpu(sdd->sgp);
 	}
 }
 
@@ -7316,8 +7432,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 		struct sched_domain_topology_level *tl;
 
 		sd = NULL;
-		for (tl = sched_domain_topology; tl->init; tl++)
+		for (tl = sched_domain_topology; tl->init; tl++) {
 			sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+				sd->flags |= SD_OVERLAP;
+			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
+				break;
+		}
 
 		while (sd->child)
 			sd = sd->child;
@@ -7329,13 +7450,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			sd->span_weight = cpumask_weight(sched_domain_span(sd));
-			get_group(i, sd->private, &sd->groups);
-			atomic_inc(&sd->groups->ref);
-
-			if (i != cpumask_first(sched_domain_span(sd)))
-				continue;
-
-			build_sched_groups(sd);
+			if (sd->flags & SD_OVERLAP) {
+				if (build_overlap_sched_groups(sd, i))
+					goto error;
+			} else {
+				if (build_sched_groups(sd, i))
+					goto error;
+			}
 		}
 	}
 
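For the overlapping (SD_OVERLAP) case, build_overlap_sched_groups() above walks every CPU in the domain span, skips CPUs that are already covered, and emits one group per uncovered CPU spanning that CPU's child-domain span. A toy standalone C model of just that covering loop, with cpumasks reduced to plain bitmasks and a hard-coded two-node layout (illustrative only, not kernel code):

/* Toy model of the covering loop in build_overlap_sched_groups(). */
#include <stdio.h>

#define NR_CPUS 8

int main(void)
{
	/* child-domain span per CPU: two NUMA-node-like halves of 4 CPUs */
	unsigned int child_span[NR_CPUS] = {
		0x0f, 0x0f, 0x0f, 0x0f,		/* CPUs 0-3 */
		0xf0, 0xf0, 0xf0, 0xf0,		/* CPUs 4-7 */
	};
	unsigned int span = 0xff;		/* the overlapping domain's span */
	unsigned int covered = 0;
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		unsigned int sg_span;

		if (!(span & (1u << i)) || (covered & (1u << i)))
			continue;		/* outside span or already covered */

		sg_span = child_span[i];	/* group spans the child domain */
		covered |= sg_span;
		printf("group built for cpu %d spans 0x%02x\n", i, sg_span);
	}
	return 0;
}

With this layout the loop emits one group covering CPUs 0-3 and one covering CPUs 4-7; in the kernel those groups are then linked into the circular ->next list and sd->groups is pointed at the group that contains the CPU the domain is being built for.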