@@ -6557,7 +6557,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!group->cpu_power) {
+		if (!group->sgp->power) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: domain->cpu_power not "
 					"set\n");
@@ -6581,9 +6581,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->cpu_power != SCHED_POWER_SCALE) {
+		if (group->sgp->power != SCHED_POWER_SCALE) {
 			printk(KERN_CONT " (cpu_power = %d)",
-				group->cpu_power);
+				group->sgp->power);
 		}
 
 		group = group->next;
@@ -6774,11 +6774,39 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{
+	struct sched_group *tmp, *first;
+
+	if (!sg)
+		return;
+
+	first = sg;
+	do {
+		tmp = sg->next;
+
+		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+			kfree(sg->sgp);
+
+		kfree(sg);
+		sg = tmp;
+	} while (sg != first);
+}
+
 static void free_sched_domain(struct rcu_head *rcu)
 {
 	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-	if (atomic_dec_and_test(&sd->groups->ref))
+
+	/*
+	 * If its an overlapping domain it has private groups, iterate and
+	 * nuke them all.
+	 */
+	if (sd->flags & SD_OVERLAP) {
+		free_sched_groups(sd->groups, 1);
+	} else if (atomic_dec_and_test(&sd->groups->ref)) {
+		kfree(sd->groups->sgp);
 		kfree(sd->groups);
+	}
 	kfree(sd);
 }
 
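The new free_sched_groups() walks a circular, singly-linked ring of groups; the sched_group_power payload may be shared by several rings, so it is kfree'd only when its refcount drops to zero, and free_sched_domain() uses the same helper for SD_OVERLAP domains, whose groups are private. A minimal userspace sketch of that ownership pattern (plain C, not kernel code; the group/group_power types are invented and a bare int stands in for the kernel's atomic_t):

/*
 * Userspace model of the free_sched_groups() pattern: a circular ring of
 * groups whose "power" payload can be shared, freed only by its last user.
 */
#include <stdio.h>
#include <stdlib.h>

struct group_power {
	int ref;		/* stands in for sched_group_power::ref */
	unsigned long power;
};

struct group {
	struct group *next;	/* circular: last->next == first */
	struct group_power *gp;
};

static void free_groups(struct group *g, int free_gp)
{
	struct group *first = g, *tmp;

	if (!g)
		return;

	do {
		tmp = g->next;
		if (free_gp && --g->gp->ref == 0)	/* last user frees payload */
			free(g->gp);
		free(g);
		g = tmp;
	} while (g != first);
}

int main(void)
{
	struct group_power *gp = malloc(sizeof(*gp));
	struct group *a = malloc(sizeof(*a)), *b = malloc(sizeof(*b));

	gp->ref = 2;		/* both groups reference the same payload */
	gp->power = 1024;
	a->gp = b->gp = gp;
	a->next = b;		/* build the two-element ring */
	b->next = a;

	free_groups(a, 1);	/* walks the ring once, frees gp exactly once */
	puts("ring freed");
	return 0;
}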
@@ -6945,6 +6973,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
+	struct sched_group_power **__percpu sgp;
 };
 
 struct s_data {
@@ -6964,15 +6993,73 @@ struct sched_domain_topology_level;
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 
+#define SDTL_OVERLAP	0x01
+
 struct sched_domain_topology_level {
 	sched_domain_init_f init;
 	sched_domain_mask_f mask;
+	int		    flags;
 	struct sd_data      data;
 };
 
-/*
- * Assumes the sched_domain tree is fully constructed
- */
+static int
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
+{
+	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+	const struct cpumask *span = sched_domain_span(sd);
+	struct cpumask *covered = sched_domains_tmpmask;
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *child;
+	int i;
+
+	cpumask_clear(covered);
+
+	for_each_cpu(i, span) {
+		struct cpumask *sg_span;
+
+		if (cpumask_test_cpu(i, covered))
+			continue;
+
+		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				GFP_KERNEL, cpu_to_node(i));
+
+		if (!sg)
+			goto fail;
+
+		sg_span = sched_group_cpus(sg);
+
+		child = *per_cpu_ptr(sdd->sd, i);
+		if (child->child) {
+			child = child->child;
+			cpumask_copy(sg_span, sched_domain_span(child));
+		} else
+			cpumask_set_cpu(i, sg_span);
+
+		cpumask_or(covered, covered, sg_span);
+
+		sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+		atomic_inc(&sg->sgp->ref);
+
+		if (cpumask_test_cpu(cpu, sg_span))
+			groups = sg;
+
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
+		last->next = first;
+	}
+	sd->groups = groups;
+
+	return 0;
+
+fail:
+	free_sched_groups(first, 0);
+
+	return -ENOMEM;
+}
+
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
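build_overlap_sched_groups() walks the domain's span and, for every CPU not yet covered, emits one group whose cpumask is that CPU's child-domain span and whose sched_group_power is the child's preallocated one; the groups are chained into a circular list anchored at the group containing the building CPU. A toy userspace model of the covering loop (not kernel code; cpumasks are reduced to 64-bit words and the two-node, eight-CPU topology is assumed purely for illustration):

/*
 * Userspace sketch of the covering loop: for each CPU in the span not yet
 * covered, emit a group spanning that CPU's child domain, mark it covered.
 */
#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 8

int main(void)
{
	uint64_t child_span[NR_CPUS];	/* span of each CPU's child domain */
	uint64_t span = 0xffULL;	/* the overlapping domain spans CPUs 0-7 */
	uint64_t covered = 0;
	int i;

	/* assumed topology: two 4-CPU nodes */
	for (i = 0; i < NR_CPUS; i++)
		child_span[i] = (i < 4) ? 0x0fULL : 0xf0ULL;

	for (i = 0; i < NR_CPUS; i++) {
		uint64_t sg_span;

		if (!(span & (1ULL << i)))
			continue;
		if (covered & (1ULL << i))	/* already part of an earlier group */
			continue;

		sg_span = child_span[i];	/* group = child domain's span */
		covered |= sg_span;

		printf("group anchored at cpu%d spans 0x%02llx\n",
		       i, (unsigned long long)sg_span);
	}
	return 0;
}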
@@ -6981,31 +7068,37 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 	if (child)
 		cpu = cpumask_first(sched_domain_span(child));
 
-	if (sg)
+	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
+		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+	}
 
 	return cpu;
 }
 
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group (along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
  * build_sched_groups will build a circular linked list of the groups
 * covered by the given span, and will set each group's ->cpumask correctly,
 * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
 */
-static void
-build_sched_groups(struct sched_domain *sd)
+static int
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	struct sd_data *sdd = sd->private;
 	const struct cpumask *span = sched_domain_span(sd);
 	struct cpumask *covered;
 	int i;
 
+	get_group(cpu, sdd, &sd->groups);
+	atomic_inc(&sd->groups->ref);
+
+	if (cpu != cpumask_first(sched_domain_span(sd)))
+		return 0;
+
 	lockdep_assert_held(&sched_domains_mutex);
 	covered = sched_domains_tmpmask;
 
@@ -7020,7 +7113,7 @@ build_sched_groups(struct sched_domain *sd)
 			continue;
 
 		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
+		sg->sgp->power = 0;
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -7037,6 +7130,8 @@ build_sched_groups(struct sched_domain *sd)
 		last = sg;
 	}
 	last->next = first;
+
+	return 0;
 }
 
 /*
@@ -7051,12 +7146,17 @@ build_sched_groups(struct sched_domain *sd)
  */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-	WARN_ON(!sd || !sd->groups);
+	struct sched_group *sg = sd->groups;
 
-	if (cpu != group_first_cpu(sd->groups))
-		return;
+	WARN_ON(!sd || !sg);
 
-	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+	do {
+		sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+		sg = sg->next;
+	} while (sg != sd->groups);
+
+	if (cpu != group_first_cpu(sg))
+		return;
 
 	update_group_power(sd, cpu);
 }
@@ -7177,15 +7277,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
 	struct sd_data *sdd = sd->private;
-	struct sched_group *sg = sd->groups;
 
 	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
 	*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
-	if (cpu == cpumask_first(sched_group_cpus(sg))) {
-		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
-	}
+
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
+		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
 #ifdef CONFIG_SCHED_SMT
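get_group() primes the shared sched_group_power refcount to 1 so that claim_allocations() can detect which per-cpu sg/sgp instances were actually wired into a domain; those slots are NULLed and therefore skipped by __sdt_free(), while unused ones are still freed there. A small userspace sketch of that claim-by-refcount idea (not kernel code; the slot/obj names are invented):

/*
 * Userspace model: pre-allocated per-cpu objects, the builder takes a
 * reference on the ones it uses, claiming NULLs exactly those slots so the
 * bulk-free sweep only releases the unused entries.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_SLOTS 4

struct obj {
	int ref;	/* stands in for sched_group{,_power}::ref */
};

int main(void)
{
	struct obj *slot[NR_SLOTS];
	struct obj *builder_a, *builder_b;	/* the builder's own pointers */
	int i;

	for (i = 0; i < NR_SLOTS; i++) {
		slot[i] = calloc(1, sizeof(struct obj));
		if (!slot[i])
			return 1;
	}

	/* the "builder" picks slots 0 and 2 and takes a reference on each */
	builder_a = slot[0];
	builder_a->ref = 1;
	builder_b = slot[2];
	builder_b->ref = 1;

	/* claim_allocations(): referenced entries leave the per-cpu table */
	for (i = 0; i < NR_SLOTS; i++) {
		if (slot[i]->ref)
			slot[i] = NULL;		/* now owned by the builder */
	}

	/* __sdt_free()-style sweep frees only what was never claimed */
	for (i = 0; i < NR_SLOTS; i++) {
		if (slot[i]) {
			printf("slot %d unused, freeing\n", i);
			free(slot[i]);
		}
	}

	/* the claimed objects are released by their new owner */
	free(builder_a);
	free(builder_b);
	return 0;
}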
@@ -7210,7 +7310,7 @@ static struct sched_domain_topology_level default_topology[] = {
 #endif
 	{ sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-	{ sd_init_NODE, cpu_node_mask, },
+	{ sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
 	{ sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
 	{ NULL, },
@@ -7234,9 +7334,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		if (!sdd->sg)
 			return -ENOMEM;
 
+		sdd->sgp = alloc_percpu(struct sched_group_power *);
+		if (!sdd->sgp)
+			return -ENOMEM;
+
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
+			struct sched_group_power *sgp;
 
 			sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -7251,6 +7356,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 				return -ENOMEM;
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
+
+			sgp = kzalloc_node(sizeof(struct sched_group_power),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sgp)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sgp, j) = sgp;
 		}
 	}
 
@@ -7266,11 +7378,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
 		struct sd_data *sdd = &tl->data;
 
 		for_each_cpu(j, cpu_map) {
-			kfree(*per_cpu_ptr(sdd->sd, j));
+			struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+			if (sd && (sd->flags & SD_OVERLAP))
+				free_sched_groups(sd->groups, 0);
 			kfree(*per_cpu_ptr(sdd->sg, j));
+			kfree(*per_cpu_ptr(sdd->sgp, j));
 		}
 		free_percpu(sdd->sd);
 		free_percpu(sdd->sg);
+		free_percpu(sdd->sgp);
 	}
 }
 
@@ -7316,8 +7432,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 		struct sched_domain_topology_level *tl;
 
 		sd = NULL;
-		for (tl = sched_domain_topology; tl->init; tl++)
+		for (tl = sched_domain_topology; tl->init; tl++) {
 			sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+				sd->flags |= SD_OVERLAP;
+			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
+				break;
+		}
 
 		while (sd->child)
 			sd = sd->child;
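The per-CPU topology walk now tags domains created from an SDTL_OVERLAP level (or whenever the FORCE_SD_OVERLAP scheduler feature is enabled) with SD_OVERLAP, and breaks out early once a domain's span already equals cpu_map, so no redundant wider levels are instantiated. A toy userspace model of that early-break walk (not kernel code; level names, spans and the LVL_OVERLAP flag are illustrative only):

/*
 * Userspace model of the topology walk: build levels bottom-up, mark
 * overlapping ones, stop as soon as a level's span covers every CPU.
 */
#include <stdio.h>
#include <stdint.h>

#define LVL_OVERLAP 0x1		/* stands in for SDTL_OVERLAP/SD_OVERLAP */

struct level {
	const char *name;
	uint64_t span;		/* span this level would give CPU 0 */
	int flags;
};

int main(void)
{
	uint64_t cpu_map = 0xffULL;	/* building for CPUs 0-7 */
	struct level topology[] = {
		{ "SMT",      0x03ULL, 0 },
		{ "MC",       0x0fULL, 0 },
		{ "NODE",     0xffULL, LVL_OVERLAP },
		{ "ALLNODES", 0xffULL, 0 },	/* never reached below */
	};
	unsigned int i;

	for (i = 0; i < sizeof(topology) / sizeof(topology[0]); i++) {
		struct level *tl = &topology[i];

		printf("built %s%s\n", tl->name,
		       (tl->flags & LVL_OVERLAP) ? " (overlapping groups)" : "");

		if (tl->span == cpu_map)	/* span covers cpu_map: stop */
			break;
	}
	return 0;
}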
@@ -7329,13 +7450,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			sd->span_weight = cpumask_weight(sched_domain_span(sd));
-			get_group(i, sd->private, &sd->groups);
-			atomic_inc(&sd->groups->ref);
-
-			if (i != cpumask_first(sched_domain_span(sd)))
-				continue;
-
-			build_sched_groups(sd);
+			if (sd->flags & SD_OVERLAP) {
+				if (build_overlap_sched_groups(sd, i))
+					goto error;
+			} else {
+				if (build_sched_groups(sd, i))
+					goto error;
+			}
 		}
 	}
 