Skip to content

Commit 275220f

Browse files
committed
Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits) block: ensure that completion error gets properly traced blktrace: add missing probe argument to block_bio_complete block cfq: don't use atomic_t for cfq_group block cfq: don't use atomic_t for cfq_queue block: trace event block fix unassigned field block: add internal hd part table references block: fix accounting bug on cross partition merges kref: add kref_test_and_get bio-integrity: mark kintegrityd_wq highpri and CPU intensive block: make kblockd_workqueue smarter Revert "sd: implement sd_check_events()" block: Clean up exit_io_context() source code. Fix compile warnings due to missing removal of a 'ret' variable fs/block: type signature of major_to_index(int) to major_to_index(unsigned) block: convert !IS_ERR(p) && p to !IS_ERR_NOR_NULL(p) cfq-iosched: don't check cfqg in choose_service_tree() fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors cdrom: export cdrom_check_events() sd: implement sd_check_events() sr: implement sr_check_events() ...
2 parents fe3c560 + 81c5e2a commit 275220f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1323
-1085
lines changed

Documentation/cgroups/blkio-controller.txt

+27
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,33 @@ Throttling/Upper Limit policy
8989

9090
Limits for writes can be put using blkio.write_bps_device file.
9191

92+
Hierarchical Cgroups
93+
====================
94+
- Currently none of the IO control policies support hierarchical groups. But
95+
cgroup interface does allow creation of hierarchical cgroups and internally
96+
IO policies treat them as a flat hierarchy.
97+
98+
So this patch will allow creation of cgroup hierarchy but at the backend
99+
everything will be treated as flat. So if somebody created a hierarchy
100+
as follows.
101+
102+
root
103+
/ \
104+
test1 test2
105+
|
106+
test3
107+
108+
CFQ and throttling will practically treat all groups at the same level.
109+
110+
pivot
111+
/ | \ \
112+
root test1 test2 test3
113+
114+
Down the line we can implement hierarchical accounting/control support
115+
and also introduce a new cgroup file "use_hierarchy" which will control
116+
whether cgroup hierarchy is viewed as flat or hierarchical by the policy.
117+
This is also how the memory controller has implemented things.
118+
92119
Various user visible config options
93120
===================================
94121
CONFIG_BLK_CGROUP

block/blk-cgroup.c

-4
Original file line numberDiff line numberDiff line change
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
14521452
goto done;
14531453
}
14541454

1455-
/* Currently we do not support hierarchy deeper than two level (0,1) */
1456-
if (parent != cgroup->top_cgroup)
1457-
return ERR_PTR(-EPERM);
1458-
14591455
blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
14601456
if (!blkcg)
14611457
return ERR_PTR(-ENOMEM);

block/blk-core.c

+29-11
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
#include "blk.h"
3535

36-
EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
36+
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
3737
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
3838
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
3939

@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
6464
return;
6565

6666
cpu = part_stat_lock();
67-
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
6867

69-
if (!new_io)
68+
if (!new_io) {
69+
part = rq->part;
7070
part_stat_inc(cpu, part, merges[rw]);
71-
else {
71+
} else {
72+
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
73+
if (!hd_struct_try_get(part)) {
74+
/*
75+
* The partition is already being removed,
76+
* the request will be accounted on the disk only
77+
*
78+
* We take a reference on disk->part0 although that
79+
* partition will never be deleted, so we can treat
80+
* it as any other partition.
81+
*/
82+
part = &rq->rq_disk->part0;
83+
hd_struct_get(part);
84+
}
7285
part_round_stats(cpu, part);
7386
part_inc_in_flight(part, rw);
87+
rq->part = part;
7488
}
7589

7690
part_stat_unlock();
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
128142
rq->ref_count = 1;
129143
rq->start_time = jiffies;
130144
set_start_time_ns(rq);
145+
rq->part = NULL;
131146
}
132147
EXPORT_SYMBOL(blk_rq_init);
133148

@@ -1329,9 +1344,9 @@ static inline void blk_partition_remap(struct bio *bio)
13291344
bio->bi_sector += p->start_sect;
13301345
bio->bi_bdev = bdev->bd_contains;
13311346

1332-
trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1333-
bdev->bd_dev,
1334-
bio->bi_sector - p->start_sect);
1347+
trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1348+
bdev->bd_dev,
1349+
bio->bi_sector - p->start_sect);
13351350
}
13361351
}
13371352

@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio)
15001515
goto end_io;
15011516

15021517
if (old_sector != -1)
1503-
trace_block_remap(q, bio, old_dev, old_sector);
1518+
trace_block_bio_remap(q, bio, old_dev, old_sector);
15041519

15051520
old_sector = bio->bi_sector;
15061521
old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
17761791
int cpu;
17771792

17781793
cpu = part_stat_lock();
1779-
part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1794+
part = req->part;
17801795
part_stat_add(cpu, part, sectors[rw], bytes >> 9);
17811796
part_stat_unlock();
17821797
}
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req)
17961811
int cpu;
17971812

17981813
cpu = part_stat_lock();
1799-
part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1814+
part = req->part;
18001815

18011816
part_stat_inc(cpu, part, ios[rw]);
18021817
part_stat_add(cpu, part, ticks[rw], duration);
18031818
part_round_stats(cpu, part);
18041819
part_dec_in_flight(part, rw);
18051820

1821+
hd_struct_put(part);
18061822
part_stat_unlock();
18071823
}
18081824
}
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void)
26062622
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
26072623
sizeof(((struct request *)0)->cmd_flags));
26082624

2609-
kblockd_workqueue = create_workqueue("kblockd");
2625+
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
2626+
kblockd_workqueue = alloc_workqueue("kblockd",
2627+
WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
26102628
if (!kblockd_workqueue)
26112629
panic("Failed to create kblockd\n");
26122630

block/blk-ioc.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc)
6464
rcu_read_unlock();
6565
}
6666

67-
/* Called by the exitting task */
67+
/* Called by the exiting task */
6868
void exit_io_context(struct task_struct *task)
6969
{
7070
struct io_context *ioc;
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task)
7474
task->io_context = NULL;
7575
task_unlock(task);
7676

77-
if (atomic_dec_and_test(&ioc->nr_tasks)) {
77+
if (atomic_dec_and_test(&ioc->nr_tasks))
7878
cfq_exit(ioc);
7979

80-
}
8180
put_io_context(ioc);
8281
}
8382

block/blk-merge.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req)
351351
int cpu;
352352

353353
cpu = part_stat_lock();
354-
part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
354+
part = req->part;
355355

356356
part_round_stats(cpu, part);
357357
part_dec_in_flight(part, rq_data_dir(req));
358358

359+
hd_struct_put(part);
359360
part_stat_unlock();
360361
}
361362
}

0 commit comments

Comments
 (0)