forked from filebench/filebench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfb_localfs.c
685 lines (592 loc) · 16.1 KB
/
fb_localfs.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Portions Copyright 2008 Denis Cheng
*/
#include "config.h"
#include "filebench.h"
#include "flowop.h"
#include "threadflow.h" /* For aiolist definition */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <libgen.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/resource.h>
#include <strings.h>
#include "filebench.h"
#include "fsplug.h"
#ifdef HAVE_AIO
#include <aio.h>
#endif /* HAVE_AIO */
/*
* These routines implement local file access. They are placed into a
* vector of functions that are called by all I/O operations in fileset.c
* and flowop_library.c. This represents the default file system plug-in,
* and may be replaced by vectors for other file system plug-ins.
*
* The prototypes below are forward declarations: the fb_lfs_funcs vector
* that follows refers to every one of these routines before any of them
* is defined.
*/
static int fb_lfs_freemem(fb_fdesc_t *fd, off64_t size);
static int fb_lfs_open(fb_fdesc_t *, char *, int, int);
static int fb_lfs_pread(fb_fdesc_t *, caddr_t, fbint_t, off64_t);
static int fb_lfs_read(fb_fdesc_t *, caddr_t, fbint_t);
static int fb_lfs_pwrite(fb_fdesc_t *, caddr_t, fbint_t, off64_t);
static int fb_lfs_write(fb_fdesc_t *, caddr_t, fbint_t);
static int fb_lfs_lseek(fb_fdesc_t *, off64_t, int);
static int fb_lfs_truncate(fb_fdesc_t *, off64_t);
static int fb_lfs_rename(const char *, const char *);
static int fb_lfs_close(fb_fdesc_t *);
static int fb_lfs_link(const char *, const char *);
static int fb_lfs_symlink(const char *, const char *);
static int fb_lfs_unlink(char *);
static ssize_t fb_lfs_readlink(const char *, char *, size_t);
static int fb_lfs_mkdir(char *, int);
static int fb_lfs_rmdir(char *);
static DIR *fb_lfs_opendir(char *);
static struct dirent *fb_lfs_readdir(DIR *);
static int fb_lfs_closedir(DIR *);
static int fb_lfs_fsync(fb_fdesc_t *);
static int fb_lfs_stat(char *, struct stat64 *);
static int fb_lfs_fstat(fb_fdesc_t *, struct stat64 *);
static int fb_lfs_access(const char *, int);
static void fb_lfs_recur_rm(char *);
/*
* Function vector for the local file system plug-in. It is installed as
* the active vector by fb_lfs_funcvecinit(). The initializer is
* positional: the first member is the plug-in name string and every
* following member is one entry point, so the order here must stay in
* step with the member order of fsplug_func_t.
* NOTE(review): fsplug_func_t is declared outside this file (presumably
* in fsplug.h) - confirm the member order there before reordering.
*/
static fsplug_func_t fb_lfs_funcs =
{
"locfs",
fb_lfs_freemem, /* flush page cache */
fb_lfs_open, /* open */
fb_lfs_pread, /* pread */
fb_lfs_read, /* read */
fb_lfs_pwrite, /* pwrite */
fb_lfs_write, /* write */
fb_lfs_lseek, /* lseek */
fb_lfs_truncate, /* ftruncate */
fb_lfs_rename, /* rename */
fb_lfs_close, /* close */
fb_lfs_link, /* link */
fb_lfs_symlink, /* symlink */
fb_lfs_unlink, /* unlink */
fb_lfs_readlink, /* readlink */
fb_lfs_mkdir, /* mkdir */
fb_lfs_rmdir, /* rmdir */
fb_lfs_opendir, /* opendir */
fb_lfs_readdir, /* readdir */
fb_lfs_closedir, /* closedir */
fb_lfs_fsync, /* fsync */
fb_lfs_stat, /* stat */
fb_lfs_fstat, /* fstat */
fb_lfs_access, /* access */
fb_lfs_recur_rm /* recursive rm */
};
#ifdef HAVE_AIO
/*
* Local file system asynchronous IO flowops are in this module, as
* they have a number of local file system specific features.
*
* fb_lfsflow_funcs is the table of flowop prototypes that
* fb_lfs_newflowops() hands to flowop_add_from_proto(). It holds one
* entry for "aiowrite" and one for "aiowait"; both use the generic
* init and destruct routines and differ only in their attribute word
* and in the function that implements the flowop.
* NOTE(review): the meaning of each positional member comes from
* flowop_proto_t, which is declared outside this file - confirm there.
*/
static int fb_lfsflow_aiowrite(threadflow_t *threadflow, flowop_t *flowop);
static int fb_lfsflow_aiowait(threadflow_t *threadflow, flowop_t *flowop);
static flowop_proto_t fb_lfsflow_funcs[] = {
{FLOW_TYPE_AIO, FLOW_ATTR_WRITE, "aiowrite", flowop_init_generic,
fb_lfsflow_aiowrite, flowop_destruct_generic},
{FLOW_TYPE_AIO, 0, "aiowait", flowop_init_generic,
fb_lfsflow_aiowait, flowop_destruct_generic}
};
#endif /* HAVE_AIO */
/*
 * Selects the local file system plug-in by pointing the global
 * fs_functions_vec at this module's function vector, fb_lfs_funcs.
 * Called for the master process and for every created worker process.
 */
void
fb_lfs_funcvecinit(void)
{
	/* all later plug-in calls dispatch through this vector */
	fs_functions_vec = &fb_lfs_funcs;
}
/*
 * Registers the flowops whose implementation is file system specific.
 * When HAVE_AIO is defined, every entry of fb_lfsflow_funcs is passed to
 * flowop_add_from_proto(); otherwise this routine does nothing. It is
 * called only once in the master process.
 */
void
fb_lfs_newflowops(void)
{
#ifdef HAVE_AIO
	/* number of prototype entries in the table */
	int count = sizeof (fb_lfsflow_funcs) / sizeof (fb_lfsflow_funcs[0]);

	flowop_add_from_proto(fb_lfsflow_funcs, count);
#endif /* HAVE_AIO */
}
/*
 * Walks the first "size" bytes of an open file in pieces of at most
 * MMAP_SIZE bytes. Each piece is mapped shared with mmap64(), handed to
 * msync() with MS_INVALIDATE, and unmapped again.
 *
 *	fd	filebench descriptor; fd->fd_num is the open file to map
 *	size	number of bytes, counted from offset 0, to process
 *
 * Returns 0 when every piece was handled successfully. That includes the
 * case where "size" is zero or negative: the loop body never runs.
 * Otherwise returns -1 times the number of pieces for which mmap64() or
 * msync() failed.
 */
static int
fb_lfs_freemem(fb_fdesc_t *fd, off64_t size)
{
	off64_t left;
	int ret = 0;

	for (left = size; left > 0; left -= MMAP_SIZE) {
		off64_t thismapsize;
		caddr_t addr;

		thismapsize = MIN(MMAP_SIZE, left);
		/* size - left is the file offset of the current piece */
		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
		    MAP_SHARED, fd->fd_num, size - left);
		if ((void *)addr == MAP_FAILED) {
			/*
			 * Nothing was mapped, so there is nothing to sync
			 * or unmap. Count the piece as one failure and
			 * move on to the next one.
			 */
			ret--;
			continue;
		}

		ret += msync(addr, thismapsize, MS_INVALIDATE);
		(void) munmap(addr, thismapsize);
	}
	return (ret);
}
/*
 * Positioned read: passes fd->fd_num, the buffer, the size and the file
 * offset straight to pread64(). The pread64() result is returned after
 * conversion to int.
 */
static int
fb_lfs_pread(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize, off64_t fileoffset)
{
	int nread;

	nread = pread64(fd->fd_num, iobuf, iosize, fileoffset);
	return (nread);
}
/*
 * Sequential read: passes fd->fd_num, the buffer and the size straight
 * to read(). The read() result is returned after conversion to int.
 */
static int
fb_lfs_read(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize)
{
	int nread;

	nread = read(fd->fd_num, iobuf, iosize);
	return (nread);
}
#ifdef HAVE_AIO
/*
* Asynchronous write section. An asynchronous IO element
* (aiolist_t) is used to associate an asynchronous write request with
* its subsequent completion. This element includes an aiocb64 struct
* that is used by the posix aio_xxx calls to track the asynchronous write.
* The flowops aiowrite and aiowait result in calls to these posix
* aio_xxx system routines to do the actual asynchronous write IO
* operations.
*/
/*
 * Creates a new, zero-filled asynchronous I/O list element (aiolist_t)
 * and pushes it onto the head of the aio list of the flowop's thread
 * (flowop->fo_thread->tf_aiolist). Returns a pointer to the new element.
 *
 * If malloc() fails, an error is logged and filebench_shutdown(1) is
 * called.
 */
static aiolist_t *
aio_allocate(flowop_t *flowop)
{
	aiolist_t *entry = malloc(sizeof (aiolist_t));

	if (entry == NULL) {
		filebench_log(LOG_ERROR, "malloc aiolist failed");
		filebench_shutdown(1);
	}

	bzero(entry, sizeof (*entry));

	/*
	 * Push on the head of the list. When the list is empty the old
	 * head is NULL, so al_next ends up NULL without a special case.
	 */
	entry->al_next = flowop->fo_thread->tf_aiolist;
	flowop->fo_thread->tf_aiolist = entry;

	return (entry);
}
/*
 * Looks through the aio list of the flowop's thread for the element
 * whose embedded control block (al_aiocb) is the one "aiocb" points to,
 * and unlinks that element from the list.
 *
 * Returns FILEBENCH_OK when the element was found and unlinked, and
 * FILEBENCH_ERROR when no element matches. A NULL "aiocb" is logged as
 * an error but still returns FILEBENCH_OK.
 *
 * The element is only taken off the list; it is not freed here.
 */
static int
aio_deallocate(flowop_t *flowop, struct aiocb64 *aiocb)
{
	aiolist_t *cur = flowop->fo_thread->tf_aiolist;
	aiolist_t *prev = NULL;

	if (aiocb == NULL) {
		filebench_log(LOG_ERROR, "null aiocb deallocate");
		return (FILEBENCH_OK);
	}

	for (; cur != NULL; prev = cur, cur = cur->al_next) {
		if (&cur->al_aiocb != aiocb)
			continue;

		/* found it: bypass the element, or advance the list head */
		if (prev != NULL)
			prev->al_next = cur->al_next;
		else
			flowop->fo_thread->tf_aiolist = cur->al_next;

		return (FILEBENCH_OK);
	}

	return (FILEBENCH_ERROR);
}
/*
 * The "aiowrite" flowop. Uses flowoplib_iosetup() to obtain the working
 * set size, IO buffer and file descriptor, picks a random file offset,
 * fills in a newly allocated aiolist_t element and issues the write
 * with aio_write64().
 *
 * Only random IO is supported: if the flowop's fo_random attribute is
 * false, FILEBENCH_ERROR is returned. FILEBENCH_ERROR is also returned
 * when the working set size is smaller than the IO size. A status other
 * than FILEBENCH_OK from flowoplib_iosetup() is returned unchanged.
 * Otherwise returns FILEBENCH_OK.
 *
 * If aio_write64() fails, the error is logged and filebench_shutdown(1)
 * is called; should that call return, the flowop is still ended and
 * FILEBENCH_OK is returned.
 */
static int
fb_lfsflow_aiowrite(threadflow_t *threadflow, flowop_t *flowop)
{
	caddr_t iobuf;
	fbint_t wss;
	fbint_t iosize;
	fb_fdesc_t *fdesc;
	uint64_t fileoffset;
	struct aiocb64 *aiocb;
	aiolist_t *aiolist;
	int ret;

	iosize = avd_get_int(flowop->fo_iosize);

	ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
	    &fdesc, iosize);
	if (ret != FILEBENCH_OK)
		return (ret);

	/* sequential asynchronous writes are not implemented */
	if (!avd_get_bool(flowop->fo_random))
		return (FILEBENCH_ERROR);

	if (wss < iosize) {
		filebench_log(LOG_ERROR,
		    "file size smaller than IO size for thread %s",
		    flowop->fo_name);
		return (FILEBENCH_ERROR);
	}

	fb_random64(&fileoffset, wss, iosize, NULL);

	/* queue a tracking element, then describe the write in its aiocb */
	aiolist = aio_allocate(flowop);
	aiolist->al_type = AL_WRITE;

	aiocb = &aiolist->al_aiocb;
	aiocb->aio_fildes = fdesc->fd_num;
	aiocb->aio_buf = iobuf;
	aiocb->aio_nbytes = (size_t)iosize;
	aiocb->aio_offset = (off64_t)fileoffset;
	aiocb->aio_reqprio = 0;

	filebench_log(LOG_DEBUG_IMPL,
	    "aio fd=%d, bytes=%llu, offset=%llu",
	    fdesc->fd_num, (u_longlong_t)iosize,
	    (u_longlong_t)fileoffset);

	flowop_beginop(threadflow, flowop);
	if (aio_write64(aiocb) < 0) {
		filebench_log(LOG_ERROR, "aiowrite failed: %s",
		    strerror(errno));
		filebench_shutdown(1);
	}
	flowop_endop(threadflow, flowop, iosize);

	return (FILEBENCH_OK);
}
#define MAXREAP 4096
/*
 * The "aiowait" flowop. Reaps completed asynchronous IOs belonging to
 * the flowop's thread and unlinks them from the thread's aio list.
 *
 * Each pass aims at half of the outstanding IOs, capped at MAXREAP and
 * never fewer than one. Passes repeat while more than MAXREAP IOs are
 * still outstanding.
 *
 * With HAVE_AIOWAITN, aio_waitn64() collects completions into a
 * worklist, using a 1 second timeout. Entries that aio_return64()
 * reports as EINPROGRESS stay on the list and are counted as still
 * outstanding.
 *
 * Without HAVE_AIOWAITN, the thread's list is scanned once per pass
 * with aio_error64(), and requests that finished without error are
 * removed. A request that finished with an error is logged and left on
 * the list.
 * NOTE(review): such a failed request is examined again by every later
 * scan - confirm whether it should be removed instead.
 *
 * Returns FILEBENCH_OK on success. Returns FILEBENCH_ERROR when the
 * worklist cannot be allocated or when a completed IO cannot be found
 * on the thread's aio list. The worklist is released on every return
 * path.
 */
static int
fb_lfsflow_aiowait(threadflow_t *threadflow, flowop_t *flowop)
{
	struct aiocb64 **worklist;
	aiolist_t *aio = flowop->fo_thread->tf_aiolist;
	int uncompleted = 0;
#ifdef HAVE_AIOWAITN
	int i;
#endif

	worklist = calloc(MAXREAP, sizeof (struct aiocb64 *));
	if (worklist == NULL) {
		/* refuse to go on without somewhere to collect completions */
		filebench_log(LOG_ERROR, "calloc aio worklist failed");
		return (FILEBENCH_ERROR);
	}

	/* Count the list of pending aios */
	while (aio) {
		uncompleted++;
		aio = aio->al_next;
	}

	do {
		uint_t ncompleted = 0;
		uint_t todo;
		int inprogress;
#ifdef HAVE_AIOWAITN
		struct timespec timeout;

		/* Wait for half of the outstanding requests */
		timeout.tv_sec = 1;
		timeout.tv_nsec = 0;
#endif

		if (uncompleted > MAXREAP)
			todo = MAXREAP;
		else
			todo = uncompleted / 2;

		if (todo == 0)
			todo = 1;

		flowop_beginop(threadflow, flowop);

#ifdef HAVE_AIOWAITN
		if (((aio_waitn64((struct aiocb64 **)worklist,
		    MAXREAP, &todo, &timeout)) == -1) &&
		    errno && (errno != ETIME)) {
			filebench_log(LOG_ERROR,
			    "aiowait failed: %s, outstanding = %d, "
			    "ncompleted = %d ",
			    strerror(errno), uncompleted, todo);
		}

		/* todo is passed by address above and read back here */
		ncompleted = todo;
		/* Take the completed I/Os from the list */
		inprogress = 0;
		for (i = 0; i < ncompleted; i++) {
			if ((aio_return64(worklist[i]) == -1) &&
			    (errno == EINPROGRESS)) {
				inprogress++;
				continue;
			}
			if (aio_deallocate(flowop, worklist[i])
			    == FILEBENCH_ERROR) {
				filebench_log(LOG_ERROR, "Could not remove "
				    "aio from list ");
				flowop_endop(threadflow, flowop, 0);
				free(worklist);
				return (FILEBENCH_ERROR);
			}
		}

		uncompleted -= ncompleted;
		uncompleted += inprogress;

#else

		for (ncompleted = 0, inprogress = 0,
		    aio = flowop->fo_thread->tf_aiolist;
		    ncompleted < todo && aio != NULL; aio = aio->al_next) {
			int result = aio_error64(&aio->al_aiocb);

			if (result == EINPROGRESS) {
				inprogress++;
				continue;
			}

			if ((aio_return64(&aio->al_aiocb) == -1) || result) {
				filebench_log(LOG_ERROR, "aio failed: %s",
				    strerror(result));
				continue;
			}

			ncompleted++;

			/*
			 * aio_deallocate() only unlinks the element, so
			 * aio->al_next is still readable when the loop
			 * advances.
			 */
			if (aio_deallocate(flowop, &aio->al_aiocb) < 0) {
				filebench_log(LOG_ERROR, "Could not remove "
				    "aio from list ");
				flowop_endop(threadflow, flowop, 0);
				free(worklist);
				return (FILEBENCH_ERROR);
			}
		}

		uncompleted -= ncompleted;

#endif
		filebench_log(LOG_DEBUG_SCRIPT,
		    "aio2 completed %d ios, uncompleted = %d, inprogress = %d",
		    ncompleted, uncompleted, inprogress);

	} while (uncompleted > MAXREAP);

	flowop_endop(threadflow, flowop, 0);

	free(worklist);

	return (FILEBENCH_OK);
}
#endif /* HAVE_AIO */
/*
 * Opens "path" with open64() using the given flags and permissions and
 * stores the resulting descriptor number in fd->fd_num. Returns
 * FILEBENCH_OK on success and FILEBENCH_ERROR when open64() yields a
 * negative descriptor; that negative value is left in fd->fd_num.
 */
static int
fb_lfs_open(fb_fdesc_t *fd, char *path, int flags, int perms)
{
	fd->fd_num = open64(path, flags, perms);

	return ((fd->fd_num < 0) ? FILEBENCH_ERROR : FILEBENCH_OK);
}
/*
 * Removes the directory entry "path" with unlink() and returns what
 * unlink() returns.
 */
static int
fb_lfs_unlink(char *path)
{
	int rc;

	rc = unlink(path);
	return (rc);
}
/*
 * Reads the contents of the symbolic link "path" into "buf" (at most
 * "buf_size" bytes) with readlink() and returns what readlink() returns.
 */
static ssize_t
fb_lfs_readlink(const char *path, char *buf, size_t buf_size)
{
	ssize_t len;

	len = readlink(path, buf, buf_size);
	return (len);
}
/*
 * Calls fsync() on the file named by fd->fd_num and returns what
 * fsync() returns.
 */
static int
fb_lfs_fsync(fb_fdesc_t *fd)
{
	int rc;

	rc = fsync(fd->fd_num);
	return (rc);
}
/*
 * Repositions the file offset of fd->fd_num with lseek64(). The
 * lseek64() result is returned after conversion to int.
 */
static int
fb_lfs_lseek(fb_fdesc_t *fd, off64_t offset, int whence)
{
	int newpos;

	newpos = lseek64(fd->fd_num, offset, whence);
	return (newpos);
}
/*
 * Renames "old" to "new" with rename() and returns what rename()
 * returns.
 */
static int
fb_lfs_rename(const char *old, const char *new)
{
	int rc;

	rc = rename(old, new);
	return (rc);
}
/*
 * Closes the descriptor fd->fd_num with close() and returns what
 * close() returns.
 */
static int
fb_lfs_close(fb_fdesc_t *fd)
{
	int rc;

	rc = close(fd->fd_num);
	return (rc);
}
/*
 * Creates the directory "path" with mode "perm" using mkdir() and
 * returns what mkdir() returns.
 */
static int
fb_lfs_mkdir(char *path, int perm)
{
	int rc;

	rc = mkdir(path, perm);
	return (rc);
}
/*
 * Deletes the directory "path" with rmdir() and returns what rmdir()
 * returns.
 */
static int
fb_lfs_rmdir(char *path)
{
	int rc;

	rc = rmdir(path);
	return (rc);
}
/*
 * Recursively removes an entire directory tree (i.e. a fileset) by
 * running "rm -rf" on it through system().
 *
 *	path	root of the tree to remove
 *
 * The path is passed to the shell as a single-quoted word after "--",
 * so blanks, quotes, shell metacharacters and a leading '-' in the path
 * are taken literally. An embedded single quote is written as '\''.
 *
 * Nothing is run when "path" is NULL or empty, or when the quoted
 * command would not fit in the command buffer; running a truncated
 * command would remove a different, shorter path. The exit status of
 * the command is deliberately ignored.
 */
static void
fb_lfs_recur_rm(char *path)
{
	static const char prefix[] = "rm -rf -- '";
	/* worst case: every path byte expands to the 4 bytes '\'' */
	char cmd[sizeof (prefix) + 4 * MAXPATHLEN + 2];
	size_t used = sizeof (prefix) - 1;
	const char *src;
	int rc;

	if (path == NULL || *path == '\0')
		return;

	(void) snprintf(cmd, sizeof (cmd), "%s", prefix);

	for (src = path; *src != '\0'; src++) {
		/* room for this byte (up to 4), the closing quote and NUL */
		if (used + 6 > sizeof (cmd))
			return;

		if (*src == '\'') {
			/* close the quote, emit \', reopen the quote */
			cmd[used++] = '\'';
			cmd[used++] = '\\';
			cmd[used++] = '\'';
			cmd[used++] = '\'';
		} else {
			cmd[used++] = *src;
		}
	}
	cmd[used++] = '\'';
	cmd[used] = '\0';

	/* We ignore system()'s return value */
	rc = system(cmd);
	(void) rc;
}
/*
 * Opens the directory "path" with opendir(). Returns the directory
 * handle on success and NULL on failure.
 */
static DIR *
fb_lfs_opendir(char *path)
{
	DIR *dirp;

	dirp = opendir(path);
	return (dirp);
}
/*
 * Fetches the next entry of an open directory with readdir(). Returns
 * the pointer readdir() returns: a directory entry, or NULL when
 * readdir() has no entry to hand back.
 */
static struct dirent *
fb_lfs_readdir(DIR *dirp)
{
	struct dirent *entry;

	entry = readdir(dirp);
	return (entry);
}
/*
 * Closes an open directory handle with closedir() and returns what
 * closedir() returns.
 */
static int
fb_lfs_closedir(DIR *dirp)
{
	int rc;

	rc = closedir(dirp);
	return (rc);
}
/*
 * Fills "statbufp" with the status of the open file fd->fd_num using
 * fstat64() and returns what fstat64() returns.
 */
static int
fb_lfs_fstat(fb_fdesc_t *fd, struct stat64 *statbufp)
{
	int rc;

	rc = fstat64(fd->fd_num, statbufp);
	return (rc);
}
/*
 * Fills "statbufp" with the status of the file named "path" using
 * stat64() and returns what stat64() returns.
 */
static int
fb_lfs_stat(char *path, struct stat64 *statbufp)
{
	int rc;

	rc = stat64(path, statbufp);
	return (rc);
}
/*
 * Positioned write: passes fd->fd_num, the buffer, the size and the file
 * offset straight to pwrite64(). The pwrite64() result is returned after
 * conversion to int.
 */
static int
fb_lfs_pwrite(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize, off64_t offset)
{
	int nwritten;

	nwritten = pwrite64(fd->fd_num, iobuf, iosize, offset);
	return (nwritten);
}
/*
 * Sequential write: passes fd->fd_num, the buffer and the size straight
 * to write(). The write() result is returned after conversion to int.
 */
static int
fb_lfs_write(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize)
{
	int nwritten;

	nwritten = write(fd->fd_num, iobuf, iosize);
	return (nwritten);
}
/*
 * Sets the length of the open file fd->fd_num to "fse_size" and returns
 * the result of the truncate call.
 *
 * When HAVE_FTRUNCATE64 is defined, ftruncate64() is used. Otherwise an
 * error is logged about the off64_t to off_t conversion and ftruncate()
 * is called with the size cast to off_t.
 */
static int
fb_lfs_truncate(fb_fdesc_t *fd, off64_t fse_size)
{
	int rc;

#ifdef HAVE_FTRUNCATE64
	rc = ftruncate64(fd->fd_num, fse_size);
#else
	filebench_log(LOG_ERROR, "Converting off64_t to off_t in ftruncate,"
	    " might be a possible problem");
	rc = ftruncate(fd->fd_num, (off_t)fse_size);
#endif

	return (rc);
}
/*
 * Creates the hard link "new" to the file "existing" with link() and
 * returns what link() returns.
 */
static int
fb_lfs_link(const char *existing, const char *new)
{
	int rc;

	rc = link(existing, new);
	return (rc);
}
/*
 * Creates the symbolic link "new" whose contents are "existing" with
 * symlink() and returns what symlink() returns.
 */
static int
fb_lfs_symlink(const char *existing, const char *new)
{
	int rc;

	rc = symlink(existing, new);
	return (rc);
}
/*
 * Checks the accessibility of "path" for the mode "amode" with access()
 * and returns what access() returns.
 */
static int
fb_lfs_access(const char *path, int amode)
{
	int rc;

	rc = access(path, amode);
	return (rc);
}