-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #90 from CCBR/quick_assess
quick assess
- Loading branch information
Showing
8 changed files
with
238 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
redirect |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
## spacesavers2_pdq | ||
|
||
pdq = Pretty Darn Quick | ||
|
||
This uses `glob` library to list all files in a user-provided folder recursively. | ||
|
||
For each user it gathers information like: | ||
- total number of files | ||
- total number of bytes | ||
|
||
It is a quick tool to gather datapoints to monitor filesystem usage. Typically, it can be run once daily and compared with the previous day's run to find large changes. | ||
|
||
### Inputs | ||
- `--folder`: Path to the folder to run `spacesavers2_pdq` on. | ||
- `--threads`: `spacesavers2_pdq` uses multiprocessing library to parallelize orchestration. This defines the number of threads to run in parallel. | ||
- `--outfile`: If not supplied then the output is written to the screen. | ||
|
||
> NOTE: `spacesavers2_pdq` reports errors (eg. cannot read file) to STDERR | ||
```bash | ||
usage: spacesavers2_pdq [-h] -f FOLDER [-p THREADS] [-o OUTFILE] [-v] | ||
|
||
spacesavers2_pdq: get quick per user info (number of files and bytes). | ||
|
||
options: | ||
-h, --help show this help message and exit | ||
-f FOLDER, --folder FOLDER | ||
spacesavers2_pdq will be run on all files in this folder and its subfolders | ||
-p THREADS, --threads THREADS | ||
number of threads to be used (default 4) | ||
-o OUTFILE, --outfile OUTFILE | ||
outfile ... catalog file .. by default output is printed to screen | ||
-v, --version show program's version number and exit | ||
Version: | ||
v0.11.5 | ||
Example: | ||
> spacesavers2_pdq -f /path/to/folder -p 4 -o /path/to/output_file | ||
``` | ||
### Output | ||
## tab-delimited output (file) | ||
`spacesavers2_pdq` creates one tab-separated output line per user: | ||
```bash | ||
% head -n3 test.out | ||
user1 1386138 6089531321856 | ||
user2 230616 2835680212992 | ||
user3 1499 126442496 | ||
``` | ||
The 3 items in the line are as follows: | ||
| Column | Description | Example | | ||
| ------ | ------------------------ | ---------------------------------------------------------------------------------------------- | | ||
| 1 | username | "user1" | | ||
| 2 | total no. of files owned | 1386138 | | ||
| 3 | total no. of bytes occupied | 6089531321856 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#!/usr/bin/env python3 | ||
# pdq = pretty darn quick | ||
|
||
from src.VersionCheck import version_check | ||
from src.VersionCheck import __version__ | ||
from src.utils import * | ||
|
||
version_check() | ||
|
||
# import required modules | ||
import textwrap | ||
import tqdm | ||
import sys | ||
from src.pdq import pdq | ||
from multiprocessing import Pool | ||
import argparse | ||
from pathlib import Path | ||
|
||
|
||
def task(f):
    """Build and return the ``pdq`` record for a single path.

    Used as the worker callable for the multiprocessing pool in ``main``.

    Args:
        f: path to inspect (passed straight to ``pdq.set``).

    Returns:
        The populated ``pdq`` object.
    """
    record = pdq()
    record.set(f)
    return record
|
||
|
||
def main():
    """Command-line entry point: report number of files and bytes per user.

    Parses the command line, lists ``--folder`` and everything below it,
    inspects every path in a pool of ``--threads`` worker processes and
    writes one tab-separated line per owner (name, file count, byte total)
    to ``--outfile``, or to the screen when no outfile is given.
    """
    import contextlib  # local import: only needed for nullcontext below

    # Kept for backward compatibility: the parsed namespace has always been
    # published as a module-level global.
    global args

    elog = textwrap.dedent(
        """\
        Version:
        {}
        Example:
        > spacesavers2_pdq -f /path/to/folder -p 4 -o /path/to/output_file
        """.format(
            __version__
        )
    )
    parser = argparse.ArgumentParser(
        description="spacesavers2_pdq: get quick per user info (number of files and bytes).",
        epilog=elog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-f",
        "--folder",
        dest="folder",
        required=True,
        type=str,
        help="spacesavers2_pdq will be run on all files in this folder and its subfolders",
    )
    parser.add_argument(
        "-p",
        "--threads",
        dest="threads",
        required=False,
        type=int,
        default=4,
        help="number of threads to be used (default 4)",
    )
    parser.add_argument(
        "-o",
        "--outfile",
        dest="outfile",
        required=False,
        type=str,
        help="outfile ... catalog file .. by default output is printed to screen",
    )
    parser.add_argument("-v", "--version", action="version", version=__version__)

    args = parser.parse_args()

    root = Path(args.folder)
    # The top-level folder itself is inspected too, followed by every path
    # found recursively beneath it.
    files = [root]
    files.extend(root.glob("**/*"))

    # Open the destination before the scan so an unwritable outfile fails
    # immediately. The context manager guarantees the handle is closed even
    # if the scan raises; stdout is wrapped in nullcontext so it is never
    # closed by us.
    if args.outfile:
        out_cm = open(args.outfile, "w")
    else:
        out_cm = contextlib.nullcontext(sys.stdout)

    with out_cm as outfh:
        # uid -> {inode: size}. Keying by inode means several paths that
        # share one inode are counted (and sized) only once per uid.
        bigdict = dict()

        with Pool(processes=args.threads) as pool:
            for fd in tqdm.tqdm(pool.imap_unordered(task, files), total=len(files)):
                if not fd.is_file():
                    continue
                per_user = bigdict.setdefault(fd.get_uid(), dict())
                # setdefault keeps the first size seen for an inode.
                per_user.setdefault(fd.get_inode(), fd.get_size())

        for uid, per_user in bigdict.items():
            username = get_username_groupname(uid)
            nfiles = len(per_user)
            nbytes = sum(per_user.values())
            outfh.write(f"{username}\t{nfiles}\t{nbytes}\n")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
0.11.4 | ||
0.11.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from pathlib import Path | ||
import sys | ||
|
||
def get_type(p):  # copy paste from FileDetails
    """Classify a path and return a one-character type code.

    Args:
        p: ``pathlib`` path object (PosixPath) to classify.

    Returns:
        One of:
            "u" = unknown (neither file nor directory, or the path could
                  not be read)
            "L" = broken symlink
            "l" = symlink
            "a" = absent (path does not exist)
            "f" = file
            "d" = folder or directory

    Problems (broken symlink, unreadable path) are reported on STDERR;
    no exception is raised for them.
    """
    try:
        if p.is_symlink():
            # Path.exists() follows the link and *returns False* for a
            # dangling target rather than raising, so the return value has
            # to be checked. An OSError while resolving the target is
            # treated as broken as well.
            try:
                target_ok = p.exists()
            except OSError:
                target_ok = False
            if target_ok:
                return "l"  # link or symlink
            sys.stderr.write("spacesavers2:Broken symlink found:{}\n".format(p))
            return "L"  # upper case L is broken symlink
        if not p.exists():
            return "a"  # absent
        if p.is_dir():
            return "d"  # directory
        if p.is_file():
            return "f"  # file
    except (OSError, ValueError):  # mainly to catch PermissionError
        sys.stderr.write("spacesavers2:File cannot be read:{}\n".format(p))
    return "u"  # unknown
|
||
class pdq: | ||
def __init__(self): | ||
self.inode = -1 | ||
self.fld = "u" # u or f or l or d | ||
self.size = -1 | ||
self.uid = 0 | ||
def set(self,p,st_block_byte_size=512): | ||
p = Path(p).absolute() | ||
try: | ||
st = p.stat(follow_symlinks=False) | ||
self.size = st.st_blocks * st_block_byte_size | ||
self.inode = st.st_ino | ||
self.uid = st.st_uid | ||
self.fld = get_type(p) | ||
except: | ||
print(f"spacesavers2_pdq: {p} File not found!") | ||
def get_uid(self): | ||
return self.uid | ||
def get_fld(self): | ||
return self.fld | ||
def is_file(self): | ||
if self.fld == "f": return True | ||
return False | ||
def get_inode(self): | ||
return self.inode | ||
def get_size(self): | ||
return self.size |