Skip to content

Commit

Permalink
Merge pull request #80 from Oxid15/develop
Browse files Browse the repository at this point in the history
0.6.0
  • Loading branch information
Oxid15 authored Jul 31, 2022
2 parents 5f40383 + d1dbcba commit 6ad91da
Show file tree
Hide file tree
Showing 80 changed files with 4,892 additions and 952 deletions.
20 changes: 16 additions & 4 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,33 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
pwd
ls
python -m pip install --upgrade pip
python -m pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f utils_requirements.txt ]; then pip install -r utils_requirements.txt; fi
python -m pip install -r requirements.txt
python -m pip install -r cascade/tests/requirements.txt
python -m pip install -r utils_requirements.txt
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test
run: |
pwd
ls
python --version
python -m unittest discover ./cascade/tests
cd ./cascade/tests
pytest --cov=cascade .
- name: Test utils
run: |
pwd
ls
cd ./cascade/utils/tests
pytest
4 changes: 2 additions & 2 deletions cascade/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
limitations under the License.
"""

__version__ = '0.5.2'
__version__ = '0.6.0'
__author__ = 'Ilia Moiseev'
__author_email__ = '[email protected]'

Expand All @@ -25,7 +25,7 @@
from . import tests

# cascade does not have
# `from . import utils`
# from . import utils
# because it will bring additional dependencies that may not be needed by the user
# if you need to use cascade.utils, you can install utils_requirements.txt and then
# import as any other cascade module
1 change: 1 addition & 0 deletions cascade/base/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .meta_handler import MetaHandler
from .traceable import Traceable
from .meta_handler import CustomEncoder as JSONEncoder
102 changes: 89 additions & 13 deletions cascade/base/meta_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,28 @@

import os
import json
from typing import Union, Dict, List
import datetime
from typing import List, Dict
from json import JSONEncoder

import yaml
import numpy as np


class CustomEncoder(JSONEncoder):
def default(self, obj):
if isinstance(obj, type):
return str(obj)
if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
return obj.isoformat()

elif isinstance(obj, datetime.timedelta):
return (datetime.datetime.min + obj).time().isoformat()

elif isinstance(obj, (np.int_, np.intc, np.intp, np.int8,
np.int16, np.int32, np.int64, np.uint8,
np.uint16, np.uint32, np.uint64)):

return int(obj)

elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)):
Expand All @@ -44,29 +49,39 @@ def default(self, obj):
elif isinstance(obj, (np.ndarray,)):
return obj.tolist()

elif isinstance(obj, (np.bool_)):
elif isinstance(obj, np.bool_):
return bool(obj)

elif isinstance(obj, (np.void)):
elif isinstance(obj, np.void):
return None

return super(CustomEncoder, self).default(obj)

def obj_to_dict(self, obj):
return json.loads(self.encode(obj))

class MetaHandler:

class BaseHandler:
    """
    Common interface of the format-specific meta handlers.
    Both methods are placeholders that subclasses are expected to override.
    """
    def read(self, path) -> List[Dict]:
        """
        Reads meta from path. Not implemented in the base class.
        """
        raise NotImplementedError

    def write(self, path, obj, overwrite=True) -> None:
        """
        Writes obj to path. Not implemented in the base class.
        """
        raise NotImplementedError


class JSONHandler(BaseHandler):
"""
Handles the logic of dumping and loading json files
"""
def read(self, path) -> dict:
def read(self, path) -> Union[Dict, List[Dict]]:
"""
Reads json from path
Parameters
----------
path:
Path to the file. If no extension provided, then .json assumed
Path to the file. If no extension provided, then .json will be added
"""
assert os.path.exists(path)
_, ext = os.path.splitext(path)
if ext == '':
path += '.json'
Expand All @@ -77,16 +92,77 @@ def read(self, path) -> dict:
meta = json.loads(meta)
return meta

def write(self, name, obj, overwrite=True) -> None:
def write(self, name, obj:List[Dict], overwrite=True) -> None:
"""
Writes json to path using custom encoder
"""

if not overwrite and os.path.exists(name):
return

with open(name, 'w') as json_meta:
json.dump(obj, json_meta, cls=CustomEncoder, indent=4)
with open(name, 'w') as f:
json.dump(obj, f, cls=CustomEncoder, indent=4)


class YAMLHandler(BaseHandler):
    """
    Handles the logic of dumping and loading yaml files
    """
    def read(self, path) -> Union[Dict, List[Dict]]:
        """
        Loads meta from a yaml file

        Parameters
        ----------
        path:
            Path to the file. When it has no extension, `.yml` is appended
        """
        has_ext = os.path.splitext(path)[1] != ''
        target = path if has_ext else path + '.yml'

        with open(target, 'r') as stream:
            return yaml.safe_load(stream)

    def write(self, path, obj, overwrite=True) -> None:
        """
        Dumps obj to a yaml file at path

        The object is first round-tripped through CustomEncoder, so only
        plain JSON-compatible values reach the yaml dumper.
        Does nothing when overwrite is False and path already exists.
        """
        if overwrite or not os.path.exists(path):
            plain = CustomEncoder().obj_to_dict(obj)
            with open(path, 'w') as stream:
                yaml.safe_dump(plain, stream)

    def encode(self, obj):
        """
        Returns obj serialized by CustomEncoder
        """
        encoder = CustomEncoder()
        return encoder.encode(obj)

class TextHandler(BaseHandler):
    """
    Read-only handler for plain text files
    """
    def read(self, path) -> Dict:
        """
        Reads the whole text file and returns a dict in the form {path: 'text from file'}

        Parameters
        ----------
        path:
            Path to the file
        """
        with open(path, 'r') as stream:
            contents = stream.read()
        return {path: contents}

    def write(self, path, obj, overwrite=True) -> None:
        """
        Not supported - always raises NotImplementedError
        """
        message = 'MetaHandler does not write text files, only reads'
        raise NotImplementedError(message)


class MetaHandler:
    """
    Reads and writes meta files, delegating to a format-specific handler
    that is chosen by the extension of the path:

    - `.json` - JSONHandler
    - `.yml`, `.yaml` - YAMLHandler
    - anything else - TextHandler (read-only)
    """
    def read(self, path) -> List[Dict]:
        """
        Reads the file at path with the handler selected by its extension

        Parameters
        ----------
        path:
            Path to the file

        Returns
        -------
        Whatever the selected handler's `read` returns
        """
        handler = self._get_handler(path)
        return handler.read(path)

    def write(self, path, obj, overwrite=True) -> None:
        """
        Writes obj to path with the handler selected by its extension

        Parameters
        ----------
        path:
            Path to the file
        obj:
            Object to write
        overwrite: bool
            Passed through to the handler's `write`

        Raises
        ------
        NotImplementedError
            If the extension maps to TextHandler, which cannot write
        """
        handler = self._get_handler(path)
        return handler.write(path, obj, overwrite=overwrite)

    def _get_handler(self, path) -> BaseHandler:
        """
        Returns a new handler instance matching the extension of path
        """
        ext = os.path.splitext(path)[-1]
        if ext == '.json':
            return JSONHandler()
        # Both spellings of the YAML extension are in common use; without
        # `.yaml` here such files would be returned as raw text.
        if ext in ('.yml', '.yaml'):
            return YAMLHandler()
        return TextHandler()
17 changes: 11 additions & 6 deletions cascade/base/traceable.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import warnings
from typing import List, Dict
from typing import List, Dict, Union


class Traceable:
def __init__(self, *args, meta_prefix=None, **kwargs) -> None:
if meta_prefix is None:
meta_prefix = {}
if isinstance(meta_prefix, str):
from . import MetaHandler

meta_prefix = MetaHandler().read(meta_prefix)
elif isinstance(meta_prefix, str):
meta_prefix = self._read_meta_from_file(meta_prefix)
self.meta_prefix = meta_prefix

def _read_meta_from_file(self, path: str) -> Union[List[Dict], Dict]:
from . import MetaHandler
return MetaHandler().read(path)

def get_meta(self) -> List[Dict]:
"""
Returns
Expand All @@ -30,10 +32,13 @@ def get_meta(self) -> List[Dict]:
self._warn_no_prefix()
return [meta]

def update_meta(self, obj: Dict) -> None:
def update_meta(self, obj: Union[Dict, str]) -> None:
"""
Updates meta_prefix, which then updates the dataset's meta when get_meta() is called
"""
if isinstance(obj, str):
obj = self._read_meta_from_file(obj)

if hasattr(self, 'meta_prefix'):
self.meta_prefix.update(obj)
else:
Expand Down
6 changes: 3 additions & 3 deletions cascade/data/concatenator.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ def __repr__(self) -> str:

def get_meta(self) -> List[Dict]:
"""
Concatenator calls `get_meta()` of all its datasets and appends to its own meta
Concatenator calls `get_meta()` of all its datasets
"""
meta = super().get_meta()
meta[0]['data'] = []
meta[0]['data'] = {}
for ds in self._datasets:
meta[0]['data'] += ds.get_meta()
meta[0]['data'][repr(ds)] = ds.get_meta()
return meta
7 changes: 6 additions & 1 deletion cascade/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ def __getitem__(self, item):
def __iter__(self):
for item in self._data:
yield item

def get_meta(self):
meta = super().get_meta()
meta[0]['obj_type'] = str(type(self._data))
return meta


class Wrapper(Dataset):
Expand All @@ -95,7 +100,7 @@ def __len__(self) -> int:
def get_meta(self):
meta = super().get_meta()
meta[0]['len'] = len(self)
meta[0]['obj_type'] = type(self._data)
meta[0]['obj_type'] = str(type(self._data))
return meta


Expand Down
6 changes: 3 additions & 3 deletions cascade/data/pickler.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ class Pickler(Modifier):
"""
def __init__(self, path, dataset=None, *args, **kwargs) -> None:
"""
Loads pickled dataset or dumps one depending on parameters passed
Loads pickled dataset or dumps one depending on parameters passed:
If only path is passed - loads dataset from path provided if path exists
if path provided with a dataset dumps dataset to the path
1. If only path is passed - loads dataset from path provided if path exists
2. If path is provided with a dataset - dumps the dataset to the path
Parameters
----------
Expand Down
13 changes: 13 additions & 0 deletions cascade/data/random_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,20 @@


class RandomSampler(Sampler):
"""
Shuffles dataset. Can randomly sample from dataset
if num_samples is not None and less than length of dataset.
"""
def __init__(self, dataset: Dataset, num_samples=None, **kwargs) -> None:
"""
Parameters
----------
dataset: Dataset
Input dataset to sample from
num_samples: int, optional
Should be less than len(dataset), but oversampling can be added in the future.
If None, then just shuffles the dataset.
"""
if num_samples is None:
num_samples = len(dataset)
super().__init__(dataset, num_samples, **kwargs)
Expand Down
1 change: 1 addition & 0 deletions cascade/docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
sphinx
furo
sphinx-copybutton
nbsphinx
5 changes: 0 additions & 5 deletions cascade/docs/source/build_pipeline.py

This file was deleted.

19 changes: 19 additions & 0 deletions cascade/docs/source/cascade.base.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
cascade.base
============
.. autoclass:: cascade.base.Traceable
:members:

|
|
|
.. autoclass:: cascade.base.MetaHandler
:members:

|
|
|
12 changes: 10 additions & 2 deletions cascade/docs/source/cascade.data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,14 @@ cascade.data
|

.. autoclass:: cascade.data.FolderDataset
:members:

|
|
|
|
.. autoclass:: cascade.data.Pickler
:members:
Expand All @@ -99,6 +98,15 @@ cascade.data
|
|
.. autoclass:: cascade.data.RandomSampler
:members:

|
|
|
.. autoclass:: cascade.data.SequentialCacher
Expand Down
Loading

0 comments on commit 6ad91da

Please sign in to comment.