diff --git a/dataprocessing/process_manifolds.py b/dataprocessing/process_manifolds.py index ab66314..61ae64a 100644 --- a/dataprocessing/process_manifolds.py +++ b/dataprocessing/process_manifolds.py @@ -17,15 +17,8 @@ import numpy as np import pandas as pd import pydantic -import os -import os.path as osp -import ssl -import sys -import urllib -from typing import Optional -import fsspec -import gzip -import sys +from torch_geometric.data import extract_gz, makedirs, download_url + # Constants @@ -66,83 +59,6 @@ class Homology(pydantic.BaseModel): betti_numbers: List[int] -################################################################################ -### Copied from torch geometric -################################################################################ - - -def makedirs(path): - os.makedirs(path, exist_ok=True) - - -def maybe_log(path: str, log: bool = True) -> None: - if log and "pytest" not in sys.modules: - print(f"Extracting {path}", file=sys.stderr) - - -def download_url( - url: str, - folder: str, - log: bool = True, - filename: Optional[str] = None, -): - r"""Downloads the content of an URL to a specific folder. - - Args: - url (str): The URL. - folder (str): The folder. - log (bool, optional): If :obj:`False`, will not print anything to the - console. (default: :obj:`True`) - filename (str, optional): The filename of the downloaded file. If set - to :obj:`None`, will correspond to the filename given by the URL. - (default: :obj:`None`) - """ - if filename is None: - filename = url.rpartition("/")[2] - filename = filename if filename[0] == "?" else filename.split("?")[0] - - path = os.path.join(folder, filename) - - if fsspec.core.url_to_fs(path)[0].exists(path): # pragma: no cover - if log and "pytest" not in sys.modules: - print(f"Using existing file {filename}", file=sys.stderr) - return path - - if log and "pytest" not in sys.modules: - print(f"Downloading {url}", file=sys.stderr) - - os.makedirs(folder, exist_ok=True) - - context = ssl._create_unverified_context() - data = urllib.request.urlopen(url, context=context) - - with fsspec.open(path, "wb") as f: - # workaround for https://bugs.python.org/issue42853 - while True: - chunk = data.read(10 * 1024 * 1024) - if not chunk: - break - f.write(chunk) - - return path - - -def extract_gz(path: str, folder: str, log: bool = True) -> None: - r"""Extracts a gz archive to a specific folder. - - Args: - path (str): The path to the tar archive. - folder (str): The folder. - log (bool, optional): If :obj:`False`, will not print anything to the - console. (default: :obj:`True`) - """ - maybe_log(path, log) - path = osp.abspath(path) - with gzip.open(path, "r") as r: - with open(osp.join(folder, ".".join(path.split(".")[:-1])), "wb") as w: - w.write(r.read()) - - ################################################################################ ### Processing scripts ################################################################################ diff --git a/requirements.txt b/requirements.txt index 6ec0180..62f66cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ numpy pandas pydantic -fsspec \ No newline at end of file +torch-geometric \ No newline at end of file