Skip to content

Commit c0eb867

Browse files
committed
modified setup
1 parent cb60c1b commit c0eb867

File tree

2 files changed

+230
-39
lines changed

2 files changed

+230
-39
lines changed

nbs/tsfeatures.ipynb

+142-17
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,6 @@
9494
"from tsfeatures.utils import *"
9595
]
9696
},
97-
{
98-
"cell_type": "code",
99-
"execution_count": null,
100-
"metadata": {},
101-
"outputs": [],
102-
"source": [
103-
"# |export\n",
104-
"\n",
105-
"FREQS = {\"H\": 24, \"D\": 1, \"M\": 12, \"Q\": 4, \"W\": 1, \"Y\": 1}"
106-
]
107-
},
10897
{
10998
"cell_type": "code",
11099
"execution_count": null,
@@ -153,7 +142,12 @@
153142
" else:\n",
154143
" acfdiff2x = [np.nan] * 2\n",
155144
" # first autocorrelation coefficient\n",
156-
" acf_1 = acfx[1]\n",
145+
"\n",
146+
" try:\n",
147+
" acf_1 = acfx[1]\n",
148+
" except:\n",
149+
" acf_1 = np.nan\n",
150+
"\n",
157151
" # sum of squares of first 10 autocorrelation coefficients\n",
158152
" sum_of_sq_acf10 = np.sum((acfx[1:11]) ** 2) if size_x > 10 else np.nan\n",
159153
"    # first autocorrelation coefficient of differenced series\n",
@@ -255,7 +249,7 @@
255249
" if len(x) <= lags + 1:\n",
256250
" return {\"arch_lm\": np.nan}\n",
257251
" if demean:\n",
258-
" x -= np.mean(x)\n",
252+
" x = x - np.mean(x)\n",
259253
"\n",
260254
" size_x = len(x)\n",
261255
" mat = embed(x**2, lags + 1)\n",
@@ -431,7 +425,9 @@
431425
" except:\n",
432426
" return {\"flat_spots\": np.nan}\n",
433427
"\n",
434-
" rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max()"
428+
" rlex = np.array([sum(1 for i in g) for k, g in groupby(cutx)]).max()\n",
429+
" return {\"flat_spots\": rlex}\n",
430+
"\n"
435431
]
436432
},
437433
{
@@ -1103,8 +1099,15 @@
11031099
" time_x = add_constant(poly_m)\n",
11041100
" coefs = OLS(trend0, time_x).fit().params\n",
11051101
"\n",
1106-
" linearity = coefs[1]\n",
1107-
" curvature = -coefs[2]\n",
1102+
"\n",
1103+
" try:\n",
1104+
" linearity = coefs[1]\n",
1105+
" except:\n",
1106+
" linearity = np.nan\n",
1107+
" try:\n",
1108+
" curvature = -coefs[2]\n",
1109+
" except:\n",
1110+
" curvature = np.nan\n",
11081111
" # ACF features\n",
11091112
" acfremainder = acf_features(remainder, m)\n",
11101113
" # Assemble features\n",
@@ -1194,6 +1197,56 @@
11941197
" return {\"unitroot_pp\": test_pp}"
11951198
]
11961199
},
1200+
{
1201+
"cell_type": "code",
1202+
"execution_count": null,
1203+
"metadata": {},
1204+
"outputs": [],
1205+
"source": [
1206+
"def statistics(x: np.array, freq: int = 1) -> Dict[str, float]:\n",
1207+
" \"\"\"Computes basic statistics of x.\n",
1208+
"\n",
1209+
" Parameters\n",
1210+
" ----------\n",
1211+
" x: numpy array\n",
1212+
" The time series.\n",
1213+
" freq: int\n",
1214+
" Frequency of the time series\n",
1215+
"\n",
1216+
" Returns\n",
1217+
" -------\n",
1218+
" dict\n",
1219+
" 'total_sum': Total sum of the series.\n",
1220+
" 'mean': Mean value.\n",
1221+
"        'variance': Variance of the time series.\n",
1222+
" 'median': Median value.\n",
1223+
"        'p2point5': 2.5 percentile.\n",
1224+
" 'p5': 5 percentile.\n",
1225+
" 'p25': 25 percentile.\n",
1226+
" 'p75': 75 percentile.\n",
1227+
" 'p95': 95 percentile.\n",
1228+
" 'p97point5': 97.5 percentile.\n",
1229+
" 'max': Max value.\n",
1230+
" 'min': Min value.\n",
1231+
" \"\"\"\n",
1232+
" res = dict(\n",
1233+
" total_sum=np.sum(x),\n",
1234+
" mean=np.mean(x),\n",
1235+
" variance=np.var(x, ddof=1),\n",
1236+
" median=np.median(x),\n",
1237+
" p2point5=np.quantile(x, q=0.025),\n",
1238+
" p5=np.quantile(x, q=0.05),\n",
1239+
" p25=np.quantile(x, q=0.25),\n",
1240+
" p75=np.quantile(x, q=0.75),\n",
1241+
" p95=np.quantile(x, q=0.95),\n",
1242+
" p97point5=np.quantile(x, q=0.975),\n",
1243+
" max=np.max(x),\n",
1244+
" min=np.min(x),\n",
1245+
" )\n",
1246+
"\n",
1247+
" return res"
1248+
]
1249+
},
11971250
{
11981251
"cell_type": "code",
11991252
"execution_count": null,
@@ -1227,7 +1280,6 @@
12271280
" ],\n",
12281281
" dict_freqs=FREQS,\n",
12291282
"):\n",
1230-
" print(\"dict_freq\")\n",
12311283
" if freq is None:\n",
12321284
" inf_freq = pd.infer_freq(ts[\"ds\"])\n",
12331285
" if inf_freq is None:\n",
@@ -1334,6 +1386,79 @@
13341386
" return ts_features"
13351387
]
13361388
},
1389+
{
1390+
"cell_type": "code",
1391+
"execution_count": null,
1392+
"metadata": {},
1393+
"outputs": [],
1394+
"source": [
1395+
"def _get_feats_wide(index,\n",
1396+
" ts,\n",
1397+
" scale = True,\n",
1398+
" features = [acf_features, arch_stat, crossing_points,\n",
1399+
" entropy, flat_spots, heterogeneity, holt_parameters,\n",
1400+
" lumpiness, nonlinearity, pacf_features, stl_features,\n",
1401+
" stability, hw_parameters, unitroot_kpss, unitroot_pp,\n",
1402+
" series_length, hurst]):\n",
1403+
" seasonality = ts['seasonality'].item()\n",
1404+
" y = ts['y'].item()\n",
1405+
" y = np.array(y)\n",
1406+
"\n",
1407+
" if scale:\n",
1408+
" y = scalets(y)\n",
1409+
"\n",
1410+
" c_map = ChainMap(*[dict_feat for dict_feat in [func(y, seasonality) for func in features]])\n",
1411+
"\n",
1412+
" return pd.DataFrame(dict(c_map), index = [index])\n"
1413+
]
1414+
},
1415+
{
1416+
"cell_type": "code",
1417+
"execution_count": null,
1418+
"metadata": {},
1419+
"outputs": [],
1420+
"source": [
1421+
"def tsfeatures_wide(ts: pd.DataFrame,\n",
1422+
" features: List[Callable] = [acf_features, arch_stat, crossing_points,\n",
1423+
" entropy, flat_spots, heterogeneity,\n",
1424+
" holt_parameters, lumpiness, nonlinearity,\n",
1425+
" pacf_features, stl_features, stability,\n",
1426+
" hw_parameters, unitroot_kpss, unitroot_pp,\n",
1427+
" series_length, hurst],\n",
1428+
" scale: bool = True,\n",
1429+
" threads: Optional[int] = None) -> pd.DataFrame:\n",
1430+
" \"\"\"Calculates features for time series.\n",
1431+
"\n",
1432+
" Parameters\n",
1433+
" ----------\n",
1434+
" ts: pandas df\n",
1435+
" Pandas DataFrame with columns ['unique_id', 'seasonality', 'y'].\n",
1436+
" Wide panel of time series.\n",
1437+
" features: iterable\n",
1438+
"        Iterable of feature functions.\n",
1439+
" scale: bool\n",
1440+
"        Whether to (mean-std) scale the data.\n",
1441+
" threads: int\n",
1442+
" Number of threads to use. Use None (default) for parallel processing.\n",
1443+
"\n",
1444+
" Returns\n",
1445+
" -------\n",
1446+
" pandas df\n",
1447+
" Pandas DataFrame where each column is a feature and each row\n",
1448+
" a time series.\n",
1449+
" \"\"\"\n",
1450+
" partial_get_feats = partial(_get_feats_wide, scale=scale,\n",
1451+
" features=features)\n",
1452+
"\n",
1453+
" with Pool(threads) as pool:\n",
1454+
" ts_features = pool.starmap(partial_get_feats, ts.groupby('unique_id'))\n",
1455+
"\n",
1456+
" ts_features = pd.concat(ts_features).rename_axis('unique_id')\n",
1457+
" ts_features = ts_features.reset_index()\n",
1458+
"\n",
1459+
" return ts_features"
1460+
]
1461+
},
13371462
{
13381463
"cell_type": "code",
13391464
"execution_count": null,

setup.py

+88-22
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,94 @@
1+
import shlex
2+
from configparser import ConfigParser
3+
14
import setuptools
5+
from pkg_resources import parse_version
6+
7+
assert parse_version(setuptools.__version__) >= parse_version("36.2")
8+
9+
# note: all settings are in settings.ini; edit there, not here
10+
config = ConfigParser(delimiters=["="])
11+
config.read("settings.ini", encoding="utf-8")
12+
cfg = config["DEFAULT"]
13+
14+
cfg_keys = "version description keywords author author_email".split()
15+
expected = (
16+
cfg_keys
17+
+ "lib_name user branch license status min_python audience language".split()
18+
)
19+
for o in expected:
20+
assert o in cfg, "missing expected setting: {}".format(o)
21+
setup_cfg = {o: cfg[o] for o in cfg_keys}
222

3-
with open("README.md", "r") as fh:
4-
long_description = fh.read()
23+
licenses = {
24+
"apache2": (
25+
"Apache Software License 2.0",
26+
"OSI Approved :: Apache Software License",
27+
),
28+
"mit": ("MIT License", "OSI Approved :: MIT License"),
29+
"gpl2": (
30+
"GNU General Public License v2",
31+
"OSI Approved :: GNU General Public License v2 (GPLv2)",
32+
),
33+
"gpl3": (
34+
"GNU General Public License v3",
35+
"OSI Approved :: GNU General Public License v3 (GPLv3)",
36+
),
37+
"agpl3": (
38+
"GNU Affero General Public License v3",
39+
"OSI Approved :: GNU Affero General Public License (AGPLv3)",
40+
),
41+
"bsd3": ("BSD License", "OSI Approved :: BSD License"),
42+
}
43+
statuses = [
44+
"0 - Pre-Planning",
45+
"1 - Planning",
46+
"2 - Pre-Alpha",
47+
"3 - Alpha",
48+
"4 - Beta",
49+
"5 - Production/Stable",
50+
"6 - Mature",
51+
"7 - Inactive",
52+
]
53+
py_versions = "3.7 3.8 3.9 3.10 3.11".split()
54+
55+
requirements = shlex.split(cfg.get("requirements", ""))
56+
if cfg.get("pip_requirements"):
57+
requirements += shlex.split(cfg.get("pip_requirements", ""))
58+
min_python = cfg["min_python"]
59+
lic = licenses.get(cfg["license"].lower(), (cfg["license"], None))
60+
dev_requirements = (cfg.get("dev_requirements") or "").split()
61+
project_urls = {}
62+
if cfg.get("doc_host"):
63+
project_urls["Documentation"] = cfg["doc_host"] + cfg.get("doc_baseurl", "")
564

665
setuptools.setup(
7-
name="tsfeatures",
8-
version="0.4.5",
9-
description="Calculates various features from time series data.",
10-
long_description=long_description,
11-
long_description_content_type="text/markdown",
12-
url="https://github.com/Nixtla/tsfeatures",
13-
packages=setuptools.find_packages(),
66+
name=cfg["lib_name"],
67+
license=lic[0],
1468
classifiers=[
15-
"Programming Language :: Python :: 3",
16-
"License :: OSI Approved :: MIT License",
17-
"Operating System :: OS Independent",
18-
],
19-
python_requires=">=3.7",
20-
install_requires=[
21-
"antropy>=0.1.4",
22-
"arch>=4.11",
23-
"pandas>=1.0.5",
24-
"scikit-learn>=0.23.1",
25-
"statsmodels>=0.13.2",
26-
"supersmoother>=0.4",
27-
],
69+
"Development Status :: " + statuses[int(cfg["status"])],
70+
"Intended Audience :: " + cfg["audience"].title(),
71+
"Natural Language :: " + cfg["language"].title(),
72+
]
73+
+ [
74+
"Programming Language :: Python :: " + o
75+
for o in py_versions[py_versions.index(min_python) :]
76+
]
77+
+ (["License :: " + lic[1]] if lic[1] else []),
78+
url=cfg["git_url"],
79+
packages=setuptools.find_packages(),
80+
include_package_data=True,
81+
install_requires=requirements,
82+
extras_require={"dev": dev_requirements},
83+
dependency_links=cfg.get("dep_links", "").split(),
84+
python_requires=">=" + cfg["min_python"],
85+
long_description=open("README.md", encoding="utf8").read(),
86+
long_description_content_type="text/markdown",
87+
zip_safe=False,
88+
entry_points={
89+
"console_scripts": cfg.get("console_scripts", "").split(),
90+
"nbdev": [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'],
91+
},
92+
project_urls=project_urls,
93+
**setup_cfg,
2894
)

0 commit comments

Comments
 (0)