Skip to content

Commit

Permalink
a
Browse files Browse the repository at this point in the history
  • Loading branch information
kotaNakm committed Aug 19, 2023
1 parent 1169288 commit a93af35
Show file tree
Hide file tree
Showing 12 changed files with 6,160 additions and 1,022 deletions.
1 change: 1 addition & 0 deletions datasets/alpi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def create_params_list(data_path, params, verbose=True):

# PHASE 1


# current_offset must be an integer number of minutes
def generate_dataset_by_serial_offset(data, params, current_offset):
data["current_offset"] = current_offset
Expand Down
1 change: 1 addition & 0 deletions datasets/alpi/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def create_params_list(data_path, params, verbose=True):

# PHASE 1


# current_offset must be an integer number of minutes
def generate_dataset_by_serial_offset(data, params, current_offset):
data["current_offset"] = current_offset
Expand Down
3 changes: 0 additions & 3 deletions datasets/cbm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@


def parse_feature_names(fn):

with open(fn) as f:
names, lines = [], f.readlines()
for line in lines:
Expand All @@ -17,7 +16,6 @@ def parse_feature_names(fn):


def load_data(shorten_feature_names=True):

fp = os.path.dirname(__file__)
raw_data = np.loadtxt(fp + "/data.txt.gz")
features = parse_feature_names(fp + "/Features.txt")
Expand Down Expand Up @@ -51,7 +49,6 @@ def load_clean_data():


def gen_summary(wd=400, outdir=None):

if outdir is None:
outdir = os.path.dirname(__file__)

Expand Down
44 changes: 33 additions & 11 deletions datasets/cmapss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import pandas as pd
import tqdm

# from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

Expand Down Expand Up @@ -86,13 +87,21 @@ def load_clean_data_rul(
):
train, test, labels = load_mesurement_list(index=index, features=features)
label = "RUL"
class_label_name = "Class"
if index=="FD001" or index=="FD002":
class_=1
elif index=="FD003" or index=="FD004":
class_=2
else:
class_=0

# train
train_df_list = []
for tt in train:
max_rul = len(tt)
rul_array = np.array(range(max_rul))[::-1]
tt[label] = rul_array
tt[class_label_name] = class_
train_df_list.append(tt)

# test
Expand All @@ -102,38 +111,55 @@ def load_clean_data_rul(
rul_array = np.array(range(max_rul))[::-1]
rul_array += int(la)
tt[label] = rul_array
tt[class_label_name] = class_
test_df_list.append(tt)

return train_df_list, test_df_list


def load_clean_data_rul_k_folds(
split_ind,
indices=["FD004",],
indices=[
"FD004",
],
k=5,
features=[2, 3, 4, 7, 11, 12, 15],
random_state=0,
use_test=True,
val=0.1,
):
df_list = []
for index in indices:
train_df_list, test_df_list = load_clean_data_rul(index=index, features=features,)
train_df_list, test_df_list = load_clean_data_rul(
index=index,
features=features,
)
df_list.extend(train_df_list)
df_list.extend(test_df_list)

if use_test:
df_list.extend(test_df_list)

data_index = range(len(df_list))

kf = KFold(
n_splits=k,
random_state=random_state,
shuffle=True,)
shuffle=True,
)

train_idx,test_idx = list(kf.split(data_index))[split_ind]
train_idx, test_idx = list(kf.split(data_index))[split_ind]
len(train_idx)
print(f"all index:{len(data_index)}")
print(train_idx)
print(test_idx)

new_train_df_list = [df_list[i] for i in train_idx]
new_test_df_list = [df_list[i] for i in test_idx]

return new_train_df_list, new_test_df_list




def load_mesurement_list(
index="FD004",
features=[2, 3, 4, 7, 11, 12, 15],
Expand Down Expand Up @@ -168,7 +194,6 @@ def load_mesurement_list(


def run_to_failure_aux(df, lifetime, unit_number):

assert lifetime <= df.shape[0]
broken = 0 if lifetime < df.shape[0] else 1
sample = pd.DataFrame(
Expand All @@ -187,7 +212,6 @@ def run_to_failure_aux(df, lifetime, unit_number):


def censoring_augmentation(raw_data, n_samples=10, seed=123):

np.random.seed(seed)
datasets = [g for _, g in raw_data.groupby("unit_number")]
timeseries = raw_data.groupby("unit_number").size()
Expand All @@ -206,7 +230,6 @@ def censoring_augmentation(raw_data, n_samples=10, seed=123):


def generate_run_to_failure(df, health_censor_aug=0, seed=123):

samples = []
for unit_id, timeseries in tqdm.tqdm(df.groupby("unit_number"), desc="RUL"):
samples.append(run_to_failure_aux(timeseries, timeseries.shape[0], unit_id))
Expand All @@ -227,7 +250,6 @@ def leave_one_out(
input_fn=None,
output_fn=None,
):

if input_fn is not None:
subsets = pd.read_csv(input_fn)

Expand Down
3 changes: 0 additions & 3 deletions datasets/gdd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@


def load_data(index="state"):

assert index in ["state", "anomaly", "normal", "linear", "pressure"]
fp = os.path.dirname(__file__)

Expand Down Expand Up @@ -163,15 +162,13 @@ def plot_genesis_nonlabels(df, figsize=(15, 20), cmap="tab10"):


def gen_summary(outdir=None):

if outdir is None:
outdir = os.path.dirname(__file__)

os.makedirs(outdir, exist_ok=True)
sns.set(font_scale=1.1, style="whitegrid")

with PdfPages(outdir + "/gdd_summary.pdf") as pp:

print("Plotting Genesis_StateMachineLabel...")
df = load_data(index="state")
fig, _ = plot_genesis_labels(df)
Expand Down
1 change: 0 additions & 1 deletion datasets/gfd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def plot_sequence(df, st=0, ed=None, ax=None, figsize=(10, 3), individual=True):


def gen_summary(outdir=None, st=0, ed=500, wd=20, hg=8):

if outdir is None:
outdir = os.path.dirname(__file__)

Expand Down
2 changes: 0 additions & 2 deletions datasets/hydsys/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@


def load_data(sensor=None, rw=0):

if sensor is None:
# load full data
# rw is ignored so that all sensor data can be concatenated
Expand All @@ -21,7 +20,6 @@ def load_data(sensor=None, rw=0):


def load_sensor_data(sensor, rw=0):

data = []
sensor_list = get_sensor_list(sensor)
fp = os.path.dirname(__file__)
Expand Down
Loading

0 comments on commit a93af35

Please sign in to comment.