Skip to content

Commit

Permalink
edit
Browse files Browse the repository at this point in the history
  • Loading branch information
kotaNakm committed Jun 16, 2023
1 parent aad3c0c commit 1169288
Show file tree
Hide file tree
Showing 11 changed files with 754 additions and 526 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ Note that RUL means remaining useful life.
| | Timestamp | #Sensor | #Alarm | RUL | License |
| :--- | :--: | :--: | :--: | :--: | :--- |
| ALPI* | x | | 140 | | CC-BY |
| CBM | x | 15 | 3 | | Other |
| CBM | x | 16 | 3 | | Other |
| CMAPSS | x | 26 | 2-6 | x | CC0: Public Domain |
| GDD | x | 5(1) | 3 | | CC-BY-NC-SA |
| GFD | x | 4 | 2 | | CC-BY-SA |
| HydSys* | x | 17 | 2-4 | | Other |
| MAPM* | x | 4 | 5 | x | Other |
| PPD | x | x | | x | CC-BY-SA |
| PPD | x | 25 | | x | CC-BY-SA |
| UFD | | 37-52 | 4 | | Other |

</center>
Expand Down
1 change: 0 additions & 1 deletion datasets/cmapss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@


def load_data(index="FD004", features=None):

if type(index) == str:
assert index in ["FD001", "FD002", "FD003", "FD004"]
elif type(index) == int:
Expand Down
130 changes: 127 additions & 3 deletions datasets/mapm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import os
import tqdm
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from collections import Counter
from sklearn import model_selection
from sklearn.model_selection import KFold



# Telemetry column names; used as the default `dim` (columns kept in each
# sequence) of load_failure_sequences_list.
sensors=["volt","rotate","pressure","vibration"]

def load_data():
fp = os.path.dirname(__file__)

Expand Down Expand Up @@ -41,8 +45,8 @@ def cleaning(df):
df.failure = df.failure.factorize()[0]
df = df.sort_values(["machineID", "datetime"])

df.errorID = df.errorID.astype("category")
df.failure = df.failure.astype("category")
df.errorID = df.errorID.astype("int")
df.failure = df.failure.astype("int")

df.volt = df.volt.astype("float32")
df.rotate = df.rotate.astype("float32")
Expand Down Expand Up @@ -283,3 +287,123 @@ def gen_summary(outdir=None):
fig.savefig(pp, format="pdf")
plt.clf()
plt.close()


def load_failure_sequences_list(dim=sensors):
    """Split the cleaned data into run-to-failure sequences, one per failure.

    Each machine's rows are ordered by ``datetime`` and cut at every row whose
    ``failure`` value is greater than -1.  A failure row opens the next
    segment, so segment *k* of a machine holds the rows that precede its
    *k*-th failure and is labelled with that failure's value.  Rows that
    follow a machine's last failure never reach a failure and are discarded.

    Parameters
    ----------
    dim : list of str
        Columns kept in every returned sequence (defaults to ``sensors``).

    Returns
    -------
    sequence_df_list : list of pandas.DataFrame
        Run-to-failure sequences [# of seq]; rows are time steps, columns are
        ``dim``, and each frame has a fresh 0-based index.
    failure_list : list
        Failure label of each sequence [# of seq].
    """
    clean_df = load_clean_data()
    sequence_df_list = []
    failure_list = []

    machines = tqdm(
        clean_df.groupby("machineID"),
        desc="Segment each machine data",
    )
    for _machine_id, m_df in machines:
        m_df = m_df.sort_values("datetime")

        is_failure = m_df["failure"] > -1
        failure_values = m_df.loc[is_failure, "failure"].values

        # The running count of failure rows is the segment number: every
        # failure row bumps the id of itself and of all later rows.  This
        # replaces one `.loc[ind:] += 1` pass per failure with a single scan
        # and leaves the frame returned by load_clean_data() untouched.
        m_df = m_df.assign(seq_id=is_failure.cumsum())
        segments = dict(iter(m_df.groupby("seq_id")))

        # Look segments up by number instead of zipping them with the labels:
        # if the machine's very first row is a failure, segment 0 does not
        # exist and a positional zip would shift every label by one.
        for seq_no, f_val in enumerate(failure_values):
            seq_df = segments.get(seq_no)
            if seq_df is None:
                continue
            sequence_df_list.append(
                seq_df.sort_values("datetime").reset_index(drop=True).loc[:, dim]
            )
            failure_list.append(f_val)

    return sequence_df_list, failure_list

def load_clean_data_rul_k_folds(
    split_ind,
    k=5,
    random_state=0,
):
    """Return the train/test sequences of one fold of a shuffled K-fold split.

    The run-to-failure sequences are loaded, filtered with ``refine_data``
    (default arguments) and labelled with ``add_rul`` before being split.

    Parameters
    ----------
    split_ind : int
        Position of the requested fold in the list of ``k`` splits.
    k : int
        Number of folds passed to ``KFold`` as ``n_splits``.
    random_state : int
        Seed passed to ``KFold``; shuffling is always enabled.

    Returns
    -------
    train_df_list, test_df_list : list of pandas.DataFrame
        Labelled sequences assigned to the training and test side of the fold.
    """
    sequences, failures = refine_data(*load_failure_sequences_list())
    labelled = add_rul(sequences, failures)

    splitter = KFold(n_splits=k, shuffle=True, random_state=random_state)
    folds = list(splitter.split(range(len(labelled))))
    train_positions, test_positions = folds[split_ind]

    train_df_list = [labelled[pos] for pos in train_positions]
    test_df_list = [labelled[pos] for pos in test_positions]
    return train_df_list, test_df_list


def refine_data(sequence_df_list, failure_list, event_type="only", min_len=100):
    """Filter run-to-failure sequences by failure complexity and by length.

    A sequence with at most one row is treated as the marker of a complex
    (multiple) failure event: it is always dropped.

    Parameters
    ----------
    sequence_df_list : list
        Sequences; only ``len()`` of each element is inspected.
    failure_list : list
        Failure label of each sequence, same length as ``sequence_df_list``.
    event_type : {"only", "all"}
        ``"only"`` keeps sequences that end in a single failure, i.e. the
        sequence immediately preceding each complex marker is dropped as well.
        ``"all"`` (keep everything and treat complex failures as new failure
        types) is not implemented.
    min_len : int
        Sequences must be strictly longer than this to be kept.

    Returns
    -------
    refined_sequence_df_list, refined_failure_list : list, list
        The surviving sequences and their labels, in the original order.

    Raises
    ------
    NotImplementedError
        If ``event_type`` is ``"all"``.
    ValueError
        If ``event_type`` is any other unsupported value.
    AssertionError
        If two complex markers are adjacent, i.e. a complex event involves
        three or more failures.
    """
    # The original bare `NotImplementedError` expressions never raised, so an
    # unsupported mode silently returned partially filtered data.
    if event_type == "all":
        raise NotImplementedError(
            'event_type="all" (complex failures as new failure types) '
            "is not implemented"
        )
    if event_type != "only":
        raise ValueError(
            f'unknown event_type {event_type!r}; expected "only" or "all"'
        )

    num_seq = len(failure_list)
    length_arr = np.array([len(seq) for seq in sequence_df_list], dtype=int)
    complex_ind = np.arange(num_seq)[length_arr <= 1]

    # whether all complex failures contain exactly two types of failure
    assert not np.any(np.diff(complex_ind) < 2), "complex failue events contain three or more failues"

    # drop the complex markers themselves
    keep = length_arr > 1

    # Drop the sequence leading up to each complex event.  A marker at
    # position 0 has no predecessor; without the guard, index -1 would wrap
    # around and discard the last sequence instead.
    # NOTE(review): the list is flat, so position c-1 may belong to a
    # different machine than position c -- confirm against the producer.
    keep[complex_ind[complex_ind > 0] - 1] = False

    # drop sequences that are not longer than min_len
    keep &= length_arr > min_len

    refined_sequence_df_list = [
        seq for seq, ok in zip(sequence_df_list, keep) if ok
    ]
    refined_failure_list = [
        failure for failure, ok in zip(failure_list, keep) if ok
    ]
    return refined_sequence_df_list, refined_failure_list


def add_rul(sequence_df_list, failure_list):
    """Attach ``Class`` and ``RUL`` columns to every sequence, in place.

    Parameters
    ----------
    sequence_df_list : list of pandas.DataFrame
        Sequences to label; each frame is modified directly.
    failure_list : list
        Value written to the ``Class`` column of the matching sequence.

    Returns
    -------
    list of pandas.DataFrame
        The same frame objects, now carrying ``Class`` (constant per
        sequence) and ``RUL`` (counts down from ``len - 1`` on the first row
        to 0 on the last row).
    """
    class_column = "Class"
    rul_column = "RUL"

    labelled = []
    for frame, failure in zip(sequence_df_list, failure_list):
        n_rows = len(frame)
        frame[class_column] = failure
        # countdown n_rows-1, ..., 1, 0
        frame[rul_column] = np.arange(n_rows - 1, -1, -1)
        labelled.append(frame)
    return labelled
18 changes: 15 additions & 3 deletions datasets/mapm/view_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1589,7 +1589,7 @@
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
},
"kernelspec": {
"display_name": "Python 3.6.9 64-bit",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -1603,9 +1603,21 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.11"
"version": "3.9.13"
},
"orig_nbformat": 4
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
Expand Down
211 changes: 91 additions & 120 deletions notebooks/view_alpi.ipynb

Large diffs are not rendered by default.

184 changes: 109 additions & 75 deletions notebooks/view_cbm.ipynb

Large diffs are not rendered by default.

19 changes: 16 additions & 3 deletions notebooks/view_gdd.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1238,7 +1238,8 @@
"hash": "a79856092f03529f78cf72db1144171d864473d6b010bd73997d0bc77ba1f76e"
},
"kernelspec": {
"display_name": "Python 3.9.5 64-bit ('poissonprocess': conda)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
Expand All @@ -1251,9 +1252,21 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.9.13"
},
"orig_nbformat": 4
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
Expand Down
80 changes: 62 additions & 18 deletions notebooks/view_hydsys.ipynb

Large diffs are not rendered by default.

19 changes: 16 additions & 3 deletions notebooks/view_mapm.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1413,7 +1413,8 @@
"hash": "16dbba425ba0731f581592bff1ceb33a70db2cdf9052cbf447b98983e28f2ced"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit ('survival': conda)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
Expand All @@ -1426,9 +1427,21 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.13"
},
"orig_nbformat": 4
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
Expand Down
19 changes: 16 additions & 3 deletions notebooks/view_ppd.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,8 @@
"hash": "6b7d4e3645d79f0dd99ff0b8fd94fe18257c4e26166458b56db964f0fb1abec2"
},
"kernelspec": {
"display_name": "Python 3.7.4 64-bit ('base': conda)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
Expand All @@ -667,9 +668,21 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
"version": "3.9.13"
},
"orig_nbformat": 4
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
Expand Down
Loading

0 comments on commit 1169288

Please sign in to comment.