This repository has been archived by the owner on Oct 4, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqcut_examples.py
104 lines (25 loc) · 1.6 KB
/
qcut_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
# Load the data set that every example below works on.
df = pd.read_csv('mountains.csv')

# (1) Show the complete DataFrame for context.
print(df)

# (2) The column that all of the qcut calls below operate on.
heights = df["Height"]
print(heights)

# (3) Basic call that splits the column into five quantile-based bins.
# "labels", "precision", "retbins" and "duplicates" are spelled out with
# their default values purely for demonstration purposes.
hight_qcut_basic = pd.qcut(
    x=heights,
    q=5,
    labels=None,
    precision=3,
    retbins=False,
    duplicates="raise",
)
print(hight_qcut_basic)

# (4) Same cut with "precision=2" and "retbins=True": the bin labels are
# shown with the adjusted precision, and because of "retbins=True" the call
# returns a tuple of (binned values, array of bin boundaries).
hight_qcut_precision_retbins = pd.qcut(
    x=heights,
    q=5,
    labels=None,
    precision=2,
    retbins=True,
    duplicates="raise",
)
print(hight_qcut_precision_retbins)

# (5) With "labels=False" the bins are identified by integers instead of
# intervals. "retbins=True" again yields a (binned values, boundaries) tuple.
hight_qcut_labels_false = pd.qcut(
    x=heights,
    q=5,
    labels=False,
    precision=2,
    retbins=True,
    duplicates="raise",
)
print(hight_qcut_labels_false)

# (6) Custom labels: a list whose length must match the number of quantiles,
# which is why "q" is derived from the list itself.
hight_labels = ["Boring", "Modest", "Medium", "High", "Super High"]
hight_qcut_labels = pd.qcut(
    x=heights,
    q=len(hight_labels),
    labels=hight_labels,
    precision=2,
    retbins=False,
    duplicates="raise",
)
print(hight_qcut_labels)

# (7) Quick overview of how many rows ended up in each labelled bin.
print(hight_qcut_labels.value_counts())

# (8) Store the generated labels as a new column of the original DataFrame.
df["Height_qcut"] = hight_qcut_labels