-
Notifications
You must be signed in to change notification settings - Fork 0
/
todo.yml
164 lines (140 loc) · 6.11 KB
/
todo.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#
# JBZoo Toolbox - Csv-Blueprint.
#
# This file is part of the JBZoo Toolbox project.
# For the full copyright and license information, please view the LICENSE
# file that was distributed with this source code.
#
# @license MIT
# @copyright Copyright (C) JBZoo.com, All rights reserved.
# @see https://github.com/JBZoo/Csv-Blueprint
#
# This file contains just ideas. It is not a valid schema!
csv:  # How to parse the file before validation
  auto_detect: false  # If true, then the control chars will be detected automatically.
  empty_values:  # List of values that will be treated as empty
    - ""  # By default, only empty string is treated as empty (string length = 0).
    # The entries below are quoted so that they always stay strings:
    # a bare null is parsed as YAML null, not as the text "null".
    - "null"
    - "none"
    - "empty"
    - "nil"
structural_rules:
  file_ends_with_newline: false  # If true, then the file must end with a newline character (\n).
  allow_blank_lines: true  # If true, then the file can contain blank lines.

  # Minimum number of columns in the file.
  # By default, it is equal to the number of columns in the schema.
  columns_count_min: null
  # Exact number of columns in the file.
  # By default, it is equal to the number of columns in the schema.
  columns_count: null
  # Maximum number of columns in the file.
  # By default, it is equal to the number of columns in the schema.
  columns_count_max: null

  # If true, then duplicate rows will be ignored.
  # Duplicate rows are rows that have the same values in all columns - 100% match.
  ignore_duplicate_rows: false
  # If true, then several columns may have the same name.
  allow_duplicate_column_names: false
columns:
  - empty_values: [ '' ]  # Override csv.empty_values. List of values that will be treated as empty.

    # Multi prop
    multiple: true
    multiple_separator: "|"  # Separator for multiple values

    faker: [ faker_method arg1 arg2 ]  # Faker method with arguments to generate random CSV data

    rules:
      is_null: true  # see csv.empty_values and column.empty_values
      _list: true  # Example: starts_with_list: [ 'a', 'b', 'c' ]

      # File system
      is_filename: true
      is_dirname: true
      is_relative_path: true

      is_scientific_notation: true
      is_positive: true
      is_positive_zero: true
      is_negative: true
      is_negative_zero: true
      soft_precision: "1.1 == 1.10"

      # TODO: WKT polygon check, see
      # https://stackoverflow.com/questions/69107743/regular-expression-for-wkt-polygon

      is_unicode: true
      is_ascii: true
      is_latin: true
      coordinates: "1.0, -2.0"

      # identifier
      is_bsn: true  # Validates a Dutch citizen service number (BSN).
      is_cnh: true  # Validates a Brazilian driver's license number (CNH).
      is_cnpj: true  # Validates a Brazilian company identifier (CNPJ).
      is_cpf: true  # Validates a Brazilian individual taxpayer identifier (CPF).
      is_nfe_access_key: true  # Validates a Brazilian Nota Fiscal Eletronica (NFe) access key.
      is_pis: true  # Validates a Brazilian individual social security number (PIS).
      is_hetu: true  # Validates a Finnish personal identity code (HETU).
      is_nip: true  # Validates a Polish taxpayer identification number (NIP).
      is_pesel: true  # Validates a Polish national identification number (PESEL).
      is_polish_id_card: true  # Validates a Polish identity card number.
      is_portuguese_nif: true  # Validates a Portuguese taxpayer number (NIF).

      # Logical OR for group of rules. If one of the rules is true, then the column is valid.
      group_or:
        - not_empty: true
          is_int: true
        - length_min: 3

      # Custom functions for validation
      # NOTE(review): the inline closure form would require eval() of PHP code taken
      # from the schema file - a code-execution risk. Prefer the callable-name forms.
      custom_func_1: 'static fn (string $cellValue): bool => $cellValue !== "";'  # eval????
      custom_func_2: '\My\Custom\Class::myMethod'
      custom_func_3: 'myFunction'

      # Combination of rules to make it easier to read
      custom_some_rule:
        - is_int: true
        - length_min: 3

    aggregate_rules:
      # https://github.com/markrogoyski/math-php#statistics---averages
      truncated_mean: [ 1, 25 ]  # 25 percent of observations trimmed from each end of distribution
      generalized_mean: [ 1, 2 ]  # p-power mean
      power_mean: [ 1, 2 ]  # p-power mean
      lehmer_mean: [ 1, 3 ]  # Lehmer mean of order p
      # 'n' is quoted on purpose: a bare n is a boolean for YAML 1.1 parsers.
      simple_moving_average: [ 1, 'n' ]  # SMA
      cumulative_moving_average: 1  # CMA
      exponential_moving_average: 1  # EMA

      # Logical OR for group of rules. If one of the rules is true, then the column is valid.
      group_or:
        - is_unique: true
          sorted: [ asc, natural ]
        - sum_min: 1.0
          sum_greater: 2.0

      # Custom function for validation
      # NOTE(review): the inline closure form would require eval() of PHP code taken
      # from the schema file - a code-execution risk. Prefer the callable-name forms.
      custom_func_1: 'static fn (array $cellValue): bool => $cellValue !== [];'  # eval????
      custom_func_2: 'My\Custom\Class::myMethod'
      custom_func_3: 'myFunction'
# Rules that involve more than one column (or grouped values of a column).
complex_rules:
  - sum_by_group:
      group_column: City
      sum_column: population
      sums:
        - [ New York, 10000 ]
        - [ Los Angeles, 20000 ]

  - count_by_group:
      group_column: City
      value: New York
      count: 10

  # Handler given as an inline closure. The parameter names must match the names
  # used in the closure body ($column0 / $column1).
  # NOTE(review): this form would require eval() of PHP code taken from the schema
  # file - a code-execution risk. Prefer the callable-name forms below.
  - handler: 'static fn (string $column0, string $column1): bool => $column0 === $column1;'  # eval????
    handler_args:
      - column:0
      - column:1

  # Handler given as a static method name.
  - handler: '\My\Complex\Rule::myMethod'
    handler_args:
      - column:0
      - column:1

  # Handler given as a function name.
  - handler: 'myFunction'
    handler_args:
      - column:0
      - column:1

  # Logical OR for group of rules. If one of the rules is true, then the column is valid.
  - group_or:
      - count_by_group:
          group_column: City
          value: New York
          count: 10
      - sum_by_group:
          group_column: City
          sum_column: population
          sums:
            - [ New York, 10000 ]
            - [ Los Angeles, 20000 ]
# Ideas for an analyser section.
# NOTE(review): confirm the intended nesting of `rules`, `other` and the option
# lists below before relying on this section.
analyser:
rules:
other:
# presumably the accepted language code formats - TODO confirm
- language_code:
- alpha-2
- alpha-3
# presumably the credit card matching modes - TODO confirm
- credit_card:
- all
- by_brand