Skip to content

Commit 92eb084

Browse files
committed
ci(check-fixtures): parallelise fixture checking commands
Introduces several fixture-checking commands to the `run-tests.sh` checking script (`--check-jsonlint`, `--check-record-ids`, `--check-record-dois`, `--check-docs-slugs`, `--check-secondary-types`, `--check-trailing-whitespace`) in order to make the lengthy `--check-fixtures` process parallelisable. Removes `--check-pycodestyle` command since we are using the Black code formatter. Amends CI accordingly, and renames formatters and linters for clarity. Amends `.editorconfig` to add rules for shell scripts and to remove ReST file rules as we have switched to Markdown. BREAKING CHANGE: Drops `run-tests.sh --check-fixtures` and `run-tests.sh --check-pycodestyle` commands.
1 parent 952b104 commit 92eb084

File tree

4 files changed

+151
-106
lines changed

4 files changed

+151
-106
lines changed

Diff for: .editorconfig

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22
#
33
# This file is part of CERN Open Data Portal.
4-
# Copyright (C) 2016 CERN.
4+
# Copyright (C) 2016, 2025 CERN.
55
#
66
# CERN Open Data Portal is free software; you can redistribute it
77
# and/or modify it under the terms of the GNU General Public License as
@@ -40,8 +40,8 @@ known_first_party = cernopendata
4040
multi_line_output = 2
4141
default_section = THIRDPARTY
4242

43-
# RST files (used by sphinx)
44-
[*.rst]
43+
# Shell script files
44+
[*.sh]
4545
indent_size = 4
4646

4747
# CSS, HTML, JS, JSON, YML

Diff for: .github/workflows/ci.yml

+80-40
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This file is part of CERN Open Data Portal.
2-
# Copyright (C) 2020, 2023, 2024 CERN.
2+
# Copyright (C) 2020, 2023, 2024, 2025 CERN.
33
#
44
# CERN Open Data Portal is free software; you can redistribute it
55
# and/or modify it under the terms of the GNU General Public License as
@@ -20,23 +20,12 @@
2020
# waive the privileges and immunities granted to it by virtue of its status
2121
# as an Intergovernmental Organization or submit itself to any jurisdiction.
2222

23-
name: CI
23+
name: ci
2424

2525
on: [push, pull_request]
2626

2727
jobs:
28-
lint-shellcheck:
29-
runs-on: ubuntu-20.04
30-
steps:
31-
- name: Checkout
32-
uses: actions/checkout@v2
33-
34-
- name: Runs shell script static analysis
35-
run: |
36-
sudo apt-get install shellcheck
37-
./run-tests.sh --check-shellscript
38-
39-
lint-black:
28+
format-black:
4029
runs-on: ubuntu-20.04
4130
steps:
4231
- uses: actions/checkout@v2
@@ -51,7 +40,7 @@ jobs:
5140
pip install black
5241
./run-tests.sh --check-black
5342
54-
lint-pycodestyle:
43+
format-isort:
5544
runs-on: ubuntu-20.04
5645
steps:
5746
- name: Checkout
@@ -62,11 +51,25 @@ jobs:
6251
with:
6352
python-version: 3.9
6453

65-
- name: Check compliance with Python coding style conventions
54+
- name: Check isort
6655
run: |
6756
pip install --upgrade pip
68-
pip install pycodestyle
69-
./run-tests.sh --check-pycodestyle
57+
pip install isort
58+
./run-tests.sh --check-isort
59+
60+
lint-jsonlint:
61+
runs-on: ubuntu-24.04
62+
steps:
63+
- name: Checkout
64+
uses: actions/checkout@v4
65+
66+
- name: Setup Node
67+
uses: actions/setup-node@v4
68+
69+
- name: Lint JSON files
70+
run: |
71+
npm install jsonlint --global
72+
./run-tests.sh --check-jsonlint
7073
7174
lint-pydocstyle:
7275
runs-on: ubuntu-20.04
@@ -85,39 +88,28 @@ jobs:
8588
pip install pydocstyle
8689
./run-tests.sh --check-pydocstyle
8790
88-
check-fixtures:
91+
lint-shellcheck:
8992
runs-on: ubuntu-20.04
9093
steps:
9194
- name: Checkout
9295
uses: actions/checkout@v2
9396

94-
- name: Setup node
95-
uses: actions/setup-node@v1
96-
with:
97-
node-version: "14"
98-
99-
- name: Install Node dependencies
100-
run: npm install -g jsonlint
101-
102-
- name: Check fixtures
103-
run: ./run-tests.sh --check-fixtures
97+
- name: Runs shell script static analysis
98+
run: |
99+
sudo apt-get install shellcheck
100+
./run-tests.sh --check-shellcheck
104101
105-
check-isort:
106-
runs-on: ubuntu-20.04
102+
check-docs-slugs:
103+
runs-on: ubuntu-24.04
107104
steps:
108105
- name: Checkout
109106
uses: actions/checkout@v2
110107

111-
- name: Setup Python
112-
uses: actions/setup-python@v2
113-
with:
114-
python-version: 3.9
108+
- name: Setup node
109+
uses: actions/setup-node@v4
115110

116-
- name: Check isort
117-
run: |
118-
pip install --upgrade pip
119-
pip install isort
120-
./run-tests.sh --check-isort
111+
- name: Check docs slugs
112+
run: ./run-tests.sh --check-docs-slugs
121113

122114
check-licenses:
123115
runs-on: ubuntu-20.04
@@ -134,3 +126,51 @@ jobs:
134126
run: |
135127
pip install --upgrade pip
136128
./run-tests.sh --check-licenses
129+
130+
check-record-dois:
131+
runs-on: ubuntu-24.04
132+
steps:
133+
- name: Checkout
134+
uses: actions/checkout@v2
135+
136+
- name: Setup node
137+
uses: actions/setup-node@v4
138+
139+
- name: Check record DOIs
140+
run: ./run-tests.sh --check-record-dois
141+
142+
check-record-ids:
143+
runs-on: ubuntu-24.04
144+
steps:
145+
- name: Checkout
146+
uses: actions/checkout@v2
147+
148+
- name: Setup node
149+
uses: actions/setup-node@v4
150+
151+
- name: Check record IDs
152+
run: ./run-tests.sh --check-record-ids
153+
154+
check-secondary-types:
155+
runs-on: ubuntu-24.04
156+
steps:
157+
- name: Checkout
158+
uses: actions/checkout@v2
159+
160+
- name: Setup node
161+
uses: actions/setup-node@v4
162+
163+
- name: Check secondary types
164+
run: ./run-tests.sh --check-secondary-types
165+
166+
check-trailing-whitespace:
167+
runs-on: ubuntu-24.04
168+
steps:
169+
- name: Checkout
170+
uses: actions/checkout@v2
171+
172+
- name: Setup node
173+
uses: actions/setup-node@v4
174+
175+
- name: Check trailing whitespace
176+
run: ./run-tests.sh --check-trailing-whitespace

Diff for: run-tests.sh

+67-62
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22
#
33
# This file is part of CERN Open Data Portal.
4-
# Copyright (C) 2015, 2016, 2017, 2018, 2019, 2020, 2024 CERN.
4+
# Copyright (C) 2015, 2016, 2017, 2018, 2019, 2020, 2024, 2025 CERN.
55
#
66
# CERN Open Data Portal is free software; you can redistribute it
77
# and/or modify it under the terms of the GNU General Public License as
@@ -22,62 +22,58 @@
2222
# waive the privileges and immunities granted to it by virtue of its status
2323
# as an Intergovernmental Organization or submit itself to any jurisdiction.
2424

25-
# quit on errors and potentially unbound symbols:
2625
set -o errexit
2726
set -o nounset
2827

29-
check_script () {
30-
shellcheck run-tests.sh
31-
}
32-
33-
check_black () {
28+
check_black() {
3429
black --check .
3530
}
3631

37-
38-
check_fixtures () {
39-
# check for possibly incorrect JSON files:
40-
find data/ -name "*.json" -exec jsonlint -q {} \;
41-
42-
# check record ID uniqueness:
43-
dupes=$(jq '.[].recid' data/{records,skeletons}/*.json | sort | uniq -d)
32+
check_docs_slugs() {
33+
# shellcheck disable=SC2044
34+
dupes=$(for file in $(find data/docs -name "*.json"); do jq '.[].slug' "$file"; done | sort | grep -v null | uniq -d)
4435
if [ "x${dupes}" != "x" ]; then
45-
echo "[ERROR] Found duplicate record IDs:"
36+
echo "[ERROR] Found duplicate docs slugs:"
4637
echo "${dupes}"
4738
exit 1
4839
fi
40+
}
41+
42+
check_isort() {
43+
isort -rc -c -df --profile black -- **/*.py
44+
}
45+
46+
check_jsonlint() {
47+
find . -name "*.json" -exec jsonlint -q {} \+
48+
}
49+
50+
check_licenses() {
51+
scripts/check_licenses.py
52+
}
4953

50-
# check DOI uniqueness:
54+
check_pydocstyle() {
55+
pydocstyle scripts
56+
}
57+
58+
check_record_dois() {
5159
dupes=$(jq '.[].doi' data/{records,skeletons}/*.json | sort | grep -v null | uniq -d)
5260
if [ "x${dupes}" != "x" ]; then
5361
echo "[ERROR] Found duplicate record DOIs:"
5462
echo "${dupes}"
5563
exit 1
5664
fi
65+
}
5766

58-
# check docs slug uniqueness:
59-
# shellcheck disable=SC2044
60-
dupes=$(for file in $(find data/docs -name "*.json"); do jq '.[].slug' "$file"; done | sort | grep -v null | uniq -d)
67+
check_record_ids() {
68+
dupes=$(jq '.[].recid' data/{records,skeletons}/*.json | sort | uniq -d)
6169
if [ "x${dupes}" != "x" ]; then
62-
echo "[ERROR] Found duplicate docs slugs:"
70+
echo "[ERROR] Found duplicate record IDs:"
6371
echo "${dupes}"
6472
exit 1
6573
fi
74+
}
6675

67-
# check trailing whitespace:
68-
whitespace_found_p=0
69-
for file in $(git ls-files | grep -E '.(py|html|css|json|md|sh|txt|yml)$'); do
70-
if grep -q ' $' "$file"; then
71-
whitespace_found_p=1
72-
echo "[ERROR] Found trailing whitespace in ${file}."
73-
fi
74-
done
75-
76-
if [ "${whitespace_found_p}" != "0" ]; then
77-
exit 1
78-
fi
79-
80-
# check for empty secondary type in fixtures
76+
check_secondary_types() {
8177
# shellcheck disable=SC2044
8278
for file in $(find data/{records,docs}/ -name "*.json"); do
8379
secondaries=$(jq '.[].type.secondary' "$file" -c | sort | uniq)
@@ -87,46 +83,55 @@ check_fixtures () {
8783
done
8884
}
8985

90-
check_pycodestyle () {
91-
pycodestyle --max-line-length=120 scripts
92-
}
93-
94-
check_pydocstyle () {
95-
pydocstyle scripts
86+
check_shellcheck() {
87+
shellcheck run-tests.sh
9688
}
9789

98-
check_isort () {
99-
isort -rc -c -df --profile black -- **/*.py
100-
}
90+
check_trailing_whitespace() {
91+
whitespace_found_p=0
92+
for file in $(git ls-files | grep -E '.(py|html|css|json|md|sh|txt|yml)$'); do
93+
if grep -q ' $' "$file"; then
94+
whitespace_found_p=1
95+
echo "[ERROR] Found trailing whitespace in ${file}."
96+
fi
97+
done
10198

102-
check_licenses () {
103-
scripts/check_licenses.py
99+
if [ "${whitespace_found_p}" != "0" ]; then
100+
exit 1
101+
fi
104102
}
105103

106-
check_all () {
107-
check_script
108-
check_fixtures
109-
check_pycodestyle
104+
check_all() {
110105
check_black
111-
check_pydocstyle
106+
check_docs_slugs
112107
check_isort
108+
check_jsonlint
113109
check_licenses
110+
check_pydocstyle
111+
check_record_dois
112+
check_record_ids
113+
check_secondary_types
114+
check_shellcheck
115+
check_trailing_whitespace
114116
}
115117

116118
if [ $# -eq 0 ]; then
117119
check_all
118120
exit 0
119121
fi
120122

121-
for arg in "$@"
122-
do
123-
case $arg in
124-
--check-shellscript) check_script;;
125-
--check-fixtures) check_fixtures;;
126-
--check-pycodestyle) check_pycodestyle;;
127-
--check-pydocstyle) check_pydocstyle;;
128-
--check-isort) check_isort;;
129-
--check-licenses) check_licenses;;
130-
*)
131-
esac
132-
done
123+
arg="$1"
124+
case $arg in
125+
--check-black) check_black ;;
126+
--check-docs-slugs) check_docs_slugs ;;
127+
--check-isort) check_isort ;;
128+
--check-jsonlint) check_jsonlint ;;
129+
--check-licenses) check_licenses ;;
130+
--check-pydocstyle) check_pydocstyle ;;
131+
--check-record-dois) check_record_dois ;;
132+
--check-record-ids) check_record_ids ;;
133+
--check-secondary-types) check_secondary_types ;;
134+
--check-shellcheck) check_shellcheck ;;
135+
--check-trailing-whitespace) check_trailing_whitespace ;;
136+
*) echo "[ERROR] Invalid argument '$arg'. Exiting." && exit 1 ;;
137+
esac

Diff for: scripts/split_json_file.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def split_json_file(filename, split):
2828
)
2929
print("[INFO] Creating file %s..." % filenamepart)
3030
split_content = json.dumps(
31-
records[split * i: split * (i + 1)],
31+
records[split * i : split * (i + 1)],
3232
indent=2,
3333
sort_keys=True,
3434
ensure_ascii=False,

0 commit comments

Comments
 (0)