Skip to content

Commit 4545841

Browse files
taylorfturner, JGSweets, gliptak, SchadtJ
authored
staging/main/0.12.0 (#1145)
* refactor: Upgrade the models to use keras 3.0 (#1138)
* Replace snappy with cramjam (#1091)
* add downloads tile (#1085)
* Replace snappy with cramjam
* Delete test_no_snappy
---------
Co-authored-by: Taylor Turner <[email protected]>
* pre-commit fix (#1122)
* Bug fix for float precision calculation using categorical data with trailing zeros. (#1125)
* Revert "Bug fix for float precision calculation using categorical data with t…" (#1133)
This reverts commit d3159bd.
* refactor: move layers outside of class
* refactor: update model to keras 3.0
* fix: manifest
* fix: bugs in compile and train
* fix: bug in load_from_library
* fix: bugs in CharCNN
* refactor: loading tf model labeler
* fix: bug in data_labeler identification
* fix: update model to use proper softmax layer names
* fix: formatting
* fix: remove unused line
* refactor: drop support for 3.8
* fix: comments
* fix: comment
---------
Co-authored-by: Gábor Lipták <[email protected]>
Co-authored-by: Taylor Turner <[email protected]>
Co-authored-by: James Schadt <[email protected]>
* Fix Tox (#1143)
* tox new
* update
* update
* update
* update
* update
* update
* update tox.ini
* update
* update
* remove docs
* empty retrigger
* update (#1146)
* bump version
* update 3.11
* remove dist/
---------
Co-authored-by: JGSweets <[email protected]>
Co-authored-by: Gábor Lipták <[email protected]>
Co-authored-by: James Schadt <[email protected]>
1 parent a448694 commit 4545841

25 files changed

+247
-243
lines changed

.github/workflows/publish-python-package.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
- name: Set up Python
2121
uses: actions/setup-python@v5
2222
with:
23-
python-version: '3.10'
23+
python-version: '3.11'
2424
- name: Install dependencies
2525
run: |
2626
python -m pip install --upgrade pip

.github/workflows/test-python-package.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
runs-on: ubuntu-latest
1717
strategy:
1818
matrix:
19-
python-version: [3.8, 3.9, "3.10"]
19+
python-version: [3.9, "3.10", "3.11"]
2020

2121
steps:
2222
- uses: actions/checkout@v4

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
global-exclude .DS_Store
2+
global-exclude */__pycache__/*
23

34
include *.txt
45
include CODEOWNERS

dataprofiler/labelers/char_load_tf_model.py

+26-15
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,8 @@ def _construct_model(self) -> None:
237237
model_loc = self._parameters["model_path"]
238238

239239
self._model: tf.keras.Model = tf.keras.models.load_model(model_loc)
240-
softmax_output_layer_name = self._model.outputs[0].name.split("/")[0]
240+
self._model = tf.keras.Model(self._model.inputs, self._model.outputs)
241+
softmax_output_layer_name = self._model.output_names[0]
241242
softmax_layer_ind = cast(
242243
int,
243244
labeler_utils.get_tf_layer_index_from_name(
@@ -252,21 +253,28 @@ def _construct_model(self) -> None:
252253
num_labels, activation="softmax", name="softmax_output"
253254
)(self._model.layers[softmax_layer_ind - 1].output)
254255

255-
# Output the model into a .pb file for TensorFlow
256-
argmax_layer = tf.keras.backend.argmax(new_softmax_layer)
256+
# Add argmax layer to get labels directly as an output
257+
argmax_layer = tf.keras.ops.argmax(new_softmax_layer, axis=2)
257258

258259
argmax_outputs = [new_softmax_layer, argmax_layer]
259260
self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
261+
self._model = tf.keras.Model(self._model.inputs, self._model.outputs)
260262

261263
# Compile the model w/ metrics
262-
softmax_output_layer_name = self._model.outputs[0].name.split("/")[0]
264+
softmax_output_layer_name = self._model.output_names[0]
263265
losses = {softmax_output_layer_name: "categorical_crossentropy"}
264266

265267
# use f1 score metric
266268
f1_score_training = labeler_utils.F1Score(
267269
num_classes=num_labels, average="micro"
268270
)
269-
metrics = {softmax_output_layer_name: ["acc", f1_score_training]}
271+
metrics = {
272+
softmax_output_layer_name: [
273+
"categorical_crossentropy",
274+
"acc",
275+
f1_score_training,
276+
]
277+
}
270278

271279
self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
272280

@@ -294,30 +302,33 @@ def _reconstruct_model(self) -> None:
294302
num_labels = self.num_labels
295303
default_ind = self.label_mapping[self._parameters["default_label"]]
296304

297-
# Remove the 2 output layers ('softmax', 'tf_op_layer_ArgMax')
298-
for _ in range(2):
299-
self._model.layers.pop()
300-
301305
# Add the final Softmax layer to the previous spot
306+
# self._model.layers[-2] to skip: original softmax
302307
final_softmax_layer = tf.keras.layers.Dense(
303308
num_labels, activation="softmax", name="softmax_output"
304-
)(self._model.layers[-4].output)
309+
)(self._model.layers[-2].output)
305310

306-
# Output the model into a .pb file for TensorFlow
307-
argmax_layer = tf.keras.backend.argmax(final_softmax_layer)
311+
# Add argmax layer to get labels directly as an output
312+
argmax_layer = tf.keras.ops.argmax(final_softmax_layer, axis=2)
308313

309314
argmax_outputs = [final_softmax_layer, argmax_layer]
310315
self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
311316

312317
# Compile the model
313-
softmax_output_layer_name = self._model.outputs[0].name.split("/")[0]
318+
softmax_output_layer_name = self._model.output_names[0]
314319
losses = {softmax_output_layer_name: "categorical_crossentropy"}
315320

316321
# use f1 score metric
317322
f1_score_training = labeler_utils.F1Score(
318323
num_classes=num_labels, average="micro"
319324
)
320-
metrics = {softmax_output_layer_name: ["acc", f1_score_training]}
325+
metrics = {
326+
softmax_output_layer_name: [
327+
"categorical_crossentropy",
328+
"acc",
329+
f1_score_training,
330+
]
331+
}
321332

322333
self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
323334

@@ -370,7 +381,7 @@ def fit(
370381
f1_report: dict = {}
371382

372383
self._model.reset_metrics()
373-
softmax_output_layer_name = self._model.outputs[0].name.split("/")[0]
384+
softmax_output_layer_name = self._model.output_names[0]
374385

375386
start_time = time.time()
376387
batch_id = 0

0 commit comments

Comments
 (0)