Draft

32 commits
d53531b
Adds metrics for parsing
kylebgorman Jan 14, 2026
749043b
Beginning integration
kylebgorman Jan 14, 2026
04b0b23
Adds metrics test.
kylebgorman Jan 15, 2026
28e1cdd
Draft of parser and its integration
kylebgorman Jan 15, 2026
8b0d96c
More work.
kylebgorman Jan 15, 2026
f8defcb
Applies shift to metrics test to avoid collisions.
kylebgorman Jan 15, 2026
a499d51
Moves reverse_edits to data, where it belongs.
kylebgorman Jan 15, 2026
c506566
Days' debugging work
kylebgorman Jan 18, 2026
386bed6
More work; still debugging
kylebgorman Jan 18, 2026
5057d25
Optimizes mmap instructions (#116)
kylebgorman Feb 22, 2026
e80df85
Updates Black version
kylebgorman Feb 22, 2026
3bf3d06
Adds logging for vocabularies (#117)
kylebgorman Feb 22, 2026
011cff4
Avoids "Crashed" status in sweeps. (#118)
kylebgorman Mar 18, 2026
032cfa3
Pooling layer efficiency (#119)
kylebgorman Apr 5, 2026
9053124
Update special.py
kylebgorman Apr 6, 2026
b75ba43
fix typo
kylebgorman Apr 6, 2026
7712ab4
Optimizes mmap instructions (#116)
kylebgorman Feb 22, 2026
9af9ac7
Adds logging for vocabularies (#117)
kylebgorman Feb 22, 2026
b5f2fd2
Avoids "Crashed" status in sweeps. (#118)
kylebgorman Mar 18, 2026
c1c5246
Pooling layer efficiency (#119)
kylebgorman Apr 5, 2026
f42e721
Beginning integration
kylebgorman Jan 14, 2026
bedb192
Adds metrics test.
kylebgorman Jan 15, 2026
3093858
Draft of parser and its integration
kylebgorman Jan 15, 2026
63f290a
More work.
kylebgorman Jan 15, 2026
64ff892
Moves reverse_edits to data, where it belongs.
kylebgorman Jan 15, 2026
a133a31
Days' debugging work
kylebgorman Jan 18, 2026
f58654d
More work; still debugging
kylebgorman Jan 18, 2026
b962e48
Optimizes mmap instructions (#116)
kylebgorman Feb 22, 2026
1683013
Pooling layer efficiency (#119)
kylebgorman Apr 5, 2026
df55a73
Manual merge
kylebgorman Apr 7, 2026
e2b916e
README and bibliography
kylebgorman Apr 7, 2026
b892d9e
manual merge of upstream/master
kylebgorman Apr 8, 2026
71 changes: 33 additions & 38 deletions README.md
@@ -60,31 +60,29 @@ Dependencies project](https://universaldependencies.org/).

UDTube can perform up to four morphological tasks simultaneously:

- Lemmatization is performed using the `LEMMA` field and [edit
scripts](https://aclanthology.org/P14-2111/).

- [Universal part-of-speech
tagging](https://universaldependencies.org/u/pos/index.html) is performed
using the `UPOS` field: enable with `data: use_upos: true`.

- Language-specific part-of-speech tagging is performed using the `XPOS`
field: enable with `data: use_xpos: true`.

- Morphological feature tagging is performed using the `FEATS` field: enable
with `data: use_feats: true`.
- Lemmatization is performed using the `LEMMA` field and edit scripts.
- [Universal part-of-speech
tagging](https://universaldependencies.org/u/pos/index.html) is performed
using the `UPOS` field.
- Language-specific part-of-speech tagging is performed using the `XPOS` field.
- Morphological feature tagging is performed using the `FEATS` field.
- Dependency parsing is performed using the `HEAD` and `DEPREL` fields, a deep
biaffine parser, and minimum spanning tree decoding.
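To see why minimum spanning tree decoding is needed rather than simply taking each token's highest-scoring head, consider the toy example below. The score matrix is invented for illustration only; it is not output from the actual biaffine parser.

```python
# Rows are dependents (tokens 1 and 2); columns are candidate heads
# (0 = root, 1 = token 1, 2 = token 2). Scores are made up.
scores = [
    [1.0, 0.0, 5.0],  # token 1's best-scoring head is token 2
    [0.0, 6.0, 2.0],  # token 2's best-scoring head is token 1
]
# Naive per-token argmax decoding:
greedy = [max(range(3), key=lambda h: scores[d][h]) for d in range(2)]
# greedy == [2, 1]: tokens 1 and 2 head each other, forming a cycle,
# so the result is not a valid dependency tree. Spanning-tree decoding
# over the same scores is guaranteed to return a tree rooted at 0.
```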

The following caveats apply:

- Note that many newer Universal Dependencies datasets do not have
language-specific part-of-speech-tags.
- The `FEATS` field is treated as a single unit and is not segmented in any
way.
- One can convert from [Universal Dependencies morphological
features](https://universaldependencies.org/u/feat/index.html) to [UniMorph
features](https://unimorph.github.io/schema/) using
[`scripts/convert_to_um.py`](scripts/convert_to_um.py).
- UDTube does not perform dependency parsing at present, so the `HEAD`,
`DEPREL`, and `DEPS` fields are ignored and should be specified as `_`.
- By default, lemmatization uses reverse-edit scripts. This is appropriate for
predominantly suffixal languages, which are thought to represent the majority
of the world's languages. If working with a predominantly prefixal language,
disable this with `data: reverse_edits: false`.
- Note that many newer Universal Dependencies datasets do not have
language-specific part-of-speech tags, so this task should be disabled
(`data: use_xpos: false`).
- The `FEATS` field is treated as a single unit and is not segmented in any way.
- One can convert from [Universal Dependencies morphological
features](https://universaldependencies.org/u/feat/index.html) to [UniMorph
features](https://unimorph.github.io/schema/) using
[`scripts/convert_to_um.py`](scripts/convert_to_um.py).
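The edit-script idea behind lemmatization can be sketched as follows. This is a simplified, end-anchored variant for illustration only, not UDTube's actual implementation; the function names are invented.

```python
def edit_script(form: str, lemma: str) -> tuple[int, str]:
    """Computes a script rewriting form into lemma.

    Returns (number of characters to strip from the end of the form,
    suffix to append). Anchoring edits at the word end suits suffixal
    morphology, the same intuition behind reverse-edit scripts.
    """
    # Find the longest common prefix of form and lemma.
    i = 0
    while i < min(len(form), len(lemma)) and form[i] == lemma[i]:
        i += 1
    return len(form) - i, lemma[i:]


def apply_script(form: str, script: tuple[int, str]) -> str:
    """Applies an edit script to a (possibly unseen) form."""
    strip, suffix = script
    return form[: len(form) - strip] + suffix


# edit_script("running", "run") == (4, ""); the same script
# generalizes to unseen forms: apply_script("jogging", (4, "")) == "jog".
```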

## Usage

@@ -189,7 +187,7 @@ information](https://github.com/CUNY-CL/yoyodyne/blob/master/README.md#logging).

#### Other options

By default, UDTube attempts to model all four tasks; one can disable the
By default, UDTube attempts to model all five tasks; one can disable the
language-specific tagging task using `model: use_xpos: false`, and so on.

Dropout probability is specified using `model: dropout: ...`.
@@ -198,25 +196,19 @@
The encoder has multiple layers. The input to the classifier consists of just
the last few layers mean-pooled together. The number of layers used for
mean-pooling is specified using `model: pooling_layers: ...`.
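The mean-pooling over the last few layers can be sketched as below. This is an illustrative stand-in, not the library's code: plain lists of floats play the role of per-layer hidden-state vectors.

```python
def mean_pool_layers(hidden_states, pooling_layers):
    """Averages the last `pooling_layers` layer vectors elementwise.

    hidden_states: one vector (list of floats) per encoder layer,
    ordered from first to last layer.
    """
    selected = hidden_states[-pooling_layers:]
    # zip(*selected) walks the vectors dimension by dimension.
    return [sum(column) / len(selected) for column in zip(*selected)]


layers = [[0.0, 2.0], [2.0, 4.0], [4.0, 6.0]]
# Pooling 1 layer returns the last layer; pooling 2 averages the last two.
pooled = mean_pool_layers(layers, 2)  # [3.0, 5.0]
```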

By default, lemmatization uses reverse-edit scripts. This is appropriate for
predominantly suffixal languages, which are thought to represent the majority of
the world's languages. If working with a predominantly prefixal language,
disable this with `model: reverse_edits: false`.

The following YAML snippet shows the default architectural arguments.

...
model:
dropout: 0.5
encoder: google-bert/bert-base-multilingual-cased
pooling_layers: 1
reverse_edits: true
use_upos: true
use_xpos: true
use_lemma: true
use_feats: true
use_parse: true
...


Batch size is specified using `data: batch_size: ...` and defaults to 32.

@@ -268,14 +260,14 @@ written.

Here are some additional details:

- In `predict` mode UDTube loads the file to be labeled incrementally (i.e.,
one sentence at a time) so this can be used with very large files.
- In `predict` mode, if no path for the predictions is specified, stdout will
be used. If using this in conjunction with \> or \|, add
`--trainer.enable_progress_bar false` on the command line.
- The target task fields are overriden if their heads are active.
- Use [`scripts/pretokenize.py`](scripts/pretokenize.py) to convert raw text
files to CoNLL-U input files.
- In `predict` mode UDTube loads the file to be labeled incrementally (i.e., one
sentence at a time) so this can be used with very large files.
- In `predict` mode, if no path for the predictions is specified, stdout will be
used. If using this in conjunction with \> or \|, add
`--trainer.enable_progress_bar false` on the command line.
- The target task fields are overridden if their heads are active.
- Use [`scripts/pretokenize.py`](scripts/pretokenize.py) to convert raw text
files to CoNLL-U input files.

This mode is invoked using the `predict` subcommand, like so:

@@ -322,3 +314,6 @@ following document, which describes the model:
Yakubov, D. 2024. [How do we learn what we cannot
say?](https://academicworks.cuny.edu/gc_etds/5622/) Master's thesis, CUNY
Graduate Center.

(See also [`udtube.bib`](udtube.bib) for more work used during the development
of this library.)
6 changes: 0 additions & 6 deletions configs/ewt_bert.yaml
@@ -22,12 +22,6 @@ trainer:
model:
dropout: 0.4
encoder: google-bert/bert-base-cased
pooling_layers: 4
reverse_edits: true
use_upos: true
use_xpos: true
use_lemma: true
use_feats: true
encoder_optimizer:
class_path: torch.optim.Adam
init_args:
7 changes: 1 addition & 6 deletions configs/ewt_distilbert.yaml
@@ -22,12 +22,6 @@ trainer:
model:
dropout: 0.4
encoder: distilbert/distilbert-base-cased
pooling_layers: 4
reverse_edits: true
use_upos: true
use_xpos: true
use_lemma: true
use_feats: true
encoder_optimizer:
class_path: torch.optim.Adam
init_args:
@@ -52,6 +46,7 @@ data:
test: /Users/Shinji/UD_English-EWT/en_ewt-ud-test.conllu
predict: /Users/Shinji/UD_English-EWT/en_ewt-ud-test.conllu
batch_size: 32
reverse_edits: true
checkpoint:
filename: "model-{epoch:03d}-{val_loss:.4f}"
monitor: val_loss
6 changes: 0 additions & 6 deletions configs/ewt_roberta.yaml
@@ -22,12 +22,6 @@ trainer:
model:
dropout: 0.4
encoder: FacebookAI/roberta-base
pooling_layers: 4
reverse_edits: true
use_upos: true
use_xpos: true
use_lemma: true
use_feats: true
encoder_optimizer:
class_path: torch.optim.Adam
init_args:
5 changes: 0 additions & 5 deletions configs/syntagrus_mbert.yaml
@@ -22,12 +22,7 @@ trainer:
model:
dropout: 0.4
encoder: google-bert/bert-base-multilingual-cased
pooling_layers: 4
reverse_edits: true
use_upos: true
use_xpos: false
use_lemma: true
use_feats: true
encoder_optimizer:
class_path: torch.optim.Adam
init_args:
5 changes: 0 additions & 5 deletions configs/syntagrus_rubert.yaml
@@ -22,12 +22,7 @@ trainer:
model:
dropout: 0.4
encoder: DeepPavlov/rubert
pooling_layers: 4
reverse_edits: true
use_upos: true
use_xpos: false
use_lemma: true
use_feats: true
encoder_optimizer:
class_path: torch.optim.Adam
init_args:
5 changes: 0 additions & 5 deletions configs/syntagrus_xlm-roberta.yaml
@@ -22,12 +22,7 @@ trainer:
model:
dropout: 0.4
encoder: FacebookAI/xlm-roberta-base
pooling_layers: 4
reverse_edits: true
use_upos: true
use_xpos: false
use_lemma: true
use_feats: true
encoder_optimizer:
class_path: torch.optim.Adam
init_args:
8 changes: 5 additions & 3 deletions examples/wandb_sweeps/configs/ewt_grid.yaml
@@ -1,4 +1,4 @@
method: random
method: bayes
metric:
name: val_loss
goal: minimize
@@ -10,6 +10,7 @@ parameters:
min: 0
max: 0.5
model.encoder:
distribution: categorical
values:
- FacebookAI/roberta-base
- distilbert/distilbert-base-cased
@@ -18,7 +19,7 @@ parameters:
distribution: q_uniform
q: 1
min: 1
max: 8
max: 4
model.encoder_optimizer.class_path:
value: torch.optim.Adam
model.encoder_optimizer.init_args.lr:
@@ -31,7 +32,7 @@ parameters:
distribution: q_uniform
q: 1
min: 1
max: 20
max: 40
model.classifier_optimizer.class_path:
value: torch.optim.Adam
model.classifier_optimizer.init_args.lr:
@@ -49,6 +50,7 @@ parameters:
model.classifier_scheduler.init_args.patience:
value: 5
data.batch_size:
distribution: categorical
values:
- 8
- 16
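The switch from `method: random` to `method: bayes` is paired here with declaring `distribution: categorical` explicitly for the discrete choices. A minimal sweep fragment in that shape, using only values visible in this diff (values elided by the diff are omitted), might look like:

```yaml
method: bayes
metric:
  name: val_loss
  goal: minimize
parameters:
  model.pooling_layers:
    distribution: q_uniform
    q: 1
    min: 1
    max: 4
  data.batch_size:
    distribution: categorical
    values:
      - 8
      - 16
```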
8 changes: 5 additions & 3 deletions examples/wandb_sweeps/configs/gdt_grid.yaml
@@ -1,4 +1,4 @@
method: random
method: bayes
metric:
name: val_loss
goal: minimize
@@ -10,14 +10,15 @@ parameters:
min: 0
max: 0.5
model.encoder:
distribution: categorical
values:
- google-bert/bert-base-multilingual-cased
- FacebookAI/xlm-roberta-base
model.pooling_layers:
distribution: q_uniform
q: 1
min: 1
max: 8
max: 4
model.encoder_optimizer.class_path:
value: torch.optim.Adam
model.encoder_optimizer.init_args.lr:
@@ -30,7 +31,7 @@ parameters:
distribution: q_uniform
q: 1
min: 1
max: 20
max: 40
model.classifier_optimizer.class_path:
value: torch.optim.Adam
model.classifier_optimizer.init_args.lr:
@@ -48,6 +49,7 @@ parameters:
model.classifier_scheduler.init_args.patience:
value: 5
data.batch_size:
distribution: categorical
values:
- 8
- 16
8 changes: 5 additions & 3 deletions examples/wandb_sweeps/configs/syntagrus_grid.yaml
@@ -1,4 +1,4 @@
method: random
method: bayes
metric:
name: val_loss
goal: minimize
@@ -10,6 +10,7 @@ parameters:
min: 0
max: 0.5
model.encoder:
distribution: categorical
values:
- google-bert/bert-base-multilingual-cased
- FacebookAI/xlm-roberta-base
@@ -18,7 +19,7 @@ parameters:
distribution: q_uniform
q: 1
min: 1
max: 8
max: 4
model.encoder_optimizer.class_path:
value: torch.optim.Adam
model.encoder_optimizer.init_args.lr:
@@ -31,7 +32,7 @@ parameters:
distribution: q_uniform
q: 1
min: 1
max: 20
max: 40
model.classifier_optimizer.class_path:
value: torch.optim.Adam
model.classifier_optimizer.init_args.lr:
@@ -49,6 +50,7 @@ parameters:
model.classifier_scheduler.init_args.patience:
value: 5
data.batch_size:
distribution: categorical
values:
- 8
- 16
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "udtube"
version = "0.1.12"
version = "0.2.0"
description = "Neural morphological analysis"
license = "Apache-2.0"
readme = "README.md"