-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathlearn-lang-diary-part-five.lyx
9261 lines (7379 loc) · 172 KB
/
learn-lang-diary-part-five.lyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#LyX 2.3 created this file. For more info see http://www.lyx.org/
\lyxformat 544
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\begin_preamble
\usepackage{url}
\usepackage{slashed}
\end_preamble
\use_default_options false
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding utf8
\fontencoding global
\font_roman "times" "default"
\font_sans "helvet" "default"
\font_typewriter "cmtt" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype false
\use_dash_ligatures false
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize default
\spacing single
\use_hyperref true
\pdf_bookmarks true
\pdf_bookmarksnumbered false
\pdf_bookmarksopen false
\pdf_bookmarksopenlevel 1
\pdf_breaklinks true
\pdf_pdfborder true
\pdf_colorlinks true
\pdf_backref false
\pdf_pdfusetitle true
\papersize default
\use_geometry false
\use_package amsmath 2
\use_package amssymb 2
\use_package cancel 1
\use_package esint 0
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 0
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 0
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle default
\listings_params "basicstyle={\ttfamily},basewidth={0.45em}"
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Title
Language Learning Diary - Part Five
\end_layout
\begin_layout Date
Jan 2022 - March 2022
\end_layout
\begin_layout Author
Linas Vepštas
\end_layout
\begin_layout Abstract
The language-learning effort involves research and software development
to implement the ideas concerning unsupervised learning of grammar, syntax
and semantics from corpora.
This document contains supplementary notes and a loosely-organized semi-chronol
ogical diary of results.
The notes here might not always make sense; they are a short-hand for
my own benefit, rather than aimed at you, dear reader!
\end_layout
\begin_layout Section*
Introduction
\end_layout
\begin_layout Standard
Part Five of the diary on the language-learning effort continues work on
the English dataset.
\end_layout
\begin_layout Standard
Good progress was made.
It appears that we have a high-quality, stable, debugged clustering algorithm
in place, and it is generating good-quality clusters.
However, dozens of questions and hypotheses arise, which is a bit overwhelming.
The lack of any kind of theoretical foundations for any of this stuff is
frustrating.
Without theory, it is hard to get strong insight.
\end_layout
\begin_layout Standard
At any rate, results seem solid enough that we can now move on to the next
steps of the program.
I'm hoping to start work on multi-sentence and cross-paragraph correlations
in the coming months.
Although, before doing this, perhaps a comparison to the hand-built LG
grammars is in order.
\end_layout
\begin_layout Section*
Summary Conclusions
\end_layout
\begin_layout Standard
A summary of what is found in this part of the diary:
\end_layout
\begin_layout Itemize
From Experiment-10: Ranked-MI (Variational MI) seems to mostly make good
cluster recommendations, except when it doesn't.
Since the actual merge proceeds via a Jaccard-type voting in of disjuncts,
it is possible that the poor recommendations result in few contributions.
But there is no obvious way to automate a quality measurement.
\end_layout
\begin_layout Itemize
From Experiment-10: A list of the first 31 clusters is presented.
It seems OK but not great.
Hard to say, as the Jaccard-selection mechanism selects membership, and
we don't have a window into that.
Is there a way of visualizing the
\begin_inset Quotes eld
\end_inset
word-sense
\begin_inset Quotes erd
\end_inset
of a cluster?
\end_layout
\begin_layout Itemize
From Experiment-10: A fairly large number of merges are expanding or merging
prior classes.
This suggests that
\begin_inset Quotes eld
\end_inset
preferential attachment
\begin_inset Quotes erd
\end_inset
is hard at work, and so we should not be surprised to later observe scale-free
results.
\end_layout
\begin_layout Itemize
This implies that ranked-MI/variational-MI automatically, inherently makes
these kinds of preferential-attachment suggestions.
How does this work? What are the details? Are there alternatives?
\end_layout
\begin_layout Itemize
As clustering proceeds, there are several dozen different measurements and
indicator values that can be tracked, including the dataset entropy, the
MMT-Q value, the size of clusters, the ranked-MI of the top-ranked word-pair,
and so on.
Many graphs of these indicators are presented.
Experiment-10 tracks four of these; experiments 11 and 12 track dozens
of them.
A significant portion of the parameter space is explored.
\end_layout
\begin_layout Itemize
Graphs of the word-disjunct marginal entropy distribution are presented.
This is for the dataset, before clustering is started.
It appears to be a normal (Gaussian) distribution.
\end_layout
\begin_layout Itemize
Multiple alternatives to the variational-MI (ranked-MI) are proposed and
developed.
These are computed for the top-200 most common words in the dataset (
\emph on
i.e.
\emph default
for
\begin_inset Formula $N\left(N+1\right)/2$
\end_inset
word-pairs, for
\begin_inset Formula $N=200$
\end_inset
.) Although they make different ranking recommendations, the overall distribution
s are nearly identical.
Although roughly Gaussian, these have a very fat tail to the left (
\emph on
i.e.
\emph default
on the negative side: they find a lot of dis-similarity.) The fat tail is
hypothesized to be driven by the number of distinct word-senses (
\emph on
i.e.
\emph default
word-senses should be dis-similar.) All of these alternatives are more computati
onally intensive than the ranked-MI, and thus are not really practical to
apply.
It is hypothesized that all of these might still end up selecting the same
in-group (
\emph on
i.e.
\emph default
although the top-ranked pairs differ, the in-groups would not.)
\end_layout
\begin_layout Itemize
From Experiment-11: The top ranked-MI pair is used to form the initial seed
of a cluster, and then the regular MI is used to nominate additional members.
When the Jaccard overlap of the nominees is computed, it is surprisingly
low.
Why? Is it because MI is pair-wise, and Jaccard is group-wise?
\end_layout
\begin_layout Itemize
Despite the low overlap, the majority of sections get merged, only because
their counts are below the noise-floor.
How do low-count disjuncts influence the MI?
\end_layout
\begin_layout Itemize
Setting the noise-floor causes disjuncts with a count below this floor to
be swept up into the group.
Later, during the preferential attachment and growth of groups, is it possible
that this
\begin_inset Quotes eld
\end_inset
random noise
\begin_inset Quotes erd
\end_inset
hijacks the initial cluster, twisting it in a different direction from
where it started? Is it causing the signal to be washed out? What happens
if we set the noise-floor to zero?
\end_layout
\begin_layout Itemize
After running the algo for about 1060 steps, about 530 word-classes were
created (the fact that it's about a half is unexplained, but gives a hint
of the role of preferential attachment).
When comparing the
\begin_inset Formula $N\left(N-1\right)/2$
\end_inset
class-pairs (for
\begin_inset Formula $N=530)$
\end_inset
, about 93% of them have a similarity of zero: that is, they are completely
orthogonal.
That's a good thing: we wanted to have classes that are distinct from one-anoth
er, and that is what we are getting.
Of the remaining 7%, the distribution of MI is almost perfectly a Gaussian,
centered at an MI of about negative 3, with a stddev of 3.5.
Negative MI is also good: it means that even if they are not perfectly
orthogonal, they are almost-so.
\end_layout
\begin_layout Itemize
The above stands in contrast to the word-word orthogonality of the remaining
(as yet unmerged) words: more than 80% of them have a non-zero similarity.
The distribution of these is almost Gaussian, with a bit of a fat tail
towards the low-end.
Compared to the pre-merge MI distribution, there is little change, except
perhaps in the fat tail.
\end_layout
\begin_layout Standard
That pretty much wraps things up for this chapter.
Merge experiments are ongoing, and further results will be presented in
later diary chapters.
I don't expect many changes from the above; just some longer runs with
maybe clearer results.
A noise=0 run is clearly called for.
Examining orthogonality as a function of time is also interesting, to be
reported later.
\end_layout
\begin_layout Section*
TODO
\end_layout
\begin_layout Standard
A list of things to do, here or later:
\end_layout
\begin_layout Itemize
In population genetics, a neutral evolutionary model with a static population
size results in a power-law distribution of alleles in the population.
Can this process model also explain the power-law (Zipfian, square-root-Zipfian
) distributions seen elsewhere? There is a similar set of results in ecology,
with regards to species distribution in an ecological niche.
\end_layout
\begin_layout Section*
Expt-10 Merge exploration (Jan 2022)
\end_layout
\begin_layout Standard
The end of Diary Part Four describes a dataset with several thousand merged
words in it.
However, that dataset had assorted issues, and recreating it before investing
time in characterizing it seems like a good idea.
There were numerous (and ongoing) bug-fixes made.
Phew.
5 Jan 2022: Looks like the last of the bugs are now fixed.
This took two weeks of very tedious debugging.
Ouch.
Ready to restart, at last.
\end_layout
\begin_layout Standard
Now that we are ready, the questions below arise.
\end_layout
\begin_layout Subsection*
Things worth exploring.
\end_layout
\begin_layout Standard
What have we actually got? Some questions about the dataset:
\end_layout
\begin_layout Itemize
Distribution: size of word-class vs.
rank.
This was previously examined in
\begin_inset Quotes eld
\end_inset
diary part one
\begin_inset Quotes erd
\end_inset
, page 99, for a different collection of merge algorithms (and earlier,
different datasets).
Do we still get something similar?
\end_layout
\begin_layout Itemize
Above,
\begin_inset Quotes eld
\end_inset
size
\begin_inset Quotes erd
\end_inset
might mean
\begin_inset Quotes eld
\end_inset
number of words in the word-class
\begin_inset Quotes erd
\end_inset
or it might mean
\begin_inset Quotes eld
\end_inset
number of disjuncts in word-class
\begin_inset Quotes erd
\end_inset
or it might mean
\begin_inset Quotes eld
\end_inset
number of disjuncts in word-class with count weighting
\begin_inset Quotes erd
\end_inset
.
Oof.
It would be nice to have a dashboard for this, instead of lots of manual
work.
\end_layout
\begin_layout Itemize
Distribution of word-senses.
That is, how many words participate in more than one word-class? If they
do participate in more than one word-class, what is the weight in each
class?
\end_layout
\begin_layout Itemize
For words that belong to word-classes, what fraction of their weight remains
unassigned to any word-class?
\end_layout
\begin_layout Itemize
Distribution of MI of pairs of word-classes.
We might hope that this is low, so that different word-classes are different
from one another.
\end_layout
\begin_layout Itemize
Distribution of self-MI of word-classes.
One might hope that this is high, so that the word-classes do not share
much in common with other words or word-classes.
\end_layout
\begin_layout Itemize
As above, but distribution of MI of pairs consisting of a word-class, and
a word.
\end_layout
\begin_layout Itemize
Prior to starting the merge, there's an MI between words and disjuncts.
I don't recall examining that in detail, before.
Then, after the merge, how does this change?
\end_layout
\begin_layout Standard
That's a lot of questions.
Not clear which ones should be answered first.
\end_layout
\begin_layout Subsection*
Round 43
\end_layout
\begin_layout Standard
Done with bug-fixing.
\end_layout
\begin_layout Standard
I'm currently using `
\family sans
r9-sim-200.rdb
\family default
` for marginals plus the similarities for the top 200 most frequent words.
Then run
\begin_inset listings
inline false
status open
\begin_layout Plain Layout
cp -pr r9-sim-200.rdb r10-merge.rdb
\end_layout
\begin_layout Plain Layout
guile -l cogserver-gram.scm
\end_layout
\begin_layout Plain Layout
(in-group-cluster covr-obj 0.5 0.2 4 200 100)
\end_layout
\end_inset
\end_layout
\begin_layout Standard
This works great until merge 43 where we get
\end_layout
\begin_layout Standard
\begin_inset listings
inline false
status open
\begin_layout Plain Layout
------ Round 43 Next in line: ranked-MI = 5.6267 MI = 5.1464 (`could must
would should may will can might`, `to`)
\end_layout
\begin_layout Plain Layout
In-group size=5: `to` `could must would should may will can might` `you`
`I` `we`
\end_layout
\end_inset
\end_layout
\begin_layout Standard
So that looks like a bug.
Issues:
\end_layout
\begin_layout Itemize
Hard to believe that this is the top ranked-MI pair.
\end_layout
\begin_deeper
\begin_layout Itemize
Is the MI being computed correctly? It almost surely is, but still ...
??
\end_layout
\begin_layout Itemize
Should the ranked-MI for a cluster be de-rated, say, by the number of words
in the cluster?
\end_layout
\begin_layout Itemize
The next 10 highest ranked-MI merges look great! So maybe somehow the ranked
MI for clusters is wrong?
\end_layout
\begin_layout Itemize
Since merging is via a Jaccard-overlap selection mechanism, perhaps most
of the contributing disjuncts in the bad recommendations will be ignored?
\end_layout
\end_deeper
\begin_layout Standard
\begin_inset Separator plain
\end_inset
\end_layout
\begin_layout Subsection*
Round 88 - Majority voting bug?
\end_layout
\begin_layout Standard
Subsequent merges look ..
pretty good, except when they don't, and then they look ugly.
\end_layout
\begin_layout Standard
At round 88 it goes nuts: it merges a few disjuncts of
\begin_inset Quotes eld
\end_inset
As as
\begin_inset Quotes erd
\end_inset
together.
Then it merges zero of them into
\begin_inset Quotes eld
\end_inset
As as.i
\begin_inset Quotes erd
\end_inset
, and then hits an infinite loop, because what is left after the merge still
has a high MI.
\end_layout
\begin_layout Standard
Conclude: the majority-voting scheme left behind (left unmerged) too much;
enough that the MI between what is left is still high.
Can we redefine the voting procedure to lessen this?
\end_layout
\begin_layout Subsection*
Bug Fixes
\end_layout
\begin_layout Standard
There were multiple bugs, difficult to locate.
Some cross-section merges were being done wrong.
A bug in cog-delete! was erasing data in DB, thus restarts loaded missing/corru
pt data.
As of 15 Jan all seems to be fixed.
\end_layout
\begin_layout Subsection*
Round 31
\end_layout
\begin_layout Standard
After fixes, the first 31 rounds merge the following words.
This is with quorum=0.5 commonality=0.2 noise=4.
\begin_inset VSpace defskip
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Tabular
<lyxtabular version="3" rows="34" columns="3">
<features islongtable="true" headTopDL="true" headBottomDL="true" longtabularalignment="center">
<column alignment="center" valignment="top">
<column alignment="center" valignment="top" width="0pt">
<column alignment="center" valignment="top" width="0pt">
<row>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
N
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
class
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
comments
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\xout off
\uuline off
\uwave off
\noun off
\color none
1
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\xout off
\uuline off
\uwave off
\noun off
\color none
+ — “ ” _
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OK
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\xout off
\uuline off
\uwave off
\noun off
\color none
2
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\xout off
\uuline off
\uwave off
\noun off
\color none
, ;
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OK
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
3
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
was is
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OK
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
4
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
but and that as
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OK
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
5
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
? .
!
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OK
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
6
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
He It I There
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
OK, sentence starters
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
7
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
" ” , ? .
! what
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Expand group N=5
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
8
\end_layout
\end_inset
</cell>
<cell multirow="3" alignment="center" valignment="middle" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
He It I There She This
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Expansion of group N=6
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
9
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
A No He It I There She This The
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Expansion of group above!
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
10
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
of in to from
\end_layout