@@ -640,12 +640,12 @@ public sealed class Vocabulary
640
640
/// <summary>
641
641
/// Contains unique tokens that are supposed to end the generation (e.g.: EOS, EOT, etc)
642
642
/// </summary>
643
- public readonly HashSet < LLamaToken > EOGTokens ;
643
+ public readonly IReadOnlyList < LLamaToken > EOGTokens ;
644
644
645
645
/// <summary>
646
646
/// Contains unique tokens that exist for inference control rather than text output
647
647
/// </summary>
648
- public readonly HashSet < LLamaToken > ControlTokens ;
648
+ public readonly IReadOnlyList < LLamaToken > ControlTokens ;
649
649
650
650
internal Vocabulary ( SafeLlamaModelHandle model )
651
651
{
@@ -674,8 +674,8 @@ internal Vocabulary(SafeLlamaModelHandle model)
674
674
ShouldAddBOS = LLamaVocabNative . llama_vocab_get_add_bos ( vocabNative ) ;
675
675
ShouldAddEOS = LLamaVocabNative . llama_vocab_get_add_eos ( vocabNative ) ;
676
676
677
- EOGTokens = new HashSet < LLamaToken > ( TokenToString . Keys . Where ( token => LLamaVocabNative . llama_vocab_is_eog ( vocabNative , token ) ) ) ;
678
- ControlTokens = new HashSet < LLamaToken > ( TokenToString . Keys . Where ( token => LLamaVocabNative . llama_vocab_is_control ( vocabNative , token ) ) ) ;
677
+ EOGTokens = TokenToString . Keys . Where ( token => LLamaVocabNative . llama_vocab_is_eog ( vocabNative , token ) ) . ToList ( ) ;
678
+ ControlTokens = TokenToString . Keys . Where ( token => LLamaVocabNative . llama_vocab_is_control ( vocabNative , token ) ) . ToList ( ) ;
679
679
}
680
680
}
681
681
@@ -701,88 +701,88 @@ private Dictionary<LLamaToken, string> GetVocabCache()
701
701
/// <summary>
702
702
/// Total number of tokens in this vocabulary
703
703
/// </summary>
704
- public int Count { get ; init ; }
704
+ public int Count { get ; }
705
705
706
706
/// <summary>
707
707
/// Get the type of this vocabulary
708
708
/// </summary>
709
- public LLamaVocabType Type { get ; init ; }
709
+ public LLamaVocabType Type { get ; }
710
710
711
711
/// <summary>
712
712
/// Get the Beginning of Sentence token for this model
713
713
/// </summary>
714
- public LLamaToken ? BOS { get ; init ; }
714
+ public LLamaToken ? BOS { get ; }
715
715
716
716
/// <summary>
717
717
/// Get the End of Sentence token for this model
718
718
/// </summary>
719
- public LLamaToken ? EOS { get ; init ; }
719
+ public LLamaToken ? EOS { get ; }
720
720
721
721
/// <summary>
722
722
/// Get the newline token for this model
723
723
/// </summary>
724
- public LLamaToken ? Newline { get ; init ; }
724
+ public LLamaToken ? Newline { get ; }
725
725
726
726
/// <summary>
727
727
/// Get the padding token for this model
728
728
/// </summary>
729
- public LLamaToken ? Pad { get ; init ; }
729
+ public LLamaToken ? Pad { get ; }
730
730
731
731
/// <summary>
732
732
/// Get the sentence separator token for this model
733
733
/// </summary>
734
- public LLamaToken ? SEP { get ; init ; }
734
+ public LLamaToken ? SEP { get ; }
735
735
736
736
/// <summary>
737
737
/// Codellama beginning of infill prefix
738
738
/// </summary>
739
- public LLamaToken ? InfillPrefix { get ; init ; }
739
+ public LLamaToken ? InfillPrefix { get ; }
740
740
741
741
/// <summary>
742
742
/// Codellama beginning of infill middle
743
743
/// </summary>
744
- public LLamaToken ? InfillMiddle { get ; init ; }
744
+ public LLamaToken ? InfillMiddle { get ; }
745
745
746
746
/// <summary>
747
747
/// Codellama beginning of infill suffix
748
748
/// </summary>
749
- public LLamaToken ? InfillSuffix { get ; init ; }
749
+ public LLamaToken ? InfillSuffix { get ; }
750
750
751
751
/// <summary>
752
752
/// Codellama pad
753
753
/// </summary>
754
- public LLamaToken ? InfillPad { get ; init ; }
754
+ public LLamaToken ? InfillPad { get ; }
755
755
756
756
/// <summary>
757
757
/// Codellama rep
758
758
/// </summary>
759
- public LLamaToken ? InfillRep { get ; init ; }
759
+ public LLamaToken ? InfillRep { get ; }
760
760
761
761
/// <summary>
762
762
/// Codellama sep
763
763
/// </summary>
764
- public LLamaToken ? InfillSep { get ; init ; }
764
+ public LLamaToken ? InfillSep { get ; }
765
765
766
766
/// <summary>
767
767
/// end-of-turn token
768
768
/// </summary>
769
- public LLamaToken ? EOT { get ; init ; }
769
+ public LLamaToken ? EOT { get ; }
770
770
771
771
/// <summary>
772
772
/// For encoder-decoder models, this function returns id of the token that must be provided
773
773
/// to the decoder to start generating output sequence.
774
774
/// </summary>
775
- public LLamaToken ? DecoderStartToken { get ; init ; }
775
+ public LLamaToken ? DecoderStartToken { get ; }
776
776
777
777
/// <summary>
778
778
/// Check if the current model requires a BOS token added
779
779
/// </summary>
780
- public bool ShouldAddBOS { get ; init ; }
780
+ public bool ShouldAddBOS { get ; }
781
781
782
782
/// <summary>
783
783
/// Check if the current model requires an EOS token added
784
784
/// </summary>
785
- public bool ShouldAddEOS { get ; init ; }
785
+ public bool ShouldAddEOS { get ; }
786
786
}
787
787
}
788
788
}
0 commit comments