Skip to content

Commit c982047

Browse files
committed
Addressed change requests
1 parent 1df9568 commit c982047

File tree

2 files changed

+22
-21
lines changed

2 files changed

+22
-21
lines changed

LLama/Native/LLamaToken.cs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.Diagnostics;
2+
using System.Linq;
23

34
namespace LLama.Native;
45

LLama/Native/SafeLlamaModelHandle.cs

+21-21
Original file line numberDiff line numberDiff line change
@@ -640,12 +640,12 @@ public sealed class Vocabulary
640640
/// <summary>
641641
/// Contains unique tokens that are supposed to end the generation (e.g.: EOS, EOT, etc)
642642
/// </summary>
643-
public readonly HashSet<LLamaToken> EOGTokens;
643+
public readonly IReadOnlyList<LLamaToken> EOGTokens;
644644

645645
/// <summary>
646646
/// Contains unique tokens that exist for inference control rather than text output
647647
/// </summary>
648-
public readonly HashSet<LLamaToken> ControlTokens;
648+
public readonly IReadOnlyList<LLamaToken> ControlTokens;
649649

650650
internal Vocabulary(SafeLlamaModelHandle model)
651651
{
@@ -674,8 +674,8 @@ internal Vocabulary(SafeLlamaModelHandle model)
674674
ShouldAddBOS = LLamaVocabNative.llama_vocab_get_add_bos(vocabNative);
675675
ShouldAddEOS = LLamaVocabNative.llama_vocab_get_add_eos(vocabNative);
676676

677-
EOGTokens = new HashSet<LLamaToken>(TokenToString.Keys.Where(token => LLamaVocabNative.llama_vocab_is_eog(vocabNative, token)));
678-
ControlTokens = new HashSet<LLamaToken>(TokenToString.Keys.Where(token => LLamaVocabNative.llama_vocab_is_control(vocabNative, token)));
677+
EOGTokens = TokenToString.Keys.Where(token => LLamaVocabNative.llama_vocab_is_eog(vocabNative, token)).ToList();
678+
ControlTokens = TokenToString.Keys.Where(token => LLamaVocabNative.llama_vocab_is_control(vocabNative, token)).ToList();
679679
}
680680
}
681681

@@ -701,88 +701,88 @@ private Dictionary<LLamaToken, string> GetVocabCache()
701701
/// <summary>
702702
/// Total number of tokens in this vocabulary
703703
/// </summary>
704-
public int Count { get; init; }
704+
public int Count { get; }
705705

706706
/// <summary>
707707
/// Get the type of this vocabulary
708708
/// </summary>
709-
public LLamaVocabType Type { get; init; }
709+
public LLamaVocabType Type { get; }
710710

711711
/// <summary>
712712
/// Get the Beginning of Sentence token for this model
713713
/// </summary>
714-
public LLamaToken? BOS { get; init; }
714+
public LLamaToken? BOS { get; }
715715

716716
/// <summary>
717717
/// Get the End of Sentence token for this model
718718
/// </summary>
719-
public LLamaToken? EOS { get; init; }
719+
public LLamaToken? EOS { get; }
720720

721721
/// <summary>
722722
/// Get the newline token for this model
723723
/// </summary>
724-
public LLamaToken? Newline { get; init; }
724+
public LLamaToken? Newline { get; }
725725

726726
/// <summary>
727727
/// Get the padding token for this model
728728
/// </summary>
729-
public LLamaToken? Pad { get; init; }
729+
public LLamaToken? Pad { get; }
730730

731731
/// <summary>
732732
/// Get the sentence separator token for this model
733733
/// </summary>
734-
public LLamaToken? SEP { get; init; }
734+
public LLamaToken? SEP { get; }
735735

736736
/// <summary>
737737
/// Codellama beginning of infill prefix
738738
/// </summary>
739-
public LLamaToken? InfillPrefix { get; init; }
739+
public LLamaToken? InfillPrefix { get; }
740740

741741
/// <summary>
742742
/// Codellama beginning of infill middle
743743
/// </summary>
744-
public LLamaToken? InfillMiddle { get; init; }
744+
public LLamaToken? InfillMiddle { get; }
745745

746746
/// <summary>
747747
/// Codellama beginning of infill suffix
748748
/// </summary>
749-
public LLamaToken? InfillSuffix { get; init; }
749+
public LLamaToken? InfillSuffix { get; }
750750

751751
/// <summary>
752752
/// Codellama pad
753753
/// </summary>
754-
public LLamaToken? InfillPad { get; init; }
754+
public LLamaToken? InfillPad { get; }
755755

756756
/// <summary>
757757
/// Codellama rep
758758
/// </summary>
759-
public LLamaToken? InfillRep { get; init; }
759+
public LLamaToken? InfillRep { get; }
760760

761761
/// <summary>
762762
/// Codellama sep
763763
/// </summary>
764-
public LLamaToken? InfillSep { get; init; }
764+
public LLamaToken? InfillSep { get; }
765765

766766
/// <summary>
767767
/// end-of-turn token
768768
/// </summary>
769-
public LLamaToken? EOT { get; init; }
769+
public LLamaToken? EOT { get; }
770770

771771
/// <summary>
772772
/// For encoder-decoder models, this function returns id of the token that must be provided
773773
/// to the decoder to start generating output sequence.
774774
/// </summary>
775-
public LLamaToken? DecoderStartToken { get; init; }
775+
public LLamaToken? DecoderStartToken { get; }
776776

777777
/// <summary>
778778
/// Check if the current model requires a BOS token to be added
779779
/// </summary>
780-
public bool ShouldAddBOS { get; init; }
780+
public bool ShouldAddBOS { get; }
781781

782782
/// <summary>
783783
/// Check if the current model requires an EOS token to be added
784784
/// </summary>
785-
public bool ShouldAddEOS { get; init; }
785+
public bool ShouldAddEOS { get; }
786786
}
787787
}
788788
}

0 commit comments

Comments
 (0)