diff --git a/docs/results/BAAI/bge-m3/summary.json b/docs/results/BAAI/bge-m3/summary.json
new file mode 100644
index 0000000..72a5ee8
--- /dev/null
+++ b/docs/results/BAAI/bge-m3/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.718621425743256
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5664555524508175
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9441075327867781
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7868184551588373
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8970320222457714
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8424907003170607
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4316630478439933
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9327323748768209
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8955144849023412
+ },
+ "jqara": {
+ "ndcg@10": 0.5391637817603238
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8596271423829606
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9778261029468881
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.851348898788452
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.6906829361885021
+ },
+ "jaqket": {
+ "ndcg@10": 0.5659460589444328
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.32175483024897333
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.734809783755516
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.5126063501865914
+ },
+ "mrtydi": {
+ "ndcg@10": 0.45179452203971654
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9521915103722084
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9752948774973371
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9602075886902439
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9197525363243463
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7926524802982091
+ },
+ "jsts": {
+ "spearman": 0.8020865982595183
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5475619174246511
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4200457612686986
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3991288954568376
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json
index 1b99a44..beacb01 100644
--- a/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json
+++ b/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7809527709426081
+ "macro_f1": 0.7769528027441275
},
"amazon_review_classification": {
- "macro_f1": 0.5155899232320224
+ "macro_f1": 0.5146406875677701
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8844781754440035
},
"massive_intent_classification": {
- "macro_f1": 0.7879373479249787
+ "macro_f1": 0.7872353730798753
},
"massive_scenario_classification": {
- "macro_f1": 0.8662625888023707
+ "macro_f1": 0.8639715373498098
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8350488266987821
+ },
+ "wrime_classification": {
+ "macro_f1": 0.3815230965003785
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9095168116460639
+ "ndcg@10": 0.909518320556229
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.5981293078380808
+ },
+ "jqara": {
+ "ndcg@10": 0.3719557553111225
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.6789908587925922
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8281088898171538
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.4085978545476503
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.42314124780036416
+ "ndcg@10": 0.43879890119990833
},
"jaqket": {
- "ndcg@10": 0.36199154051747723
+ "ndcg@10": 0.3555985699236658
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.1997740482697841
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.16521386136598404
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.12060735418211223
},
"mrtydi": {
- "ndcg@10": 0.07810683176415421
+ "ndcg@10": 0.07107405961190999
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.5430415601583998
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.6077212544951452
+ "ndcg@10": 0.5585881454407594
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.6433890489201118
+ "ndcg@10": 0.629620778788499
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.39317174536190913
+ "ndcg@10": 0.3517328767423871
}
},
"STS": {
"jsick": {
- "spearman": 0.754165277432144
+ "spearman": 0.7775668305928584
},
"jsts": {
- "spearman": 0.7558202366183716
+ "spearman": 0.7563460117163054
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.4966545453348478
+ "v_measure_score": 0.4601335671191492
},
"mewsc16": {
- "v_measure_score": 0.3877356318022785
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6237623762376237
+ "v_measure_score": 0.3922006290468797
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3456006554316726
}
}
}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json
index ea227c2..6a83eb2 100644
--- a/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json
+++ b/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.776174162517931
+ "macro_f1": 0.7779156199278396
},
"amazon_review_classification": {
- "macro_f1": 0.5085781180553806
+ "macro_f1": 0.5111451768867725
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8782111274457993
},
"massive_intent_classification": {
- "macro_f1": 0.7718541530739129
+ "macro_f1": 0.7796973463634825
},
"massive_scenario_classification": {
- "macro_f1": 0.8592571786794985
+ "macro_f1": 0.8634142669499835
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8506408877596591
+ },
+ "wrime_classification": {
+ "macro_f1": 0.3656175961601361
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9100551950168166
+ "ndcg@10": 0.9092446252246911
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.605113846464576
+ },
+ "jqara": {
+ "ndcg@10": 0.36840730960684165
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.693114284522583
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8530771666734125
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.42431895793525753
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.42368135774043536
+ "ndcg@10": 0.43601956332213093
},
"jaqket": {
- "ndcg@10": 0.37721850397542034
+ "ndcg@10": 0.37354035206874886
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.2518443007449429
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.14756204576714857
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.16862391555076126
},
"mrtydi": {
- "ndcg@10": 0.07878085186566607
+ "ndcg@10": 0.07770347901718931
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.5689006657309228
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.636999375405723
+ "ndcg@10": 0.5911474254499767
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.6413498649875696
+ "ndcg@10": 0.618101892252404
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.397250919496823
+ "ndcg@10": 0.3287673013916751
}
},
"STS": {
"jsick": {
- "spearman": 0.7756925231422259
+ "spearman": 0.7893346270810556
},
"jsts": {
- "spearman": 0.7652968548841591
+ "spearman": 0.7657111966582518
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5262387436934941
+ "v_measure_score": 0.48558605187442483
},
"mewsc16": {
- "v_measure_score": 0.37277574537292835
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.623321554770318
+ "v_measure_score": 0.4319848997472401
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3860004176729398
}
}
}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json
index dbed068..ebc1037 100644
--- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7619809437515043
+ "macro_f1": 0.7430232193667698
},
"amazon_review_classification": {
- "macro_f1": 0.5205592432502059
+ "macro_f1": 0.5196833867285527
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8969457721352727
},
"massive_intent_classification": {
- "macro_f1": 0.7789367871593064
+ "macro_f1": 0.7782504182162112
},
"massive_scenario_classification": {
- "macro_f1": 0.8490320705866646
+ "macro_f1": 0.8459551634050977
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8382321236746973
+ },
+ "wrime_classification": {
+ "macro_f1": 0.3814631725334783
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9065584234991577
+ "ndcg@10": 0.906706098295787
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.581551030502223
+ },
+ "jqara": {
+ "ndcg@10": 0.3666097794082717
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.6908907697836885
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8615323536010276
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.39917758524262303
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.4411487123884245
+ "ndcg@10": 0.4460371569059824
},
"jaqket": {
- "ndcg@10": 0.39613283459361814
+ "ndcg@10": 0.3845053301501902
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.2239147895010841
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.13942471586306499
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.139069576010256
},
"mrtydi": {
- "ndcg@10": 0.08154879873415645
+ "ndcg@10": 0.07299085059942924
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.5835049460335981
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.6276035246534508
+ "ndcg@10": 0.5863133806218087
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.5838785018803183
+ "ndcg@10": 0.5743459511193183
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.3489329387182086
+ "ndcg@10": 0.32465205260710006
}
},
"STS": {
"jsick": {
- "spearman": 0.7463567093877269
+ "spearman": 0.7525289500265361
},
"jsts": {
- "spearman": 0.7468283806971927
+ "spearman": 0.7466329702466956
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.41041888940251137
+ "v_measure_score": 0.45840176801621957
},
"mewsc16": {
- "v_measure_score": 0.45175891401665724
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6236711552090717
+ "v_measure_score": 0.4407932537977668
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.38669286929581886
}
}
}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json
index 9528312..46f5e26 100644
--- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7619809437515043
+ "macro_f1": 0.7640029182013914
},
"amazon_review_classification": {
- "macro_f1": 0.5152108946679324
+ "macro_f1": 0.5165133824101508
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8785996540635361
},
"massive_intent_classification": {
- "macro_f1": 0.7895128475562229
+ "macro_f1": 0.7815141648175687
},
"massive_scenario_classification": {
- "macro_f1": 0.865430249169577
+ "macro_f1": 0.8643739735863134
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8179797886754027
+ },
+ "wrime_classification": {
+ "macro_f1": 0.37929751450328747
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9115815294581953
+ "ndcg@10": 0.9116742957456255
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.6540921936468603
+ },
+ "jqara": {
+ "ndcg@10": 0.3839109493881204
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7018821974047713
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8442037101394532
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.4895140949755706
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.47387768939865055
+ "ndcg@10": 0.48413330907538854
},
"jaqket": {
- "ndcg@10": 0.3956683977353904
+ "ndcg@10": 0.3872950509227257
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.25723625707011927
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.2159968215066114
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.18105368261359917
},
"mrtydi": {
- "ndcg@10": 0.1144234568266308
+ "ndcg@10": 0.11016096912346693
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.5890880676571459
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.6416096544574569
+ "ndcg@10": 0.6005134171957127
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.7023477497744102
+ "ndcg@10": 0.691482229451667
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.4536720868647063
+ "ndcg@10": 0.377200379602747
}
},
"STS": {
"jsick": {
- "spearman": 0.781770693640686
+ "spearman": 0.7914302448138066
},
"jsts": {
- "spearman": 0.7680617109850311
+ "spearman": 0.7677275529386515
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5301620892693397
+ "v_measure_score": 0.4879255424919774
},
"mewsc16": {
- "v_measure_score": 0.4034776723308173
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6238078417520311
+ "v_measure_score": 0.42611073323310256
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.2641681900458691
}
}
}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json
index b36686c..dad1d0c 100644
--- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7725250131648236
+ "macro_f1": 0.7767065011282246
},
"amazon_review_classification": {
- "macro_f1": 0.5341627023771393
+ "macro_f1": 0.5348080733659045
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8928165629175933
},
"massive_intent_classification": {
- "macro_f1": 0.7682863192709365
+ "macro_f1": 0.7678594675802368
},
"massive_scenario_classification": {
- "macro_f1": 0.8639396658321546
+ "macro_f1": 0.8624414954250645
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8376983111767246
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4088843388537483
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9094717381883379
+ "ndcg@10": 0.9093431066849924
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.6144762455614383
+ },
+ "jqara": {
+ "ndcg@10": 0.42466871751866847
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7065312090166875
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8742363417086798
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.4627911424268102
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.47038430326303626
+ "ndcg@10": 0.4824617060944974
},
"jaqket": {
- "ndcg@10": 0.44101304795602897
+ "ndcg@10": 0.4416882664197474
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.28888654887615833
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.1951539369285861
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.18656064853165188
},
"mrtydi": {
- "ndcg@10": 0.11429128335865787
+ "ndcg@10": 0.11438786651077741
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.439694854198857
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.43434267808785576
+ "ndcg@10": 0.40326645532241284
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.6240651697600803
+ "ndcg@10": 0.6048895627840009
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.3651687833824759
+ "ndcg@10": 0.36508949429446635
}
},
"STS": {
"jsick": {
- "spearman": 0.787528927058734
+ "spearman": 0.7876474308902304
},
"jsts": {
- "spearman": 0.7781413957931619
+ "spearman": 0.7782114794698556
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.48448646364489634
+ "v_measure_score": 0.5129910499369752
},
"mewsc16": {
- "v_measure_score": 0.43168522818790694
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6235418875927891
+ "v_measure_score": 0.46267377071476495
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3603960521680572
}
}
}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json
index f620d50..3101473 100644
--- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7635642561809131
+ "macro_f1": 0.7655145272700131
},
"amazon_review_classification": {
- "macro_f1": 0.5275222511867922
+ "macro_f1": 0.5273281594091623
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8821782850442395
},
"massive_intent_classification": {
- "macro_f1": 0.7688060073049678
+ "macro_f1": 0.772169445045981
},
"massive_scenario_classification": {
- "macro_f1": 0.8651446837233107
+ "macro_f1": 0.8625146467158739
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8145447793317748
+ },
+ "wrime_classification": {
+ "macro_f1": 0.40382215327142257
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9129851570116734
+ "ndcg@10": 0.9130235242422614
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.6513884390883999
+ },
+ "jqara": {
+ "ndcg@10": 0.44959095699445484
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7121442551193732
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8679395106334268
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.5316167737103407
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.5014367709991477
+ "ndcg@10": 0.5120263378587457
},
"jaqket": {
- "ndcg@10": 0.4583812630740073
+ "ndcg@10": 0.45810454318653493
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.30420713299186014
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.260782337674165
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.23652695166828322
},
"mrtydi": {
- "ndcg@10": 0.13003320802922363
+ "ndcg@10": 0.1306190778426387
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.5464834936384055
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.5508587506679636
+ "ndcg@10": 0.5213267121181618
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.7497069192695408
+ "ndcg@10": 0.7412764112062588
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.4524300499843447
+ "ndcg@10": 0.4220927003134505
}
},
"STS": {
"jsick": {
- "spearman": 0.7984403024596518
+ "spearman": 0.7985649981589037
},
"jsts": {
- "spearman": 0.7813685476201204
+ "spearman": 0.7813825399856615
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5319881995988209
+ "v_measure_score": 0.5159318544938132
},
"mewsc16": {
- "v_measure_score": 0.4330807170988368
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6226614895870103
+ "v_measure_score": 0.4267958807672512
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3178045302473092
}
}
}
\ No newline at end of file
diff --git a/docs/results/OpenAI/text-embedding-3-large/summary.json b/docs/results/OpenAI/text-embedding-3-large/summary.json
index 0029b0d..cf5b699 100644
--- a/docs/results/OpenAI/text-embedding-3-large/summary.json
+++ b/docs/results/OpenAI/text-embedding-3-large/summary.json
@@ -6,28 +6,64 @@
"amazon_review_classification": {
"macro_f1": 0.6043632319384946
},
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9689111460113327
+ },
"massive_intent_classification": {
"macro_f1": 0.8090871295952566
},
"massive_scenario_classification": {
"macro_f1": 0.9108443051510002
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8785070638424861
+ },
+ "wrime_classification": {
+ "macro_f1": 0.45837220696591946
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9358042266852659
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8678014849879991
+ },
+ "jqara": {
+ "ndcg@10": 0.5688951496540466
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8379796888542357
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9423911330344104
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8290267731484572
+ },
"jagovfaqs_22k": {
"ndcg@10": 0.7240937077183436
},
"jaqket": {
"ndcg@10": 0.48208863565793814
},
+ "mintaka_retrieval": {
+ "ndcg@10": 0.6351669096573943
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6056623188124566
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.4526315025094686
+ },
"mrtydi": {
"ndcg@10": 0.3488438390945784
},
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.923732838888777
+ },
"nlp_journal_abs_intro": {
"ndcg@10": 0.9932811349540317
},
@@ -36,7 +72,7 @@
},
"nlp_journal_title_intro": {
"ndcg@10": 0.9547126796600445
- }
+ }
},
"STS": {
"jsick": {
@@ -52,11 +88,9 @@
},
"mewsc16": {
"v_measure_score": 0.4955424351458981
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6234502302515055
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4882897499806697
}
}
}
\ No newline at end of file
diff --git a/docs/results/OpenAI/text-embedding-3-small/summary.json b/docs/results/OpenAI/text-embedding-3-small/summary.json
index 3391826..ccfdccb 100644
--- a/docs/results/OpenAI/text-embedding-3-small/summary.json
+++ b/docs/results/OpenAI/text-embedding-3-small/summary.json
@@ -6,28 +6,64 @@
"amazon_review_classification": {
"macro_f1": 0.5592259673654241
},
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8997314741995592
+ },
"massive_intent_classification": {
"macro_f1": 0.7766119663088307
},
"massive_scenario_classification": {
"macro_f1": 0.8866536867311439
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8472270726472407
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4005292604550654
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9291728102678644
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8472076343603366
+ },
+ "jqara": {
+ "ndcg@10": 0.3858424853310068
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7761045097931168
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9261211375496474
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.7958409152797974
+ },
"jagovfaqs_22k": {
"ndcg@10": 0.640150048193537
},
"jaqket": {
"ndcg@10": 0.3394304922804131
},
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3243993062339528
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.4844750116221409
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.35067885909631535
+ },
"mrtydi": {
"ndcg@10": 0.2002984123046011
},
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8583248954344459
+ },
"nlp_journal_abs_intro": {
"ndcg@10": 0.9846617848570168
},
@@ -52,11 +88,9 @@
},
"mewsc16": {
"v_measure_score": 0.4755374215259236
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6227417640807651
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.44591888262353296
}
}
}
\ No newline at end of file
diff --git a/docs/results/OpenAI/text-embedding-ada-002/summary.json b/docs/results/OpenAI/text-embedding-ada-002/summary.json
index 3523d7d..851c798 100644
--- a/docs/results/OpenAI/text-embedding-ada-002/summary.json
+++ b/docs/results/OpenAI/text-embedding-ada-002/summary.json
@@ -6,28 +6,64 @@
"amazon_review_classification": {
"macro_f1": 0.5312953134953877
},
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8876337189807528
+ },
"massive_intent_classification": {
"macro_f1": 0.7457150118928685
},
"massive_scenario_classification": {
"macro_f1": 0.8689044829586676
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8039306302437722
+ },
+ "wrime_classification": {
+ "macro_f1": 0.3757375090991345
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9303611831749345
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8391440408595291
+ },
+ "jqara": {
+ "ndcg@10": 0.37540986441296365
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7282642345185789
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9082852722613336
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.7807563383397835
+ },
"jagovfaqs_22k": {
"ndcg@10": 0.6102270226904314
},
"jaqket": {
"ndcg@10": 0.4256467956806472
},
+ "mintaka_retrieval": {
+ "ndcg@10": 0.27093020670377677
+ },
"mrtydi": {
"ndcg@10": 0.1450739420851161
},
+ "miracl_retrieval": {
+ "ndcg@10": 0.3453600176817199
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.3189777971587629
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9750618854208265
+ },
"nlp_journal_abs_intro": {
"ndcg@10": 0.9499224324391132
},
@@ -52,11 +88,9 @@
},
"mewsc16": {
"v_measure_score": 0.4691938182964486
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6239830208701805
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.49744578060685957
}
}
}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-base-v2/summary.json b/docs/results/cl-nagoya/ruri-base-v2/summary.json
new file mode 100644
index 0000000..c090ce8
--- /dev/null
+++ b/docs/results/cl-nagoya/ruri-base-v2/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7597182825660609
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5554544939941979
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9235657959062215
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8092593406289539
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8886710878440421
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8926416828413609
+ },
+ "wrime_classification": {
+ "macro_f1": 0.461674192977988
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9317155624145913
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8576025511447865
+ },
+ "jqara": {
+ "ndcg@10": 0.6066458919871698
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.842561072326263
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8846847676615118
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8101096413526069
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7590325308586044
+ },
+ "jaqket": {
+ "ndcg@10": 0.5700921243106366
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.4417665675636218
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6821942595823656
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.3773323411085737
+ },
+ "mrtydi": {
+ "ndcg@10": 0.4088554217076187
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8805294567802572
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.8973083823806287
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9696059096853805
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.789314612552914
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8262585834114126
+ },
+ "jsts": {
+ "spearman": 0.8343314248100878
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5437561090974637
+ },
+ "mewsc16": {
+ "v_measure_score": 0.5060934807171409
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3553392136864812
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-base/summary.json b/docs/results/cl-nagoya/ruri-base/summary.json
index a7c7b05..591ccd2 100644
--- a/docs/results/cl-nagoya/ruri-base/summary.json
+++ b/docs/results/cl-nagoya/ruri-base/summary.json
@@ -4,59 +4,93 @@
"macro_f1": 0.7665550732749669
},
"amazon_review_classification": {
- "macro_f1": 0.5575876111411316
+ "macro_f1": 0.5602315794414631
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.916854859845768
},
"massive_intent_classification": {
- "macro_f1": 0.8141210121425055
+ "macro_f1": 0.8122217429688374
},
"massive_scenario_classification": {
- "macro_f1": 0.8848812917656395
+ "macro_f1": 0.8861454528496383
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8773434580133629
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4546702469392619
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9290942178703699
+ "ndcg@10": 0.9291919623555276
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8723926273423869
+ },
+ "jqara": {
+ "ndcg@10": 0.5415330056104515
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7921821114257664
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8801076117078023
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8247892121220626
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.7455660589538348
+ "ndcg@10": 0.7550451217031677
},
"jaqket": {
- "ndcg@10": 0.5012253145754781
+ "ndcg@10": 0.5023277717264268
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.45371270319906437
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.5488453168704391
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.35421737773497164
},
"mrtydi": {
- "ndcg@10": 0.3545113073009125
+ "ndcg@10": 0.3558845666232437
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8664858820958761
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.8689204088388403
+ "ndcg@10": 0.8723253192804757
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9656989703684407
+ "ndcg@10": 0.952690372948545
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.7531306059721564
+ "ndcg@10": 0.7624967518065642
}
},
"STS": {
"jsick": {
- "spearman": 0.8231772134744029
+ "spearman": 0.8232158602892652
},
"jsts": {
- "spearman": 0.8342848039994751
+ "spearman": 0.8343499347567392
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5427223607801758
+ "v_measure_score": 0.5669485444435229
},
"mewsc16": {
- "v_measure_score": 0.5404099864321413
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6237623762376238
+ "v_measure_score": 0.5205022529269108
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3854934527391879
}
}
}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-large-v2/summary.json b/docs/results/cl-nagoya/ruri-large-v2/summary.json
new file mode 100644
index 0000000..e4a22b7
--- /dev/null
+++ b/docs/results/cl-nagoya/ruri-large-v2/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7950890220234579
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5708906806011181
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.935661827685557
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8087242075730218
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8970775785938794
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8471804883814585
+ },
+ "wrime_classification": {
+ "macro_f1": 0.47233151152826275
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9321133927024134
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8529056816630052
+ },
+ "jqara": {
+ "ndcg@10": 0.644692559122629
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.857799148388121
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9068464851749977
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8048616669652183
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7822527313926262
+ },
+ "jaqket": {
+ "ndcg@10": 0.6561070613824674
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.5040548535978852
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.7046000072363299
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.36969618230893564
+ },
+ "mrtydi": {
+ "ndcg@10": 0.4636780745156557
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9085158509835447
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9114732359476821
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.977434890774318
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.8232131912662143
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8212250726981067
+ },
+ "jsts": {
+ "spearman": 0.8424300570470996
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5562089376369613
+ },
+ "mewsc16": {
+ "v_measure_score": 0.509675337301281
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4605817648504685
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-large/summary.json b/docs/results/cl-nagoya/ruri-large/summary.json
index e86c46b..2e2cead 100644
--- a/docs/results/cl-nagoya/ruri-large/summary.json
+++ b/docs/results/cl-nagoya/ruri-large/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.8080806321853091
+ "macro_f1": 0.7950391460082398
},
"amazon_review_classification": {
- "macro_f1": 0.5680171450057119
+ "macro_f1": 0.5685184036314727
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9356380708493385
},
"massive_intent_classification": {
- "macro_f1": 0.8255898596881264
+ "macro_f1": 0.8209962603450597
},
"massive_scenario_classification": {
- "macro_f1": 0.8956410349938264
+ "macro_f1": 0.9002551808707712
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.852564312646895
+ },
+ "wrime_classification": {
+ "macro_f1": 0.46447181564392015
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9298524733536755
+ "ndcg@10": 0.9298778327436324
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8661076138203823
+ },
+ "jqara": {
+ "ndcg@10": 0.5958950681984889
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8022791978749706
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8690504682983363
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8169123630823522
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.7667506664925435
+ "ndcg@10": 0.7763829985024149
},
"jaqket": {
- "ndcg@10": 0.6173871224245404
+ "ndcg@10": 0.617343261611166
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.5106450721691843
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.5547009159538185
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.3476835812045506
},
"mrtydi": {
- "ndcg@10": 0.3803302462897418
+ "ndcg@10": 0.38120908812619875
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8652992529882778
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.8712459719069233
+ "ndcg@10": 0.8891161860918603
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9657898747088243
+ "ndcg@10": 0.9617411892426375
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.779665053945222
+ "ndcg@10": 0.7922108957487803
}
},
"STS": {
"jsick": {
- "spearman": 0.8199959693684533
+ "spearman": 0.8199569498182433
},
"jsts": {
- "spearman": 0.8426164139167538
+ "spearman": 0.8426241685487486
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5139491572866559
+ "v_measure_score": 0.5443732953428371
},
"mewsc16": {
- "v_measure_score": 0.5225025331595674
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6228813559322034
+ "v_measure_score": 0.5058998835740889
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.44757212682292163
}
}
}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-small-v2/summary.json b/docs/results/cl-nagoya/ruri-small-v2/summary.json
new file mode 100644
index 0000000..eec64ee
--- /dev/null
+++ b/docs/results/cl-nagoya/ruri-small-v2/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7767065011282246
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5559888936165459
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8863640825159859
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8199647165894474
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8816435555944846
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8156946375922746
+ },
+ "wrime_classification": {
+ "macro_f1": 0.452255956789983
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9320364061675573
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8818198634914105
+ },
+ "jqara": {
+ "ndcg@10": 0.5670420631375501
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8332825788093644
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9009377977029078
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8303842720270221
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7401670430071696
+ },
+ "jaqket": {
+ "ndcg@10": 0.6225429070303006
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3530718504041533
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6689773236918534
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.32577528652704146
+ },
+ "mrtydi": {
+ "ndcg@10": 0.42400768916861914
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9064650891678154
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9041671364705328
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9729556994161748
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.7821156819492701
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8387675357095226
+ },
+ "jsts": {
+ "spearman": 0.8193470885317312
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5260577746749562
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4947076915300828
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.47820319421479446
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-small/summary.json b/docs/results/cl-nagoya/ruri-small/summary.json
index cb591ea..079db3e 100644
--- a/docs/results/cl-nagoya/ruri-small/summary.json
+++ b/docs/results/cl-nagoya/ruri-small/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7991935990685706
+ "macro_f1": 0.8055421233612723
},
"amazon_review_classification": {
- "macro_f1": 0.556129066893332
+ "macro_f1": 0.5541385299441624
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8885932202820669
},
"massive_intent_classification": {
- "macro_f1": 0.8148895285345188
+ "macro_f1": 0.8108237159349728
},
"massive_scenario_classification": {
- "macro_f1": 0.8787774569382543
+ "macro_f1": 0.8800077744996155
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.839667353042202
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4595261443020403
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9300177985352138
+ "ndcg@10": 0.9301438020851305
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8766726074179287
+ },
+ "jqara": {
+ "ndcg@10": 0.5325863556709908
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7783787989685144
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8813650067339368
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.825837748200516
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.736494039429321
+ "ndcg@10": 0.740126693753929
},
"jaqket": {
- "ndcg@10": 0.484437639428696
+ "ndcg@10": 0.4844203596195783
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3723496207549938
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.5222032466588368
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.2898890422890513
},
"mrtydi": {
- "ndcg@10": 0.3342716158897666
+ "ndcg@10": 0.3351374258570715
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8689213841203763
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.8768878489670099
+ "ndcg@10": 0.8723259697162892
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9716879343439146
+ "ndcg@10": 0.9619567235021281
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.7608660955794895
+ "ndcg@10": 0.7608782792491423
}
},
"STS": {
"jsick": {
- "spearman": 0.8343927017558587
+ "spearman": 0.8344934497771457
},
"jsts": {
- "spearman": 0.8213297790184827
+ "spearman": 0.8213145808052514
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5096442244018489
+ "v_measure_score": 0.5289736036070719
},
"mewsc16": {
- "v_measure_score": 0.5141045788711239
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6211267605633802
+ "v_measure_score": 0.4936801242208388
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.46507426407220503
}
}
}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-v3-130m/summary.json b/docs/results/cl-nagoya/ruri-v3-130m/summary.json
new file mode 100644
index 0000000..5700f32
--- /dev/null
+++ b/docs/results/cl-nagoya/ruri-v3-130m/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7674793827265536
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5955994619477079
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9500285886600925
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.807938642045445
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8790346026671575
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8287806075978352
+ },
+ "wrime_classification": {
+ "macro_f1": 0.46634901067800855
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9336981049156847
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8864670177419038
+ },
+ "jqara": {
+ "ndcg@10": 0.663018840039673
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.865876689917921
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9362058245511219
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8421113535976967
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7532393338902414
+ },
+ "jaqket": {
+ "ndcg@10": 0.730979460582779
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.5177034569356731
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.7100959869376436
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.45158335316076936
+ },
+ "mrtydi": {
+ "ndcg@10": 0.4780012151028164
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.995144547086835
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9887952520028016
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9795152116360624
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9628103840588119
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7885956280300046
+ },
+ "jsts": {
+ "spearman": 0.8323603869543141
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5436288048604071
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4883532965483729
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.5019988844015973
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-v3-30m/summary.json b/docs/results/cl-nagoya/ruri-v3-30m/summary.json
new file mode 100644
index 0000000..c4e768a
--- /dev/null
+++ b/docs/results/cl-nagoya/ruri-v3-30m/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7559571782387728
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5570789457429248
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9262839486939813
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.783074979041957
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8672396605716526
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8140481078951145
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4311261750368354
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9305651903486406
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8761294751423317
+ },
+ "jqara": {
+ "ndcg@10": 0.5747490185208084
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8352458113588647
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9297421530365237
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.827028266156452
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7020872105862214
+ },
+ "jaqket": {
+ "ndcg@10": 0.6244733500896729
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.4304756847175998
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6498916988979277
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.4577076048703079
+ },
+ "mrtydi": {
+ "ndcg@10": 0.41775750844113785
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9876046427100846
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9916030162169887
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9699245797579602
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9534027111106339
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8161946935797372
+ },
+ "jsts": {
+ "spearman": 0.819463211043541
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5369067977199252
+ },
+ "mewsc16": {
+ "v_measure_score": 0.47961175798341066
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4804316290090649
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-v3-310m/summary.json b/docs/results/cl-nagoya/ruri-v3-310m/summary.json
new file mode 100644
index 0000000..c27fed8
--- /dev/null
+++ b/docs/results/cl-nagoya/ruri-v3-310m/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.8009270010529765
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.6071898527482484
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9530657500380437
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8176293812793415
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.890051922198645
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8812655271153628
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4852854023445756
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9342725351989479
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8845859005757672
+ },
+ "jqara": {
+ "ndcg@10": 0.6893206802955604
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8500853284469898
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9335769070370818
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8406411130636801
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7648595155366429
+ },
+ "jaqket": {
+ "ndcg@10": 0.7186721885111346
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.5225348075920366
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.677145342243983
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.43425275955863796
+ },
+ "mrtydi": {
+ "ndcg@10": 0.47064490316120666
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9958682142366949
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9935172926595653
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9790717306095701
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9658294271714906
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7886332339318622
+ },
+ "jsts": {
+ "spearman": 0.8430847366018317
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5855988614657296
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4860478393120035
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4440626045366051
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/ruri-v3-70m/summary.json b/docs/results/cl-nagoya/ruri-v3-70m/summary.json
new file mode 100644
index 0000000..3a2c52d
--- /dev/null
+++ b/docs/results/cl-nagoya/ruri-v3-70m/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.8180877928218353
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5798379850008339
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9339140455312027
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7891754112354649
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8782518076402043
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.7686616284901401
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4437562280187194
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9320237969329785
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8748197118530385
+ },
+ "jqara": {
+ "ndcg@10": 0.6309432249818713
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8503057292439823
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9225778620264797
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8275893500639571
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7327144021448485
+ },
+ "jaqket": {
+ "ndcg@10": 0.6768047159335538
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.4626106409683068
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6797764462851262
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.43554376517918675
+ },
+ "mrtydi": {
+ "ndcg@10": 0.4499999994407917
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.984966699117648
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9868218521221748
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9706955197203543
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9573354583951488
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7909930894957667
+ },
+ "jsts": {
+ "spearman": 0.828242284804404
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5492094636693866
+ },
+ "mewsc16": {
+ "v_measure_score": 0.47739615416643866
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4719940146272088
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json
index 42cc5ff..91d272c 100644
--- a/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json
+++ b/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7234436301724776
+ "macro_f1": 0.7192545517004465
},
"amazon_review_classification": {
- "macro_f1": 0.5441445333270086
+ "macro_f1": 0.5454422812215437
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9100588500656168
},
"massive_intent_classification": {
- "macro_f1": 0.7951973953020242
+ "macro_f1": 0.8011172170046241
},
"massive_scenario_classification": {
- "macro_f1": 0.8760200177186923
+ "macro_f1": 0.8762609424720998
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8191722798191963
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4188203301151871
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9183455876236017
+ "ndcg@10": 0.9184207070049463
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.6426611140199804
+ },
+ "jqara": {
+ "ndcg@10": 0.3748362133870952
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7087840971938433
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8734013475096433
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.5331630522529377
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.5161990612242935
+ "ndcg@10": 0.5202480516932524
},
"jaqket": {
- "ndcg@10": 0.5024513438428565
+ "ndcg@10": 0.5013089667314551
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3288294149496304
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.20681341934572967
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.24700329716018354
},
"mrtydi": {
- "ndcg@10": 0.13976323269046823
+ "ndcg@10": 0.141360680613414
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.6909104560170936
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.6807886421530585
+ "ndcg@10": 0.6619434888289687
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.6570889175649209
+ "ndcg@10": 0.6484407439307039
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.48219159577174137
+ "ndcg@10": 0.4696725603511326
}
},
"STS": {
"jsick": {
- "spearman": 0.8282816229512862
+ "spearman": 0.8283659349049672
},
"jsts": {
- "spearman": 0.8127259236647225
+ "spearman": 0.8126484380435667
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5266774168531417
+ "v_measure_score": 0.5511252826598367
},
"mewsc16": {
- "v_measure_score": 0.5091016872016825
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6256665481692143
+ "v_measure_score": 0.5339141639252604
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.49207894013578146
}
}
}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json
index a2d8924..c2b5a3e 100644
--- a/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json
+++ b/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7321444865928852
+ "macro_f1": 0.7260568612881779
},
"amazon_review_classification": {
- "macro_f1": 0.5475800661400465
+ "macro_f1": 0.5455832826466495
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8942024454984163
},
"massive_intent_classification": {
- "macro_f1": 0.7922802742146243
+ "macro_f1": 0.792273118014186
},
"massive_scenario_classification": {
- "macro_f1": 0.8772172454209797
+ "macro_f1": 0.8770657195206764
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8042709569831964
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4525777476393026
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9148471751378899
+ "ndcg@10": 0.9149640515619839
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.5614550878114778
+ },
+ "jqara": {
+ "ndcg@10": 0.38302855218604437
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7126433285790728
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8659821811381412
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.4370774500135088
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.4683673504170269
+ "ndcg@10": 0.47421467281855384
},
"jaqket": {
- "ndcg@10": 0.39878189118804513
+ "ndcg@10": 0.4004385277719307
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.376774984849213
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.18125969161337505
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.23480755788261093
},
"mrtydi": {
- "ndcg@10": 0.11834919561027905
+ "ndcg@10": 0.1188048690188868
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.6407825080386719
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.634254459552888
+ "ndcg@10": 0.6295135121177772
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.37927566884615427
+ "ndcg@10": 0.36949537039923136
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.25787534957423713
+ "ndcg@10": 0.2490316613470849
}
},
"STS": {
"jsick": {
- "spearman": 0.837959537101532
+ "spearman": 0.8377753687267541
},
"jsts": {
- "spearman": 0.825691902117111
+ "spearman": 0.8256006176068381
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5074967876488787
+ "v_measure_score": 0.5337915256082275
},
"mewsc16": {
- "v_measure_score": 0.503782014677764
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6250885896527285
+ "v_measure_score": 0.5125821768154618
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.45736658859438273
}
}
}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json
index 3863c9e..2cc2181 100644
--- a/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json
+++ b/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7330185800774036
+ "macro_f1": 0.7364790582283407
},
"amazon_review_classification": {
- "macro_f1": 0.5392887528271114
+ "macro_f1": 0.5413541626836352
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8986588956343088
},
"massive_intent_classification": {
- "macro_f1": 0.7907120296283751
+ "macro_f1": 0.7767897385750657
},
"massive_scenario_classification": {
- "macro_f1": 0.8597097942715117
+ "macro_f1": 0.8610390686035142
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8413013579577491
+ },
+ "wrime_classification": {
+ "macro_f1": 0.41309966752995253
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9115668272308735
+ "ndcg@10": 0.9117818311636607
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.5154239181007129
+ },
+ "jqara": {
+ "ndcg@10": 0.3218696921394324
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.6995597032253587
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8612256071032377
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.35106925427500363
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.46003459081522513
+ "ndcg@10": 0.4673719618749888
},
"jaqket": {
- "ndcg@10": 0.3945725593125862
+ "ndcg@10": 0.3951670829019162
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.299231152726057
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.10934136213023636
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.15981611825721914
},
"mrtydi": {
- "ndcg@10": 0.055507775092798486
+ "ndcg@10": 0.055133639963568334
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.582165240647806
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.6025847751308843
+ "ndcg@10": 0.5841104498413489
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.5562839869857912
+ "ndcg@10": 0.55577879846708
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.3449181162324482
+ "ndcg@10": 0.3284050897756761
}
},
"STS": {
"jsick": {
- "spearman": 0.7849379492955117
+ "spearman": 0.7852600594448598
},
"jsts": {
- "spearman": 0.7894946592483818
+ "spearman": 0.7894496424482047
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5223347838445698
+ "v_measure_score": 0.5065452260003059
},
"mewsc16": {
- "v_measure_score": 0.37310458219601117
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.624424778761062
+ "v_measure_score": 0.39578933501406055
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3362930091678794
}
}
}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json b/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json
index d37618a..09525c9 100644
--- a/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json
+++ b/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.767905114979583
+ "macro_f1": 0.7640316468319925
},
"amazon_review_classification": {
- "macro_f1": 0.5537089641846143
+ "macro_f1": 0.5504736753163985
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9057099704855596
},
"massive_intent_classification": {
- "macro_f1": 0.7912698845073401
+ "macro_f1": 0.792495956569193
},
"massive_scenario_classification": {
- "macro_f1": 0.8736185210672394
+ "macro_f1": 0.8749858164207054
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8288719236604842
+ },
+ "wrime_classification": {
+ "macro_f1": 0.44326523397693174
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9095494729022622
+ "ndcg@10": 0.9094836571513687
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.5417192948613557
+ },
+ "jqara": {
+ "ndcg@10": 0.3877939946491903
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7001887861606321
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8303617273610736
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.37613574135010835
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.4509073581555124
+ "ndcg@10": 0.46564010373437337
},
"jaqket": {
- "ndcg@10": 0.34595043675331943
+ "ndcg@10": 0.3452888488420233
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3058130510308383
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.10326154138228141
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.12550430031143336
},
"mrtydi": {
- "ndcg@10": 0.05750859876901772
+ "ndcg@10": 0.057502989435967655
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.504469050615059
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.550742021417855
+ "ndcg@10": 0.5069650402920987
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.6307172007359215
+ "ndcg@10": 0.6043158227609278
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.39612451822677164
+ "ndcg@10": 0.34323430832579677
}
},
"STS": {
"jsick": {
- "spearman": 0.8014979086154339
+ "spearman": 0.8013849170804103
},
"jsts": {
- "spearman": 0.8097685749017456
+ "spearman": 0.809789575264219
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5090447587797094
+ "v_measure_score": 0.5147732775967515
},
"mewsc16": {
- "v_measure_score": 0.4591920015613856
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6248671625929861
+ "v_measure_score": 0.44443267597570074
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.34646662604886447
}
}
}
\ No newline at end of file
diff --git a/docs/results/colorfulscoop/sbert-base-ja/summary.json b/docs/results/colorfulscoop/sbert-base-ja/summary.json
index 2a08044..91ef6aa 100644
--- a/docs/results/colorfulscoop/sbert-base-ja/summary.json
+++ b/docs/results/colorfulscoop/sbert-base-ja/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7221023294352484
+ "macro_f1": 0.7080315613053877
},
"amazon_review_classification": {
- "macro_f1": 0.47952384496155054
+ "macro_f1": 0.4779713813897666
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8350239953633378
},
"massive_intent_classification": {
- "macro_f1": 0.725195343788811
+ "macro_f1": 0.7288673932703351
},
"massive_scenario_classification": {
- "macro_f1": 0.836177960542408
+ "macro_f1": 0.8370655127879382
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8262660922438109
+ },
+ "wrime_classification": {
+ "macro_f1": 0.35057897749310646
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.8997301146575819
+ "ndcg@10": 0.8996866702578056
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.37147215136686634
+ },
+ "jqara": {
+ "ndcg@10": 0.2220517076242275
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.6502702968219343
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8255483571039144
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.192984468642645
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.21501915127957166
+ "ndcg@10": 0.21704292684612675
},
"jaqket": {
- "ndcg@10": 0.13161989528541293
+ "ndcg@10": 0.13139887002144995
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.19067862146114167
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.018598782450328283
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.06972936265190934
},
"mrtydi": {
- "ndcg@10": 0.00436010196904899
+ "ndcg@10": 0.004126228941345733
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.29023294982669573
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.2878020264605714
+ "ndcg@10": 0.2580237968832312
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.22397059858982324
+ "ndcg@10": 0.21071404885072903
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.12815871897103842
+ "ndcg@10": 0.11573741610386916
}
},
"STS": {
"jsick": {
- "spearman": 0.6659298300713198
+ "spearman": 0.6656074999372202
},
"jsts": {
- "spearman": 0.7423952309826243
+ "spearman": 0.7425444938991701
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.4298579019834722
+ "v_measure_score": 0.4059869097583984
},
"mewsc16": {
- "v_measure_score": 0.46641671645082333
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6231013776050865
+ "v_measure_score": 0.4617625340860209
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.3035702180528845
}
}
}
\ No newline at end of file
diff --git a/docs/results/google/embeddinggemma-300m/summary.json b/docs/results/google/embeddinggemma-300m/summary.json
new file mode 100644
index 0000000..1cbe1cd
--- /dev/null
+++ b/docs/results/google/embeddinggemma-300m/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7473788045121156
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5803989931720487
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9598578035045773
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8007123314267398
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.9058457580997293
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8691524520966505
+ },
+ "wrime_classification": {
+ "macro_f1": 0.46617181157351545
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9325852428034396
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8672290139012463
+ },
+ "jqara": {
+ "ndcg@10": 0.5208735587352208
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8237547981136122
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9019285986799139
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8107178459954021
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.6942509653422283
+ },
+ "jaqket": {
+ "ndcg@10": 0.6326539731698172
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.38634126517980316
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.3527982534428366
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.34664273718176375
+ },
+ "mrtydi": {
+ "ndcg@10": 0.13863867175417482
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9934404877801122
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9902425863025213
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9611708983967426
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9435055100669566
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8167115014804869
+ },
+ "jsts": {
+ "spearman": 0.8381005453815682
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.553278169293011
+ },
+ "mewsc16": {
+ "v_measure_score": 0.5055377268682895
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4254674919395097
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/hotchpotch/static-embedding-japanese/summary.json b/docs/results/hotchpotch/static-embedding-japanese/summary.json
new file mode 100644
index 0000000..dea2123
--- /dev/null
+++ b/docs/results/hotchpotch/static-embedding-japanese/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.6806231003039513
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.46807443888459704
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.7982203591912549
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7479207001300227
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8218342894775092
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8333478541030553
+ },
+ "wrime_classification": {
+ "macro_f1": 0.32116037890073806
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.918697023137389
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8096474845962077
+ },
+ "jqara": {
+ "ndcg@10": 0.470607034824141
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7201497903350694
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9355298111228094
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.7227068099625594
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.5555106276533467
+ },
+ "jaqket": {
+ "ndcg@10": 0.6403798293637829
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3893399585539267
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.3261108514005591
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.4251322740050699
+ },
+ "mrtydi": {
+ "ndcg@10": 0.1118466505474389
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.7618517724714088
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9573914637080742
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9036776565067465
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.862455457223212
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8251124620732032
+ },
+ "jsts": {
+ "spearman": 0.7781260135980573
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5143752588371998
+ },
+ "mewsc16": {
+ "v_measure_score": 0.34814733829489664
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.21465115117004985
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/intfloat/multilingual-e5-base/summary.json b/docs/results/intfloat/multilingual-e5-base/summary.json
index 96f9640..4d84be2 100644
--- a/docs/results/intfloat/multilingual-e5-base/summary.json
+++ b/docs/results/intfloat/multilingual-e5-base/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.6367079139150691
+ "macro_f1": 0.6428957534047911
},
"amazon_review_classification": {
- "macro_f1": 0.5424265794470897
+ "macro_f1": 0.5417258327796466
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9231910434886872
},
"massive_intent_classification": {
- "macro_f1": 0.7277503514873049
+ "macro_f1": 0.7318717264077053
},
"massive_scenario_classification": {
- "macro_f1": 0.8652828949015864
+ "macro_f1": 0.8677940980663801
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.785022714268383
+ },
+ "wrime_classification": {
+ "macro_f1": 0.3865061394465788
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9285060467194839
+ "ndcg@10": 0.9290148108090969
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8865491934939191
+ },
+ "jqara": {
+ "ndcg@10": 0.4761308479065645
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8196779545649944
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8614612823139557
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8431602298737804
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.6534478396845428
+ "ndcg@10": 0.687214041967885
},
"jaqket": {
- "ndcg@10": 0.5067444792013236
+ "ndcg@10": 0.5169392915456349
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.34676383987252357
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6449511893902589
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.2573147838464383
},
"mrtydi": {
- "ndcg@10": 0.3837652120001251
+ "ndcg@10": 0.42298287793585587
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8355946539433561
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.8709767034225332
+ "ndcg@10": 0.8447862631398672
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9473129303429082
+ "ndcg@10": 0.9461907998491789
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.7304538728893641
+ "ndcg@10": 0.7469571396756213
}
},
"STS": {
"jsick": {
- "spearman": 0.8128058660848744
+ "spearman": 0.8125544166626103
},
"jsts": {
- "spearman": 0.7839196475937381
+ "spearman": 0.7965480195299134
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5502694126615243
+ "v_measure_score": 0.5379041349111564
},
"mewsc16": {
- "v_measure_score": 0.41494514000218946
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6226482073127441
+ "v_measure_score": 0.4943772106331262
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4713134178805946
}
}
}
\ No newline at end of file
diff --git a/docs/results/intfloat/multilingual-e5-large/summary.json b/docs/results/intfloat/multilingual-e5-large/summary.json
index a28c470..40752a5 100644
--- a/docs/results/intfloat/multilingual-e5-large/summary.json
+++ b/docs/results/intfloat/multilingual-e5-large/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.706580687830688
+ "macro_f1": 0.6969861236021963
},
"amazon_review_classification": {
- "macro_f1": 0.5653992303516462
+ "macro_f1": 0.5763612743026115
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9554866923455646
},
"massive_intent_classification": {
- "macro_f1": 0.7577710251429624
+ "macro_f1": 0.7401244088033258
},
"massive_scenario_classification": {
- "macro_f1": 0.8859090262583831
+ "macro_f1": 0.887053685338159
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.7811476853348774
+ },
+ "wrime_classification": {
+ "macro_f1": 0.42377599926222737
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9296254722183955
+ "ndcg@10": 0.9330712866652149
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.9036816685131848
+ },
+ "jqara": {
+ "ndcg@10": 0.561374764136422
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8631195198401651
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8891328806594833
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8641271530674604
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.7030214336558751
+ "ndcg@10": 0.7297746711291291
},
"jaqket": {
- "ndcg@10": 0.5878065301444064
+ "ndcg@10": 0.5967326588135612
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3958992445664435
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.7095604570396511
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.2984972238105224
},
"mrtydi": {
- "ndcg@10": 0.4363167873386172
+ "ndcg@10": 0.4781603349494696
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8326468852967057
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.8600225120389309
+ "ndcg@10": 0.8571088737195884
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9469712765040588
+ "ndcg@10": 0.952870249874937
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.7248023877969718
+ "ndcg@10": 0.7257268520360993
}
},
"STS": {
"jsick": {
- "spearman": 0.7840335060728089
+ "spearman": 0.7985423882395024
},
"jsts": {
- "spearman": 0.8098724997856234
+ "spearman": 0.8186303902222064
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5713023706914878
+ "v_measure_score": 0.5157643001398088
},
"mewsc16": {
- "v_measure_score": 0.4534484706354193
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.621496984746364
+ "v_measure_score": 0.46806674695304834
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.5334765362912619
}
}
}
\ No newline at end of file
diff --git a/docs/results/intfloat/multilingual-e5-small/summary.json b/docs/results/intfloat/multilingual-e5-small/summary.json
index 99a4423..5a3add1 100644
--- a/docs/results/intfloat/multilingual-e5-small/summary.json
+++ b/docs/results/intfloat/multilingual-e5-small/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.6214130966524566
+ "macro_f1": 0.5866005078388893
},
"amazon_review_classification": {
- "macro_f1": 0.5127428912860463
+ "macro_f1": 0.5120598395740691
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8773239262941632
},
"massive_intent_classification": {
- "macro_f1": 0.7085230519111091
+ "macro_f1": 0.7134377059258787
},
"massive_scenario_classification": {
- "macro_f1": 0.8622036829599259
+ "macro_f1": 0.8676947906742417
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8177503141758454
+ },
+ "wrime_classification": {
+ "macro_f1": 0.36913347435432137
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9303349187158247
+ "ndcg@10": 0.9298402731760124
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8998812594907971
+ },
+ "jqara": {
+ "ndcg@10": 0.49280220404951935
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8178461260193638
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.864145360860429
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8558160940470637
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.6411252958220891
+ "ndcg@10": 0.6568760244912849
},
"jaqket": {
- "ndcg@10": 0.49966509556428645
+ "ndcg@10": 0.5157123960708363
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3153737960263929
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6323300168472976
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.2590832302769219
},
"mrtydi": {
- "ndcg@10": 0.36054822913647616
+ "ndcg@10": 0.4236692119753354
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.8396508926780583
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.8520749151982298
+ "ndcg@10": 0.8409842458346825
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9526123412781002
+ "ndcg@10": 0.9447219194706624
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.729906931983999
+ "ndcg@10": 0.7455737280382885
}
},
"STS": {
"jsick": {
- "spearman": 0.8150271836013705
+ "spearman": 0.8199946308873799
},
"jsts": {
- "spearman": 0.786450077409501
+ "spearman": 0.7892106647109823
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5470075389200084
+ "v_measure_score": 0.5194355229712517
},
"mewsc16": {
- "v_measure_score": 0.391226933590049
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6219382321618744
+ "v_measure_score": 0.5233814767010047
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.43592128019411325
}
}
}
\ No newline at end of file
diff --git a/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json b/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json
index 6b7309a..38e78b4 100644
--- a/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json
+++ b/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7994675369288904
+ "macro_f1": 0.7972419438068292
},
"amazon_review_classification": {
- "macro_f1": 0.5748206591211895
+ "macro_f1": 0.5802127224160758
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9199098092136551
},
"massive_intent_classification": {
- "macro_f1": 0.8025949222725076
+ "macro_f1": 0.8015558847211773
},
"massive_scenario_classification": {
- "macro_f1": 0.8875250742566655
+ "macro_f1": 0.8878291337617034
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.7731122315942124
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4573111522822367
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9156331205981866
+ "ndcg@10": 0.9151322326635167
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.6745048816141938
+ },
+ "jqara": {
+ "ndcg@10": 0.36039102371287524
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.6867643099800397
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8538476294446257
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.5964999187333498
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.519938655947725
+ "ndcg@10": 0.5407367959715127
},
"jaqket": {
- "ndcg@10": 0.4206746951743811
+ "ndcg@10": 0.4021523812335328
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.2482827887837841
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.17190013577864438
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.19084474235068657
},
"mrtydi": {
- "ndcg@10": 0.10116108109776817
+ "ndcg@10": 0.10090455185771262
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.44067635335327865
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.4930421996747514
+ "ndcg@10": 0.44837143094362086
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.719369187830078
+ "ndcg@10": 0.7368252250653567
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.3258568875005778
+ "ndcg@10": 0.3115238718909808
}
},
"STS": {
"jsick": {
- "spearman": 0.7211422898060521
+ "spearman": 0.7203759702575281
},
"jsts": {
- "spearman": 0.8109305772255819
+ "spearman": 0.8107670759374308
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.4677177349822789
+ "v_measure_score": 0.5170361974340975
},
"mewsc16": {
- "v_measure_score": 0.5389209739242912
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6237623762376237
+ "v_measure_score": 0.5152481901891431
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.43034104597999767
}
}
}
\ No newline at end of file
diff --git a/docs/results/pfnet/plamo-embedding-1b/summary.json b/docs/results/pfnet/plamo-embedding-1b/summary.json
new file mode 100644
index 0000000..bbd1ebe
--- /dev/null
+++ b/docs/results/pfnet/plamo-embedding-1b/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7758538459902731
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5947995518406083
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9172503242542154
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8278794713377423
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8994521566290758
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.9031045220702235
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4920234056704329
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9358806147164782
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.9174123687849153
+ },
+ "jqara": {
+ "ndcg@10": 0.6614745715723234
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8191089804461983
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9187107530127357
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8891350347274469
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7902563114751548
+ },
+ "jaqket": {
+ "ndcg@10": 0.543879907336617
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.5455917771478032
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.5991430810654191
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.3668286739593277
+ },
+ "mrtydi": {
+ "ndcg@10": 0.4186565845821445
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9765055597743824
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.990219021795052
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9862781050998647
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9510769472900551
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.81830804755845
+ },
+ "jsts": {
+ "spearman": 0.8446183418196836
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.6173644704637056
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4802637594283387
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4773483587781526
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json b/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json
index 7318aab..6d1041e 100644
--- a/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json
+++ b/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7492232749031491
+ "macro_f1": 0.7528271196943096
},
"amazon_review_classification": {
- "macro_f1": 0.5530707609927811
+ "macro_f1": 0.5518771080100612
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.892368025976312
},
"massive_intent_classification": {
- "macro_f1": 0.7979144461303402
+ "macro_f1": 0.7872725195473699
},
"massive_scenario_classification": {
- "macro_f1": 0.8683641924034757
+ "macro_f1": 0.8713846348082936
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8583089323083904
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4323129039345514
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9301469431250418
+ "ndcg@10": 0.9301525338489429
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8827390816541736
+ },
+ "jqara": {
+ "ndcg@10": 0.6070225247152883
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8243623644224994
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.887121388271364
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8385011452405416
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.6979374757372254
+ "ndcg@10": 0.6984652569482365
},
"jaqket": {
- "ndcg@10": 0.6729417850207029
+ "ndcg@10": 0.6751948574643762
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3957491894384977
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.652881832622734
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.3374776122444277
},
"mrtydi": {
- "ndcg@10": 0.41858579533990486
+ "ndcg@10": 0.4167021902708705
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.899055473429718
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.9029337913460675
+ "ndcg@10": 0.9008045583912581
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9511153967130517
+ "ndcg@10": 0.9566816164352073
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.7580448576047344
+ "ndcg@10": 0.757906107708436
}
},
"STS": {
"jsick": {
- "spearman": 0.849637366944316
+ "spearman": 0.8494858386977019
},
"jsts": {
- "spearman": 0.8095684318108997
+ "spearman": 0.8095670694135243
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5151536908540161
+ "v_measure_score": 0.5446091559116468
},
"mewsc16": {
- "v_measure_score": 0.45782610528001805
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.623716814159292
+ "v_measure_score": 0.4611859858929692
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.43979504978761347
}
}
-}
+}
\ No newline at end of file
diff --git a/docs/results/pkshatech/GLuCoSE-base-ja/summary.json b/docs/results/pkshatech/GLuCoSE-base-ja/summary.json
index 9048691..5a50ab4 100644
--- a/docs/results/pkshatech/GLuCoSE-base-ja/summary.json
+++ b/docs/results/pkshatech/GLuCoSE-base-ja/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.8243606275521169
+ "macro_f1": 0.8203088346974938
},
"amazon_review_classification": {
- "macro_f1": 0.580654308041878
+ "macro_f1": 0.5793470941382456
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9289309593569228
},
"massive_intent_classification": {
- "macro_f1": 0.7885427536904928
+ "macro_f1": 0.7852003872158392
},
"massive_scenario_classification": {
- "macro_f1": 0.8794225134482166
+ "macro_f1": 0.8771105186592234
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.7723533533184818
+ },
+ "wrime_classification": {
+ "macro_f1": 0.48820317778534994
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9190289767663239
+ "ndcg@10": 0.9182072351783757
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.7453523153562407
+ },
+ "jqara": {
+ "ndcg@10": 0.30235678517238046
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7782487998017047
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8742431547482784
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.6929937892822252
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.6387979415478197
+ "ndcg@10": 0.6414300605061649
},
"jaqket": {
- "ndcg@10": 0.3981609655991592
+ "ndcg@10": 0.39775627519142726
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.2981097485323552
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.4826861479972318
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.2507030467719784
},
"mrtydi": {
- "ndcg@10": 0.30281316435910444
+ "ndcg@10": 0.3013997193651328
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.7677861541704494
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.7825765249971093
+ "ndcg@10": 0.7720777474520221
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.8206371528870603
+ "ndcg@10": 0.8139955508348415
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.5982476164344701
+ "ndcg@10": 0.5843440022515908
}
},
"STS": {
"jsick": {
- "spearman": 0.7496711324072552
+ "spearman": 0.7489963692364312
},
"jsts": {
- "spearman": 0.824592262812859
+ "spearman": 0.8246470658338377
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.49890886040948096
+ "v_measure_score": 0.5040813114960272
},
"mewsc16": {
- "v_measure_score": 0.49676862904881375
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.663883089770355
+ "v_measure_score": 0.4952409837584659
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.41426282292221306
}
}
}
\ No newline at end of file
diff --git a/docs/results/pkshatech/RoSEtta-base-ja/summary.json b/docs/results/pkshatech/RoSEtta-base-ja/summary.json
index d82af4b..7951ed1 100644
--- a/docs/results/pkshatech/RoSEtta-base-ja/summary.json
+++ b/docs/results/pkshatech/RoSEtta-base-ja/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7005147244958231
+ "macro_f1": 0.7021400751808275
},
"amazon_review_classification": {
- "macro_f1": 0.5263680453119501
+ "macro_f1": 0.5261693704750353
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8728387064627037
},
"massive_intent_classification": {
- "macro_f1": 0.7983787583297884
+ "macro_f1": 0.7958661089844552
},
"massive_scenario_classification": {
- "macro_f1": 0.8709593192703351
+ "macro_f1": 0.869642477269303
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8400507949086808
+ },
+ "wrime_classification": {
+ "macro_f1": 0.41243251223612126
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9268625513429571
+ "ndcg@10": 0.9267709447988313
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8682926176464301
+ },
+ "jqara": {
+ "ndcg@10": 0.5792158527364997
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8038275156892214
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8844542290758788
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8201713015308671
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.6595934642903105
+ "ndcg@10": 0.6627940635852495
},
"jaqket": {
- "ndcg@10": 0.6533452086105761
+ "ndcg@10": 0.642772517951208
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3404237377925581
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6016261958696313
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.3236631225997826
},
"mrtydi": {
- "ndcg@10": 0.36731170141136216
+ "ndcg@10": 0.36773428568023436
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9604317247356383
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.9553567926226499
+ "ndcg@10": 0.9541194598644321
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.940828991756893
+ "ndcg@10": 0.931681815900694
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.8163161967769845
+ "ndcg@10": 0.821937205258955
}
},
"STS": {
"jsick": {
- "spearman": 0.8383455453168481
+ "spearman": 0.8383423614590403
},
"jsts": {
- "spearman": 0.7895388048564987
+ "spearman": 0.7894639448529204
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5861760622672214
+ "v_measure_score": 0.4888541691163841
},
"mewsc16": {
- "v_measure_score": 0.4784844036038961
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6173974540311173
+ "v_measure_score": 0.4515710456360326
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4060764834036522
}
}
-}
+}
\ No newline at end of file
diff --git a/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json b/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json
index cc9f179..5bbd9f7 100644
--- a/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json
+++ b/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.6748573563374541
+ "macro_f1": 0.6827876647194675
},
"amazon_review_classification": {
- "macro_f1": 0.5084883283463678
+ "macro_f1": 0.5175208911836656
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8821403624230039
},
"massive_intent_classification": {
- "macro_f1": 0.7967050091211104
+ "macro_f1": 0.7964832948145142
},
"massive_scenario_classification": {
- "macro_f1": 0.871999260591497
+ "macro_f1": 0.8722583552883876
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8118131918956941
+ },
+ "wrime_classification": {
+ "macro_f1": 0.38393198133793865
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.914930352019688
+ "ndcg@10": 0.9127205853729194
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.5745412347869042
+ },
+ "jqara": {
+ "ndcg@10": 0.31740297589991745
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.7212459481239325
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.8749859006713937
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.45027356866159485
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.41496851385134836
+ "ndcg@10": 0.4100248722670852
},
"jaqket": {
- "ndcg@10": 0.46003031782136106
+ "ndcg@10": 0.37009937036200197
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.3129516236109114
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.16066205698392905
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.20077263817507693
},
"mrtydi": {
- "ndcg@10": 0.1019130492122431
+ "ndcg@10": 0.10152904724472846
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.3813451499418741
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.4014036990267884
+ "ndcg@10": 0.3760245554186644
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.5962532652358485
+ "ndcg@10": 0.5918422105100428
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.2452584471710635
+ "ndcg@10": 0.25260061985270044
}
},
"STS": {
"jsick": {
- "spearman": 0.7307715649457595
+ "spearman": 0.7310527928257868
},
"jsts": {
- "spearman": 0.8052279921326252
+ "spearman": 0.8050903530724467
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.4476707933600858
+ "v_measure_score": 0.491058629988371
},
"mewsc16": {
- "v_measure_score": 0.5029508725037098
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6239830208701805
+ "v_measure_score": 0.4702243143778868
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.5220924001787737
}
}
}
\ No newline at end of file
diff --git a/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json b/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json
index 30385ec..d1a1183 100644
--- a/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json
+++ b/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7910202863961814
+ "macro_f1": 0.7966249319542733
},
"amazon_review_classification": {
- "macro_f1": 0.614759364446128
+ "macro_f1": 0.6202158443035662
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9503418215782169
},
"massive_intent_classification": {
- "macro_f1": 0.8225880728874561
+ "macro_f1": 0.8121127783146885
},
"massive_scenario_classification": {
- "macro_f1": 0.9065030576701741
+ "macro_f1": 0.9015618520645106
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8262549610016919
+ },
+ "wrime_classification": {
+ "macro_f1": 0.496952794347916
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9374394712541568
+ "ndcg@10": 0.9359864365331227
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8684667204236405
+ },
+ "jqara": {
+ "ndcg@10": 0.6592446626934351
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8516895656188278
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9024168764200886
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8242898079860301
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.7168374490004555
+ "ndcg@10": 0.7176236149918197
},
"jaqket": {
- "ndcg@10": 0.7279485535689915
+ "ndcg@10": 0.729199960117355
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.6260117718497401
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6323109932464099
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.3458953565848906
},
"mrtydi": {
- "ndcg@10": 0.41952210141116814
+ "ndcg@10": 0.4075091710258615
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9919931534803926
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.9394095717236127
+ "ndcg@10": 0.9916030162169888
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.9695624263086593
+ "ndcg@10": 0.968506421217649
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.8832876426024624
+ "ndcg@10": 0.9629377323425067
}
},
"STS": {
"jsick": {
- "spearman": 0.8022484725822061
+ "spearman": 0.7979403746663343
},
"jsts": {
- "spearman": 0.851980317221987
+ "spearman": 0.8362521198880197
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.5641831341687762
+ "v_measure_score": 0.5603187837880047
},
"mewsc16": {
- "v_measure_score": 0.5129216698739159
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.62
+ "v_measure_score": 0.5068875864473731
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.4418928761777483
}
}
}
\ No newline at end of file
diff --git a/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json b/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json
new file mode 100644
index 0000000..86137b4
--- /dev/null
+++ b/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json
@@ -0,0 +1,96 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7981260149778604
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.613904230518876
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.9350720201784032
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8368870408710274
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.9023393778180459
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.814822834466633
+ },
+ "wrime_classification": {
+ "macro_f1": 0.49874416955622525
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9357698212029779
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.8879290064759172
+ },
+ "jqara": {
+ "ndcg@10": 0.7055458565694387
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.8593120098725527
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.9252857993806471
+ }
+ },
+ "Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.8553812052293157
+ },
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.748733390366879
+ },
+ "jaqket": {
+ "ndcg@10": 0.7351759183476264
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.6610711832074698
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.6825626228833273
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.403522262945172
+ },
+ "mrtydi": {
+ "ndcg@10": 0.4956554219902846
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.9684244331815967
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9627838420424424
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9810825575187433
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9178887982974248
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8257994437715604
+ },
+ "jsts": {
+ "spearman": 0.8586626198858301
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5741299477926689
+ },
+ "mewsc16": {
+ "v_measure_score": 0.5167004748357505
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.48585227521060775
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/sentence-transformers/LaBSE/summary.json b/docs/results/sentence-transformers/LaBSE/summary.json
index de8fd21..d4575ba 100644
--- a/docs/results/sentence-transformers/LaBSE/summary.json
+++ b/docs/results/sentence-transformers/LaBSE/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7361214773958769
+ "macro_f1": 0.7473900578785092
},
"amazon_review_classification": {
- "macro_f1": 0.516957890685124
+ "macro_f1": 0.5163381922398036
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8952055768957177
},
"massive_intent_classification": {
- "macro_f1": 0.7698802987251081
+ "macro_f1": 0.7708783013419095
},
"massive_scenario_classification": {
- "macro_f1": 0.8835366493433755
+ "macro_f1": 0.883882574111003
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8147469939175009
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4010561963802254
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.9162507647227857
+ "ndcg@10": 0.9147393987384248
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.6785244283016075
+ },
+ "jqara": {
+ "ndcg@10": 0.24624584903493016
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.692780512325045
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.818396899799895
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.49122610922285737
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.4310160105414995
+ "ndcg@10": 0.4243154817699682
},
"jaqket": {
- "ndcg@10": 0.34245849139132745
+ "ndcg@10": 0.24919695742546066
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.20021150938693902
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.09357313571231995
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.07525879379433965
},
"mrtydi": {
- "ndcg@10": 0.04238747941951049
+ "ndcg@10": 0.04221321214455149
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.48063138821949475
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.48918127058907085
+ "ndcg@10": 0.48202233374429526
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.7513086500303519
+ "ndcg@10": 0.7559363652226313
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.35089108319096984
+ "ndcg@10": 0.3553481928114969
}
},
"STS": {
"jsick": {
- "spearman": 0.7698905918950973
+ "spearman": 0.770087314840748
},
"jsts": {
- "spearman": 0.7612337568248777
+ "spearman": 0.7611615118281959
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.4829337123233023
+ "v_measure_score": 0.4908336523752348
},
"mewsc16": {
- "v_measure_score": 0.41471299546625956
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.623321554770318
+ "v_measure_score": 0.41781835844551085
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.2859403214333406
}
}
}
\ No newline at end of file
diff --git a/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json b/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json
index 12f71a2..4a59ed9 100644
--- a/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json
+++ b/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json
@@ -1,62 +1,96 @@
{
"Classification": {
"amazon_counterfactual_classification": {
- "macro_f1": 0.7565022696601644
+ "macro_f1": 0.7514299930187799
},
"amazon_review_classification": {
- "macro_f1": 0.5131771609073525
+ "macro_f1": 0.516712003417941
+ },
+ "japanese_sentiment_classification": {
+ "macro_f1": 0.8714537157100772
},
"massive_intent_classification": {
- "macro_f1": 0.7427818411370812
+ "macro_f1": 0.7433839585058197
},
"massive_scenario_classification": {
- "macro_f1": 0.8609512679368835
+ "macro_f1": 0.8606582397219589
+ },
+ "sib200_japanese_classification": {
+ "macro_f1": 0.8372998969612304
+ },
+ "wrime_classification": {
+ "macro_f1": 0.4167776597670575
}
},
"Reranking": {
"esci": {
- "ndcg@10": 0.901984958764163
+ "ndcg@10": 0.8971639400421929
+ },
+ "jacwir_reranking": {
+ "ndcg@10": 0.3920595575511347
+ },
+ "jqara": {
+ "ndcg@10": 0.18511169246774806
+ },
+ "miracl_reranking": {
+ "ndcg@10": 0.6535500060613615
+ },
+ "mldr_reranking": {
+ "ndcg@10": 0.768787823495723
}
},
"Retrieval": {
+ "jacwir_retrieval": {
+ "ndcg@10": 0.21075313614845367
+ },
"jagovfaqs_22k": {
- "ndcg@10": 0.2511106863952595
+ "ndcg@10": 0.2248606553485316
},
"jaqket": {
- "ndcg@10": 0.21606007987072834
+ "ndcg@10": 0.06494577519372931
+ },
+ "mintaka_retrieval": {
+ "ndcg@10": 0.22312923127278733
+ },
+ "miracl_retrieval": {
+ "ndcg@10": 0.022833015048992402
+ },
+ "mldr_retrieval": {
+ "ndcg@10": 0.06529330431356167
},
"mrtydi": {
- "ndcg@10": 0.027590779174942116
+ "ndcg@10": 0.027849411947159904
+ },
+ "nlp_journal_abs_article": {
+ "ndcg@10": 0.24914118502751986
},
"nlp_journal_abs_intro": {
- "ndcg@10": 0.2848558252647936
+ "ndcg@10": 0.2554860092306942
},
"nlp_journal_title_abs": {
- "ndcg@10": 0.3646520309406354
+ "ndcg@10": 0.35835508156998896
},
"nlp_journal_title_intro": {
- "ndcg@10": 0.11545016260271045
+ "ndcg@10": 0.12133118349638791
}
},
"STS": {
"jsick": {
- "spearman": 0.7236409557069434
+ "spearman": 0.7238085290735078
},
"jsts": {
- "spearman": 0.7843597058304203
+ "spearman": 0.784483411606707
}
},
"Clustering": {
"livedoor_news": {
- "v_measure_score": 0.24487129939212224
+ "v_measure_score": 0.26615937330682315
},
"mewsc16": {
- "v_measure_score": 0.304278393205056
- }
- },
- "PairClassification": {
- "paws_x_ja": {
- "binary_f1": 0.6219686162624821
+ "v_measure_score": 0.32048277963560623
+ },
+ "sib200_japanese_clustering": {
+ "v_measure_score": 0.2434250739162938
}
}
}
\ No newline at end of file
diff --git a/leaderboard.md b/leaderboard.md
index dd64309..1b83092 100644
--- a/leaderboard.md
+++ b/leaderboard.md
@@ -5,233 +5,266 @@ This leaderboard shows the results stored under `docs/results`. The scores are a
The summary shows the average scores within each task. The average score is the average of scores by dataset.
-| Model | Avg. | Retrieval | STS | Classification | Reranking | Clustering | PairClassification |
-|:----------------------------------------------|:----------|:------------|:----------|:-----------------|:------------|:-------------|:---------------------|
-| sbintuitions/sarashina-embedding-v1-1b | **75.50** | **77.61** | 82.71 | **78.37** | **93.74** | 53.86 | 62.00 |
-| OpenAI/text-embedding-3-large | 74.05 | 74.48 | 82.52 | 77.58 | 93.58 | 53.32 | 62.35 |
-| jinaai/jina-embeddings-v3 | 73.44 | 75.22 | 80.05 | 76.39 | 92.71 | 51.46 | 62.37 |
-| cl-nagoya/ruri-large | 73.31 | 73.02 | 83.13 | 77.43 | 92.99 | 51.82 | 62.29 |
-| pkshatech/GLuCoSE-base-ja-v2 | 72.23 | 73.36 | 82.96 | 74.21 | 93.01 | 48.65 | 62.37 |
-| pkshatech/RoSEtta-base-ja | 72.04 | 73.21 | 81.39 | 72.41 | 92.69 | 53.23 | 61.74 |
-| cl-nagoya/ruri-base | 71.91 | 69.82 | 82.87 | 75.58 | 92.91 | **54.16** | 62.38 |
-| cl-nagoya/ruri-small | 71.53 | 69.41 | 82.79 | 76.22 | 93.00 | 51.19 | 62.11 |
-| intfloat/multilingual-e5-large | 70.90 | 70.98 | 79.70 | 72.89 | 92.96 | 51.24 | 62.15 |
-| OpenAI/text-embedding-3-small | 69.18 | 66.39 | 79.46 | 73.06 | 92.92 | 51.06 | 62.27 |
-| intfloat/multilingual-e5-base | 68.61 | 68.21 | 79.84 | 69.30 | 92.85 | 48.26 | 62.26 |
-| intfloat/multilingual-e5-small | 67.71 | 67.27 | 80.07 | 67.62 | 93.03 | 46.91 | 62.19 |
-| pkshatech/GLuCoSE-base-ja | 67.29 | 59.02 | 78.71 | 76.82 | 91.90 | 49.78 | **66.39** |
-| OpenAI/text-embedding-ada-002 | 67.21 | 64.38 | 79.02 | 69.75 | 93.04 | 48.30 | 62.40 |
-| cl-nagoya/sup-simcse-ja-base | 63.36 | 49.64 | 82.05 | 73.47 | 91.83 | 51.79 | 62.57 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 61.55 | 47.38 | 78.99 | 73.13 | 91.30 | 48.25 | 62.27 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 60.83 | 46.36 | 77.49 | 73.30 | 91.16 | 46.68 | 62.38 |
-| oshizo/sbert-jsnli-luke-japanese-base-lite | 60.77 | 43.00 | 76.60 | 76.61 | 91.56 | 50.33 | 62.38 |
-| cl-nagoya/unsup-simcse-ja-large | 59.58 | 40.53 | 80.56 | 74.66 | 90.95 | 48.41 | 62.49 |
-| MU-Kindai/Japanese-MixCSE-BERT-base | 59.03 | 42.59 | 77.05 | 72.90 | 91.01 | 44.95 | 62.33 |
-| cl-nagoya/sup-simcse-ja-large | 58.88 | 37.62 | **83.18** | 73.73 | 91.48 | 50.56 | 62.51 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 58.77 | 40.82 | 78.28 | 73.47 | 90.95 | 45.81 | 62.35 |
-| MU-Kindai/Japanese-DiffCSE-BERT-base | 58.66 | 41.79 | 75.50 | 73.77 | 90.95 | 44.22 | 62.38 |
-| cl-nagoya/unsup-simcse-ja-base | 58.39 | 40.23 | 78.72 | 73.07 | 91.16 | 44.77 | 62.44 |
-| sentence-transformers/LaBSE | 58.01 | 40.12 | 76.56 | 72.66 | 91.63 | 44.88 | 62.33 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 57.97 | 41.32 | 74.66 | 72.76 | 90.66 | 43.11 | 62.37 |
-| pkshatech/simcse-ja-bert-base-clcmlp | 56.86 | 37.00 | 76.80 | 71.30 | 91.49 | 47.53 | 62.40 |
-| sentence-transformers/stsb-xlm-r-multilingual | 48.21 | 21.00 | 75.40 | 71.84 | 90.20 | 27.46 | 62.20 |
-| colorfulscoop/sbert-base-ja | 47.38 | 16.52 | 70.42 | 69.07 | 89.97 | 44.81 | 62.31 |
+| Model | Avg. | Retrieval | STS | Classification | Reranking | Clustering |
+|:----------------------------------------------|:---------:|:-----------:|:---------:|:----------------:|:-----------:|:------------:|
+| sbintuitions/sarashina-embedding-v2-1b | **76.38** | **76.48** | **84.22** | 77.14 | **86.28** | 52.56 |
+| cl-nagoya/ruri-v3-310m | 75.85 | 76.03 | 81.59 | **77.65** | 85.84 | 50.52 |
+| cl-nagoya/ruri-v3-130m | 75.52 | 76.45 | 81.05 | 75.65 | 85.71 | 51.13 |
+| sbintuitions/sarashina-embedding-v1-1b | 74.87 | 74.53 | 81.71 | 77.20 | 84.36 | 50.30 |
+| pfnet/plamo-embedding-1b | 74.85 | 73.25 | 83.15 | 77.29 | 85.05 | 52.50 |
+| cl-nagoya/ruri-v3-70m | 73.95 | 74.23 | 80.96 | 74.45 | 84.21 | 49.95 |
+| OpenAI/text-embedding-3-large | 73.86 | 71.95 | 82.52 | 77.27 | 83.06 | 51.82 |
+| cl-nagoya/ruri-large-v2 | 73.63 | 71.87 | 83.18 | 76.10 | 83.89 | 50.88 |
+| cl-nagoya/ruri-v3-30m | 72.95 | 72.84 | 81.78 | 73.35 | 82.93 | 49.90 |
+| BAAI/bge-m3 | 72.46 | 72.15 | 79.74 | 74.10 | 84.10 | 45.56 |
+| cl-nagoya/ruri-large | 71.69 | 68.30 | 83.13 | 76.25 | 81.26 | 49.93 |
+| cl-nagoya/ruri-base-v2 | 71.66 | 68.96 | 83.03 | 75.59 | 82.46 | 46.84 |
+| cl-nagoya/ruri-small-v2 | 71.40 | 68.46 | 82.91 | 74.12 | 82.30 | 49.97 |
+| pkshatech/GLuCoSE-base-ja-v2 | 71.11 | 68.45 | 82.95 | 73.52 | 82.63 | 48.19 |
+| intfloat/multilingual-e5-large | 70.67 | 67.65 | 80.86 | 72.30 | 83.01 | 50.58 |
+| google/embeddinggemma-300m | 70.59 | 65.91 | 82.74 | 76.14 | 80.93 | 49.48 |
+| cl-nagoya/ruri-base | 70.25 | 65.90 | 82.88 | 75.34 | 80.31 | 49.10 |
+| pkshatech/RoSEtta-base-ja | 69.58 | 67.52 | 81.39 | 71.70 | 81.25 | 44.88 |
+| cl-nagoya/ruri-small | 69.34 | 63.95 | 82.79 | 74.83 | 79.98 | 49.59 |
+| intfloat/multilingual-e5-base | 68.06 | 64.48 | 80.46 | 69.70 | 79.46 | 50.12 |
+| intfloat/multilingual-e5-small | 67.38 | 63.91 | 80.46 | 67.77 | 80.09 | 49.29 |
+| OpenAI/text-embedding-3-small | 67.10 | 61.79 | 79.46 | 72.43 | 77.29 | 48.91 |
+| OpenAI/text-embedding-ada-002 | 65.13 | 59.58 | 79.02 | 69.39 | 75.63 | 48.78 |
+| hotchpotch/static-embedding-japanese | 63.80 | 60.51 | 80.16 | 66.73 | 77.09 | 35.91 |
+| pkshatech/GLuCoSE-base-ja | 63.79 | 54.58 | 78.68 | 75.02 | 72.37 | 47.12 |
+| cl-nagoya/sup-simcse-ja-base | 59.91 | 45.00 | 82.05 | 72.72 | 70.36 | **52.57** |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 57.60 | 42.41 | 79.00 | 71.83 | 71.88 | 42.02 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 56.75 | 38.08 | 76.56 | 74.53 | 69.81 | 48.75 |
+| cl-nagoya/sup-simcse-ja-large | 56.46 | 37.38 | 83.17 | 72.74 | 68.76 | 50.12 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 55.78 | 39.85 | 77.96 | 71.46 | 69.92 | 39.27 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 55.35 | 36.23 | 78.29 | 72.59 | 70.59 | 44.54 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 54.65 | 36.24 | 77.75 | 71.81 | 68.58 | 43.45 |
+| cl-nagoya/unsup-simcse-ja-large | 54.23 | 33.98 | 80.56 | 73.71 | 67.39 | 43.52 |
+| cl-nagoya/unsup-simcse-ja-base | 53.86 | 35.34 | 78.74 | 72.41 | 66.20 | 41.29 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 53.82 | 35.22 | 74.96 | 71.48 | 68.15 | 42.86 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 53.59 | 34.93 | 76.70 | 72.06 | 67.73 | 39.93 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 53.48 | 32.80 | 76.81 | 70.67 | 68.02 | 49.45 |
+| sentence-transformers/LaBSE | 52.70 | 33.18 | 76.56 | 71.85 | 67.01 | 39.82 |
+| sentence-transformers/stsb-xlm-r-multilingual | 43.06 | 16.58 | 75.41 | 71.40 | 57.93 | 27.67 |
+| colorfulscoop/sbert-base-ja | 42.90 | 15.45 | 70.41 | 68.05 | 59.38 | 39.04 |
## Retrieval
-| Model | Avg. | jagovfaqs_22k
(ndcg@10) | jaqket
(ndcg@10) | mrtydi
(ndcg@10) | nlp_journal_abs_intro
(ndcg@10) | nlp_journal_title_abs
(ndcg@10) | nlp_journal_title_intro
(ndcg@10) |
-|:----------------------------------------------|:----------|:-----------------------------|:----------------------|:----------------------|:-------------------------------------|:-------------------------------------|:---------------------------------------|
-| sbintuitions/sarashina-embedding-v1-1b | **77.61** | 71.68 | **72.79** | 41.95 | 93.94 | 96.96 | 88.33 |
-| jinaai/jina-embeddings-v3 | 75.22 | 71.50 | 46.48 | **45.45** | 98.43 | 95.62 | 93.85 |
-| OpenAI/text-embedding-3-large | 74.48 | 72.41 | 48.21 | 34.88 | **99.33** | 96.55 | **95.47** |
-| pkshatech/GLuCoSE-base-ja-v2 | 73.36 | 69.79 | 67.29 | 41.86 | 90.29 | 95.11 | 75.80 |
-| pkshatech/RoSEtta-base-ja | 73.21 | 65.96 | 65.33 | 36.73 | 95.54 | 94.08 | 81.63 |
-| cl-nagoya/ruri-large | 73.02 | **76.68** | 61.74 | 38.03 | 87.12 | 96.58 | 77.97 |
-| intfloat/multilingual-e5-large | 70.98 | 70.30 | 58.78 | 43.63 | 86.00 | 94.70 | 72.48 |
-| cl-nagoya/ruri-base | 69.82 | 74.56 | 50.12 | 35.45 | 86.89 | 96.57 | 75.31 |
-| cl-nagoya/ruri-small | 69.41 | 73.65 | 48.44 | 33.43 | 87.69 | **97.17** | 76.09 |
-| intfloat/multilingual-e5-base | 68.21 | 65.34 | 50.67 | 38.38 | 87.10 | 94.73 | 73.05 |
-| intfloat/multilingual-e5-small | 67.27 | 64.11 | 49.97 | 36.05 | 85.21 | 95.26 | 72.99 |
-| OpenAI/text-embedding-3-small | 66.39 | 64.02 | 33.94 | 20.03 | 98.47 | 91.70 | 90.17 |
-| OpenAI/text-embedding-ada-002 | 64.38 | 61.02 | 42.56 | 14.51 | 94.99 | 91.23 | 81.98 |
-| pkshatech/GLuCoSE-base-ja | 59.02 | 63.88 | 39.82 | 30.28 | 78.26 | 82.06 | 59.82 |
-| cl-nagoya/sup-simcse-ja-base | 49.64 | 51.62 | 50.25 | 13.98 | 68.08 | 65.71 | 48.22 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 47.38 | 50.14 | 45.84 | 13.00 | 55.09 | 74.97 | 45.24 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 46.36 | 47.39 | 39.57 | 11.44 | 64.16 | 70.23 | 45.37 |
-| oshizo/sbert-jsnli-luke-japanese-base-lite | 43.00 | 51.99 | 42.07 | 10.12 | 49.30 | 71.94 | 32.59 |
-| MU-Kindai/Japanese-MixCSE-BERT-base | 42.59 | 42.37 | 37.72 | 7.88 | 63.70 | 64.13 | 39.73 |
-| MU-Kindai/Japanese-DiffCSE-BERT-base | 41.79 | 42.31 | 36.20 | 7.81 | 60.77 | 64.34 | 39.32 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 41.32 | 44.11 | 39.61 | 8.15 | 62.76 | 58.39 | 34.89 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 40.82 | 47.04 | 44.10 | 11.43 | 43.43 | 62.41 | 36.52 |
-| cl-nagoya/unsup-simcse-ja-large | 40.53 | 45.09 | 34.60 | 5.75 | 55.07 | 63.07 | 39.61 |
-| cl-nagoya/unsup-simcse-ja-base | 40.23 | 46.00 | 39.46 | 5.55 | 60.26 | 55.63 | 34.49 |
-| sentence-transformers/LaBSE | 40.12 | 43.10 | 34.25 | 4.24 | 48.92 | 75.13 | 35.09 |
-| cl-nagoya/sup-simcse-ja-large | 37.62 | 46.84 | 39.88 | 11.83 | 63.43 | 37.93 | 25.79 |
-| pkshatech/simcse-ja-bert-base-clcmlp | 37.00 | 41.50 | 46.00 | 10.19 | 40.14 | 59.63 | 24.53 |
-| sentence-transformers/stsb-xlm-r-multilingual | 21.00 | 25.11 | 21.61 | 2.76 | 28.49 | 36.47 | 11.55 |
-| colorfulscoop/sbert-base-ja | 16.52 | 21.50 | 13.16 | 0.44 | 28.78 | 22.40 | 12.82 |
+| Model | Avg. | jacwir_retrieval
(ndcg@10) | jagovfaqs_22k
(ndcg@10) | jaqket
(ndcg@10) | mintaka_retrieval
(ndcg@10) | miracl_retrieval
(ndcg@10) | mldr_retrieval
(ndcg@10) | mrtydi
(ndcg@10) | nlp_abs_article
(ndcg@10) | nlp_abs_intro
(ndcg@10) | nlp_title_abs
(ndcg@10) | nlp_title_intro
(ndcg@10) |
+|:----------------------------------------------|:---------:|:-------------------------------:|:----------------------------:|:---------------------:|:--------------------------------:|:-------------------------------:|:-----------------------------:|:---------------------:|:------------------------------:|:----------------------------:|:----------------------------:|:------------------------------:|
+| sbintuitions/sarashina-embedding-v2-1b | **76.48** | 85.54 | 74.87 | **73.52** | **66.11** | 68.26 | 40.35 | **49.57** | 96.84 | 96.28 | 98.11 | 91.79 |
+| cl-nagoya/ruri-v3-130m | 76.45 | 84.21 | 75.32 | 73.10 | 51.77 | 71.01 | 45.16 | 47.80 | 99.51 | 98.88 | 97.95 | 96.28 |
+| cl-nagoya/ruri-v3-310m | 76.03 | 84.06 | 76.49 | 71.87 | 52.25 | 67.71 | 43.43 | 47.06 | **99.59** | **99.35** | 97.91 | **96.58** |
+| sbintuitions/sarashina-embedding-v1-1b | 74.53 | 82.43 | 71.76 | 72.92 | 62.60 | 63.23 | 34.59 | 40.75 | 99.20 | 99.16 | 96.85 | 96.29 |
+| cl-nagoya/ruri-v3-70m | 74.23 | 82.76 | 73.27 | 67.68 | 46.26 | 67.98 | 43.55 | 45.00 | 98.50 | 98.68 | 97.07 | 95.73 |
+| pfnet/plamo-embedding-1b | 73.25 | **88.91** | **79.03** | 54.39 | 54.56 | 59.91 | 36.68 | 41.87 | 97.65 | 99.02 | **98.63** | 95.11 |
+| cl-nagoya/ruri-v3-30m | 72.84 | 82.70 | 70.21 | 62.45 | 43.05 | 64.99 | 45.77 | 41.78 | 98.76 | 99.16 | 96.99 | 95.34 |
+| BAAI/bge-m3 | 72.15 | 85.13 | 69.07 | 56.59 | 32.18 | **73.48** | **51.26** | 45.18 | 95.22 | 97.53 | 96.02 | 91.98 |
+| OpenAI/text-embedding-3-large | 71.95 | 82.90 | 72.41 | 48.21 | 63.52 | 60.57 | 45.26 | 34.88 | 92.37 | 99.33 | 96.55 | 95.47 |
+| cl-nagoya/ruri-large-v2 | 71.87 | 80.49 | 78.23 | 65.61 | 50.41 | 70.46 | 36.97 | 46.37 | 90.85 | 91.15 | 97.74 | 82.32 |
+| cl-nagoya/ruri-base-v2 | 68.96 | 81.01 | 75.90 | 57.01 | 44.18 | 68.22 | 37.73 | 40.89 | 88.05 | 89.73 | 96.96 | 78.93 |
+| cl-nagoya/ruri-small-v2 | 68.46 | 83.04 | 74.02 | 62.25 | 35.31 | 66.90 | 32.58 | 42.40 | 90.65 | 90.42 | 97.30 | 78.21 |
+| pkshatech/GLuCoSE-base-ja-v2 | 68.45 | 83.85 | 69.85 | 67.52 | 39.57 | 65.29 | 33.75 | 41.67 | 89.91 | 90.08 | 95.67 | 75.79 |
+| cl-nagoya/ruri-large | 68.30 | 81.69 | 77.64 | 61.73 | 51.06 | 55.47 | 34.77 | 38.12 | 86.53 | 88.91 | 96.17 | 79.22 |
+| intfloat/multilingual-e5-large | 67.65 | 86.41 | 72.98 | 59.67 | 39.59 | 70.96 | 29.85 | 47.82 | 83.26 | 85.71 | 95.29 | 72.57 |
+| pkshatech/RoSEtta-base-ja | 67.52 | 82.02 | 66.28 | 64.28 | 34.04 | 60.16 | 32.37 | 36.77 | 96.04 | 95.41 | 93.17 | 82.19 |
+| google/embeddinggemma-300m | 65.91 | 81.07 | 69.43 | 63.27 | 38.63 | 35.28 | 34.66 | 13.86 | 99.34 | 99.02 | 96.12 | 94.35 |
+| cl-nagoya/ruri-base | 65.90 | 82.48 | 75.50 | 50.23 | 45.37 | 54.88 | 35.42 | 35.59 | 86.65 | 87.23 | 95.27 | 76.25 |
+| intfloat/multilingual-e5-base | 64.48 | 84.32 | 68.72 | 51.69 | 34.68 | 64.50 | 25.73 | 42.30 | 83.56 | 84.48 | 94.62 | 74.70 |
+| cl-nagoya/ruri-small | 63.95 | 82.58 | 74.01 | 48.44 | 37.23 | 52.22 | 28.99 | 33.51 | 86.89 | 87.23 | 96.20 | 76.09 |
+| intfloat/multilingual-e5-small | 63.91 | 85.58 | 65.69 | 51.57 | 31.54 | 63.23 | 25.91 | 42.37 | 83.97 | 84.10 | 94.47 | 74.56 |
+| OpenAI/text-embedding-3-small | 61.79 | 79.58 | 64.02 | 33.94 | 32.44 | 48.45 | 35.07 | 20.03 | 85.83 | 98.47 | 91.70 | 90.17 |
+| hotchpotch/static-embedding-japanese | 60.51 | 72.27 | 55.55 | 64.04 | 38.93 | 32.61 | 42.51 | 11.18 | 76.19 | 95.74 | 90.37 | 86.25 |
+| OpenAI/text-embedding-ada-002 | 59.58 | 78.08 | 61.02 | 42.56 | 27.09 | 34.54 | 31.90 | 14.51 | 97.51 | 94.99 | 91.23 | 81.98 |
+| pkshatech/GLuCoSE-base-ja | 54.58 | 69.30 | 64.14 | 39.78 | 29.81 | 48.27 | 25.07 | 30.14 | 76.78 | 77.21 | 81.40 | 58.43 |
+| cl-nagoya/sup-simcse-ja-base | 45.00 | 53.32 | 52.02 | 50.13 | 32.88 | 20.68 | 24.70 | 14.14 | 69.09 | 66.19 | 64.84 | 46.97 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 42.41 | 53.16 | 51.20 | 45.81 | 30.42 | 26.08 | 23.65 | 13.06 | 54.65 | 52.13 | 74.13 | 42.21 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 39.85 | 48.95 | 48.41 | 38.73 | 25.72 | 21.60 | 18.11 | 11.02 | 58.91 | 60.05 | 69.15 | 37.72 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 38.08 | 59.65 | 54.07 | 40.22 | 24.83 | 17.19 | 19.08 | 10.09 | 44.07 | 44.84 | 73.68 | 31.15 |
+| cl-nagoya/sup-simcse-ja-large | 37.38 | 43.71 | 47.42 | 40.04 | 37.68 | 18.13 | 23.48 | 11.88 | 64.08 | 62.95 | 36.95 | 24.90 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 36.24 | 42.43 | 43.60 | 37.35 | 25.18 | 14.76 | 16.86 | 7.77 | 56.89 | 59.11 | 61.81 | 32.88 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 36.23 | 46.28 | 48.25 | 44.17 | 28.89 | 19.52 | 18.66 | 11.44 | 43.97 | 40.33 | 60.49 | 36.51 |
+| cl-nagoya/unsup-simcse-ja-base | 35.34 | 35.11 | 46.74 | 39.52 | 29.92 | 10.93 | 15.98 | 5.51 | 58.22 | 58.41 | 55.58 | 32.84 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 35.22 | 39.92 | 44.60 | 38.45 | 22.39 | 13.94 | 13.91 | 7.30 | 58.35 | 58.63 | 57.43 | 32.47 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 34.93 | 40.86 | 43.88 | 35.56 | 19.98 | 16.52 | 12.06 | 7.11 | 54.30 | 55.86 | 62.96 | 35.17 |
+| cl-nagoya/unsup-simcse-ja-large | 33.98 | 37.61 | 46.56 | 34.53 | 30.58 | 10.33 | 12.55 | 5.75 | 50.45 | 50.70 | 60.43 | 34.32 |
+| sentence-transformers/LaBSE | 33.18 | 49.12 | 42.43 | 24.92 | 20.02 | 9.36 | 7.53 | 4.22 | 48.06 | 48.20 | 75.59 | 35.53 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 32.80 | 45.03 | 41.00 | 37.01 | 31.30 | 16.07 | 20.08 | 10.15 | 38.13 | 37.60 | 59.18 | 25.26 |
+| sentence-transformers/stsb-xlm-r-multilingual | 16.58 | 21.08 | 22.49 | 6.49 | 22.31 | 2.28 | 6.53 | 2.78 | 24.91 | 25.55 | 35.84 | 12.13 |
+| colorfulscoop/sbert-base-ja | 15.45 | 19.30 | 21.70 | 13.14 | 19.07 | 1.86 | 6.97 | 0.41 | 29.02 | 25.80 | 21.07 | 11.57 |
## STS
-| Model | Avg. | jsick
(spearman) | jsts
(spearman) |
-|:----------------------------------------------|:----------|:----------------------|:---------------------|
-| cl-nagoya/sup-simcse-ja-large | **83.18** | 83.80 | 82.57 |
-| cl-nagoya/ruri-large | 83.13 | 82.00 | 84.26 |
-| pkshatech/GLuCoSE-base-ja-v2 | 82.96 | **84.96** | 80.96 |
-| cl-nagoya/ruri-base | 82.87 | 82.32 | 83.43 |
-| cl-nagoya/ruri-small | 82.79 | 83.44 | 82.13 |
-| sbintuitions/sarashina-embedding-v1-1b | 82.71 | 80.22 | **85.20** |
-| OpenAI/text-embedding-3-large | 82.52 | 81.27 | 83.77 |
-| cl-nagoya/sup-simcse-ja-base | 82.05 | 82.83 | 81.27 |
-| pkshatech/RoSEtta-base-ja | 81.39 | 83.83 | 78.95 |
-| cl-nagoya/unsup-simcse-ja-large | 80.56 | 80.15 | 80.98 |
-| intfloat/multilingual-e5-small | 80.07 | 81.50 | 78.65 |
-| jinaai/jina-embeddings-v3 | 80.05 | 78.16 | 81.93 |
-| intfloat/multilingual-e5-base | 79.84 | 81.28 | 78.39 |
-| intfloat/multilingual-e5-large | 79.70 | 78.40 | 80.99 |
-| OpenAI/text-embedding-3-small | 79.46 | 80.83 | 78.08 |
-| OpenAI/text-embedding-ada-002 | 79.02 | 79.09 | 78.94 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 78.99 | 79.84 | 78.14 |
-| cl-nagoya/unsup-simcse-ja-base | 78.72 | 78.49 | 78.95 |
-| pkshatech/GLuCoSE-base-ja | 78.71 | 74.97 | 82.46 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 78.28 | 78.75 | 77.81 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 77.49 | 78.18 | 76.81 |
-| MU-Kindai/Japanese-MixCSE-BERT-base | 77.05 | 77.57 | 76.53 |
-| pkshatech/simcse-ja-bert-base-clcmlp | 76.80 | 73.08 | 80.52 |
-| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.60 | 72.11 | 81.09 |
-| sentence-transformers/LaBSE | 76.56 | 76.99 | 76.12 |
-| MU-Kindai/Japanese-DiffCSE-BERT-base | 75.50 | 75.42 | 75.58 |
-| sentence-transformers/stsb-xlm-r-multilingual | 75.40 | 72.36 | 78.44 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 74.66 | 74.64 | 74.68 |
-| colorfulscoop/sbert-base-ja | 70.42 | 66.59 | 74.24 |
+| Model | Avg. | jsick
(spearman) | jsts
(spearman) |
+|:----------------------------------------------|:---------:|:---------------------:|:--------------------:|
+| sbintuitions/sarashina-embedding-v2-1b | **84.22** | 82.58 | **85.87** |
+| cl-nagoya/ruri-large-v2 | 83.18 | 82.12 | 84.24 |
+| cl-nagoya/sup-simcse-ja-large | 83.17 | 83.78 | 82.56 |
+| pfnet/plamo-embedding-1b | 83.15 | 81.83 | 84.46 |
+| cl-nagoya/ruri-large | 83.13 | 82.00 | 84.26 |
+| cl-nagoya/ruri-base-v2 | 83.03 | 82.63 | 83.43 |
+| pkshatech/GLuCoSE-base-ja-v2 | 82.95 | **84.95** | 80.96 |
+| cl-nagoya/ruri-small-v2 | 82.91 | 83.88 | 81.93 |
+| cl-nagoya/ruri-base | 82.88 | 82.32 | 83.43 |
+| cl-nagoya/ruri-small | 82.79 | 83.45 | 82.13 |
+| google/embeddinggemma-300m | 82.74 | 81.67 | 83.81 |
+| OpenAI/text-embedding-3-large | 82.52 | 81.27 | 83.77 |
+| cl-nagoya/sup-simcse-ja-base | 82.05 | 82.84 | 81.26 |
+| cl-nagoya/ruri-v3-30m | 81.78 | 81.62 | 81.95 |
+| sbintuitions/sarashina-embedding-v1-1b | 81.71 | 79.79 | 83.63 |
+| cl-nagoya/ruri-v3-310m | 81.59 | 78.86 | 84.31 |
+| pkshatech/RoSEtta-base-ja | 81.39 | 83.83 | 78.95 |
+| cl-nagoya/ruri-v3-130m | 81.05 | 78.86 | 83.24 |
+| cl-nagoya/ruri-v3-70m | 80.96 | 79.10 | 82.82 |
+| intfloat/multilingual-e5-large | 80.86 | 79.85 | 81.86 |
+| cl-nagoya/unsup-simcse-ja-large | 80.56 | 80.14 | 80.98 |
+| intfloat/multilingual-e5-small | 80.46 | 82.00 | 78.92 |
+| intfloat/multilingual-e5-base | 80.46 | 81.26 | 79.65 |
+| hotchpotch/static-embedding-japanese | 80.16 | 82.51 | 77.81 |
+| BAAI/bge-m3 | 79.74 | 79.27 | 80.21 |
+| OpenAI/text-embedding-3-small | 79.46 | 80.83 | 78.08 |
+| OpenAI/text-embedding-ada-002 | 79.02 | 79.09 | 78.94 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 79.00 | 79.86 | 78.14 |
+| cl-nagoya/unsup-simcse-ja-base | 78.74 | 78.53 | 78.94 |
+| pkshatech/GLuCoSE-base-ja | 78.68 | 74.90 | 82.46 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 78.29 | 78.76 | 77.82 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 77.96 | 79.14 | 76.77 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 77.75 | 78.93 | 76.57 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 76.81 | 73.11 | 80.51 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 76.70 | 77.76 | 75.63 |
+| sentence-transformers/LaBSE | 76.56 | 77.01 | 76.12 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.56 | 72.04 | 81.08 |
+| sentence-transformers/stsb-xlm-r-multilingual | 75.41 | 72.38 | 78.45 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 74.96 | 75.25 | 74.66 |
+| colorfulscoop/sbert-base-ja | 70.41 | 66.56 | 74.25 |
## Classification
-| Model | Avg. | amazon_counterfactual
(macro_f1) | amazon_review
(macro_f1) | massive_intent
(macro_f1) | massive_scenario
(macro_f1) |
-|:----------------------------------------------|:----------|:--------------------------------------|:------------------------------|:-------------------------------|:---------------------------------|
-| sbintuitions/sarashina-embedding-v1-1b | **78.37** | 79.10 | **61.48** | 82.26 | 90.65 |
-| OpenAI/text-embedding-3-large | 77.58 | 77.90 | 60.44 | 80.91 | **91.08** |
-| cl-nagoya/ruri-large | 77.43 | 80.81 | 56.80 | **82.56** | 89.56 |
-| pkshatech/GLuCoSE-base-ja | 76.82 | **82.44** | 58.07 | 78.85 | 87.94 |
-| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.61 | 79.95 | 57.48 | 80.26 | 88.75 |
-| jinaai/jina-embeddings-v3 | 76.39 | 78.83 | 59.33 | 77.65 | 89.74 |
-| cl-nagoya/ruri-small | 76.22 | 79.92 | 55.61 | 81.49 | 87.88 |
-| cl-nagoya/ruri-base | 75.58 | 76.66 | 55.76 | 81.41 | 88.49 |
-| cl-nagoya/unsup-simcse-ja-large | 74.66 | 76.79 | 55.37 | 79.13 | 87.36 |
-| pkshatech/GLuCoSE-base-ja-v2 | 74.21 | 74.92 | 55.31 | 79.79 | 86.84 |
-| MU-Kindai/Japanese-DiffCSE-BERT-base | 73.77 | 78.10 | 51.56 | 78.79 | 86.63 |
-| cl-nagoya/sup-simcse-ja-large | 73.73 | 73.21 | 54.76 | 79.23 | 87.72 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 73.47 | 77.25 | 53.42 | 76.83 | 86.39 |
-| cl-nagoya/sup-simcse-ja-base | 73.47 | 72.34 | 54.41 | 79.52 | 87.60 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 73.30 | 76.20 | 51.52 | 78.95 | 86.54 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 73.13 | 76.36 | 52.75 | 76.88 | 86.51 |
-| cl-nagoya/unsup-simcse-ja-base | 73.07 | 73.30 | 53.93 | 79.07 | 85.97 |
-| OpenAI/text-embedding-3-small | 73.06 | 70.01 | 55.92 | 77.66 | 88.67 |
-| MU-Kindai/Japanese-MixCSE-BERT-base | 72.90 | 77.62 | 50.86 | 77.19 | 85.93 |
-| intfloat/multilingual-e5-large | 72.89 | 70.66 | 56.54 | 75.78 | 88.59 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 72.76 | 76.20 | 52.06 | 77.89 | 84.90 |
-| sentence-transformers/LaBSE | 72.66 | 73.61 | 51.70 | 76.99 | 88.35 |
-| pkshatech/RoSEtta-base-ja | 72.41 | 70.05 | 52.64 | 79.84 | 87.10 |
-| sentence-transformers/stsb-xlm-r-multilingual | 71.84 | 75.65 | 51.32 | 74.28 | 86.10 |
-| pkshatech/simcse-ja-bert-base-clcmlp | 71.30 | 67.49 | 50.85 | 79.67 | 87.20 |
-| OpenAI/text-embedding-ada-002 | 69.75 | 64.42 | 53.13 | 74.57 | 86.89 |
-| intfloat/multilingual-e5-base | 69.30 | 63.67 | 54.24 | 72.78 | 86.53 |
-| colorfulscoop/sbert-base-ja | 69.07 | 72.21 | 47.95 | 72.52 | 83.62 |
-| intfloat/multilingual-e5-small | 67.62 | 62.14 | 51.27 | 70.85 | 86.22 |
+| Model | Avg. | amazon_counterfactual
(macro_f1) | amazon_review
(macro_f1) | jpn_sentiment
(macro_f1) | massive_intent
(macro_f1) | massive_scenario
(macro_f1) | sib200_jpn_cls
(macro_f1) | wrime_classification
(macro_f1) |
+|:----------------------------------------------|:---------:|:-------------------------------------:|:-----------------------------:|:-----------------------------:|:------------------------------:|:--------------------------------:|:------------------------------:|:------------------------------------:|
+| cl-nagoya/ruri-v3-310m | **77.65** | 80.09 | 60.72 | 95.31 | 81.76 | 89.01 | 88.13 | 48.53 |
+| pfnet/plamo-embedding-1b | 77.29 | 77.59 | 59.48 | 91.73 | 82.79 | 89.95 | **90.31** | 49.20 |
+| OpenAI/text-embedding-3-large | 77.27 | 77.90 | 60.44 | **96.89** | 80.91 | **91.08** | 87.85 | 45.84 |
+| sbintuitions/sarashina-embedding-v1-1b | 77.20 | 79.66 | **62.02** | 95.03 | 81.21 | 90.16 | 82.63 | 49.70 |
+| sbintuitions/sarashina-embedding-v2-1b | 77.14 | 79.81 | 61.39 | 93.51 | **83.69** | 90.23 | 81.48 | **49.87** |
+| cl-nagoya/ruri-large | 76.25 | 79.50 | 56.85 | 93.56 | 82.10 | 90.03 | 85.26 | 46.45 |
+| google/embeddinggemma-300m | 76.14 | 74.74 | 58.04 | 95.99 | 80.07 | 90.58 | 86.92 | 46.62 |
+| cl-nagoya/ruri-large-v2 | 76.10 | 79.51 | 57.09 | 93.57 | 80.87 | 89.71 | 84.72 | 47.23 |
+| cl-nagoya/ruri-v3-130m | 75.65 | 76.75 | 59.56 | 95.00 | 80.79 | 87.90 | 82.88 | 46.63 |
+| cl-nagoya/ruri-base-v2 | 75.59 | 75.97 | 55.55 | 92.36 | 80.93 | 88.87 | 89.26 | 46.17 |
+| cl-nagoya/ruri-base | 75.34 | 76.66 | 56.02 | 91.69 | 81.22 | 88.61 | 87.73 | 45.47 |
+| pkshatech/GLuCoSE-base-ja | 75.02 | **82.03** | 57.93 | 92.89 | 78.52 | 87.71 | 77.24 | 48.82 |
+| cl-nagoya/ruri-small | 74.83 | 80.55 | 55.41 | 88.86 | 81.08 | 88.00 | 83.97 | 45.95 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 74.53 | 79.72 | 58.02 | 91.99 | 80.16 | 88.78 | 77.31 | 45.73 |
+| cl-nagoya/ruri-v3-70m | 74.45 | 81.81 | 57.98 | 93.39 | 78.92 | 87.83 | 76.87 | 44.38 |
+| cl-nagoya/ruri-small-v2 | 74.12 | 77.67 | 55.60 | 88.64 | 82.00 | 88.16 | 81.57 | 45.23 |
+| BAAI/bge-m3 | 74.10 | 71.86 | 56.65 | 94.41 | 78.68 | 89.70 | 84.25 | 43.17 |
+| cl-nagoya/unsup-simcse-ja-large | 73.71 | 76.40 | 55.05 | 90.57 | 79.25 | 87.50 | 82.89 | 44.33 |
+| pkshatech/GLuCoSE-base-ja-v2 | 73.52 | 75.28 | 55.19 | 89.24 | 78.73 | 87.14 | 85.83 | 43.23 |
+| cl-nagoya/ruri-v3-30m | 73.35 | 75.60 | 55.71 | 92.63 | 78.31 | 86.72 | 81.40 | 43.11 |
+| cl-nagoya/sup-simcse-ja-large | 72.74 | 72.61 | 54.56 | 89.42 | 79.23 | 87.71 | 80.43 | 45.26 |
+| cl-nagoya/sup-simcse-ja-base | 72.72 | 71.93 | 54.54 | 91.01 | 80.11 | 87.63 | 81.92 | 41.88 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 72.59 | 77.67 | 53.48 | 89.28 | 76.79 | 86.24 | 83.77 | 40.89 |
+| OpenAI/text-embedding-3-small | 72.43 | 70.01 | 55.92 | 89.97 | 77.66 | 88.67 | 84.72 | 40.05 |
+| cl-nagoya/unsup-simcse-ja-base | 72.41 | 73.65 | 54.14 | 89.87 | 77.68 | 86.10 | 84.13 | 41.31 |
+| intfloat/multilingual-e5-large | 72.30 | 69.70 | 57.64 | 95.55 | 74.01 | 88.71 | 78.11 | 42.38 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 72.06 | 77.70 | 51.46 | 88.45 | 78.72 | 86.40 | 83.50 | 38.15 |
+| sentence-transformers/LaBSE | 71.85 | 74.74 | 51.63 | 89.52 | 77.09 | 88.39 | 81.47 | 40.11 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 71.83 | 76.55 | 52.73 | 88.22 | 77.22 | 86.25 | 81.45 | 40.38 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 71.81 | 77.79 | 51.11 | 87.82 | 77.97 | 86.34 | 85.06 | 36.56 |
+| pkshatech/RoSEtta-base-ja | 71.70 | 70.21 | 52.62 | 87.28 | 79.59 | 86.96 | 84.01 | 41.24 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 71.48 | 74.30 | 51.97 | 89.69 | 77.83 | 84.60 | 83.82 | 38.15 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 71.46 | 76.40 | 51.65 | 87.86 | 78.15 | 86.44 | 81.80 | 37.93 |
+| sentence-transformers/stsb-xlm-r-multilingual | 71.40 | 75.14 | 51.67 | 87.15 | 74.34 | 86.07 | 83.73 | 41.68 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 70.67 | 68.28 | 51.75 | 88.21 | 79.65 | 87.23 | 81.18 | 38.39 |
+| intfloat/multilingual-e5-base | 69.70 | 64.29 | 54.17 | 92.32 | 73.19 | 86.78 | 78.50 | 38.65 |
+| OpenAI/text-embedding-ada-002 | 69.39 | 64.42 | 53.13 | 88.76 | 74.57 | 86.89 | 80.39 | 37.57 |
+| colorfulscoop/sbert-base-ja | 68.05 | 70.80 | 47.80 | 83.50 | 72.89 | 83.71 | 82.63 | 35.06 |
+| intfloat/multilingual-e5-small | 67.77 | 58.66 | 51.21 | 87.73 | 71.34 | 86.77 | 81.78 | 36.91 |
+| hotchpotch/static-embedding-japanese | 66.73 | 68.06 | 46.81 | 79.82 | 74.79 | 82.18 | 83.33 | 32.12 |
## Reranking
-| Model | Avg. | esci<br>(ndcg@10) |
-|:----------------------------------------------|:----------|:--------------------|
-| sbintuitions/sarashina-embedding-v1-1b | **93.74** | **93.74** |
-| OpenAI/text-embedding-3-large | 93.58 | 93.58 |
-| OpenAI/text-embedding-ada-002 | 93.04 | 93.04 |
-| intfloat/multilingual-e5-small | 93.03 | 93.03 |
-| pkshatech/GLuCoSE-base-ja-v2 | 93.01 | 93.01 |
-| cl-nagoya/ruri-small | 93.00 | 93.00 |
-| cl-nagoya/ruri-large | 92.99 | 92.99 |
-| intfloat/multilingual-e5-large | 92.96 | 92.96 |
-| OpenAI/text-embedding-3-small | 92.92 | 92.92 |
-| cl-nagoya/ruri-base | 92.91 | 92.91 |
-| intfloat/multilingual-e5-base | 92.85 | 92.85 |
-| jinaai/jina-embeddings-v3 | 92.71 | 92.71 |
-| pkshatech/RoSEtta-base-ja | 92.69 | 92.69 |
-| pkshatech/GLuCoSE-base-ja | 91.90 | 91.90 |
-| cl-nagoya/sup-simcse-ja-base | 91.83 | 91.83 |
-| sentence-transformers/LaBSE | 91.63 | 91.63 |
-| oshizo/sbert-jsnli-luke-japanese-base-lite | 91.56 | 91.56 |
-| pkshatech/simcse-ja-bert-base-clcmlp | 91.49 | 91.49 |
-| cl-nagoya/sup-simcse-ja-large | 91.48 | 91.48 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 91.30 | 91.30 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 91.16 | 91.16 |
-| cl-nagoya/unsup-simcse-ja-base | 91.16 | 91.16 |
-| MU-Kindai/Japanese-MixCSE-BERT-base | 91.01 | 91.01 |
-| cl-nagoya/unsup-simcse-ja-large | 90.95 | 90.95 |
-| MU-Kindai/Japanese-DiffCSE-BERT-base | 90.95 | 90.95 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 90.95 | 90.95 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 90.66 | 90.66 |
-| sentence-transformers/stsb-xlm-r-multilingual | 90.20 | 90.20 |
-| colorfulscoop/sbert-base-ja | 89.97 | 89.97 |
+| Model | Avg. | esci<br>(ndcg@10) | jacwir_reranking<br>(ndcg@10) | jqara<br>(ndcg@10) | miracl_reranking<br>(ndcg@10) | mldr_reranking<br>(ndcg@10) |
+|:----------------------------------------------|:---------:|:-------------------:|:-------------------------------:|:--------------------:|:-------------------------------:|:-----------------------------:|
+| sbintuitions/sarashina-embedding-v2-1b | **86.28** | 93.58 | 88.79 | **70.55** | 85.93 | 92.53 |
+| cl-nagoya/ruri-v3-310m | 85.84 | 93.43 | 88.46 | 68.93 | 85.01 | 93.36 |
+| cl-nagoya/ruri-v3-130m | 85.71 | 93.37 | 88.65 | 66.30 | **86.59** | 93.62 |
+| pfnet/plamo-embedding-1b | 85.05 | 93.59 | **91.74** | 66.15 | 81.91 | 91.87 |
+| sbintuitions/sarashina-embedding-v1-1b | 84.36 | **93.60** | 86.85 | 65.92 | 85.17 | 90.24 |
+| cl-nagoya/ruri-v3-70m | 84.21 | 93.20 | 87.48 | 63.09 | 85.03 | 92.26 |
+| BAAI/bge-m3 | 84.10 | 93.27 | 89.55 | 53.92 | 85.96 | **97.78** |
+| cl-nagoya/ruri-large-v2 | 83.89 | 93.21 | 85.29 | 64.47 | 85.78 | 90.68 |
+| OpenAI/text-embedding-3-large | 83.06 | 93.58 | 86.78 | 56.89 | 83.80 | 94.24 |
+| intfloat/multilingual-e5-large | 83.01 | 93.31 | 90.37 | 56.14 | 86.31 | 88.91 |
+| cl-nagoya/ruri-v3-30m | 82.93 | 93.06 | 87.61 | 57.47 | 83.52 | 92.97 |
+| pkshatech/GLuCoSE-base-ja-v2 | 82.63 | 93.02 | 88.27 | 60.70 | 82.44 | 88.71 |
+| cl-nagoya/ruri-base-v2 | 82.46 | 93.17 | 85.76 | 60.66 | 84.26 | 88.47 |
+| cl-nagoya/ruri-small-v2 | 82.30 | 93.20 | 88.18 | 56.70 | 83.33 | 90.09 |
+| cl-nagoya/ruri-large | 81.26 | 92.99 | 86.61 | 59.59 | 80.23 | 86.91 |
+| pkshatech/RoSEtta-base-ja | 81.25 | 92.68 | 86.83 | 57.92 | 80.38 | 88.45 |
+| google/embeddinggemma-300m | 80.93 | 93.26 | 86.72 | 52.09 | 82.38 | 90.19 |
+| cl-nagoya/ruri-base | 80.31 | 92.92 | 87.24 | 54.15 | 79.22 | 88.01 |
+| intfloat/multilingual-e5-small | 80.09 | 92.98 | 89.99 | 49.28 | 81.78 | 86.41 |
+| cl-nagoya/ruri-small | 79.98 | 93.01 | 87.67 | 53.26 | 77.84 | 88.14 |
+| intfloat/multilingual-e5-base | 79.46 | 92.90 | 88.65 | 47.61 | 81.97 | 86.15 |
+| OpenAI/text-embedding-3-small | 77.29 | 92.92 | 84.72 | 38.58 | 77.61 | 92.61 |
+| hotchpotch/static-embedding-japanese | 77.09 | 91.87 | 80.96 | 47.06 | 72.01 | 93.55 |
+| OpenAI/text-embedding-ada-002 | 75.63 | 93.04 | 83.91 | 37.54 | 72.83 | 90.83 |
+| pkshatech/GLuCoSE-base-ja | 72.37 | 91.82 | 74.54 | 30.24 | 77.82 | 87.42 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 71.88 | 91.30 | 65.14 | 44.96 | 71.21 | 86.79 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 70.59 | 90.93 | 61.45 | 42.47 | 70.65 | 87.42 |
+| cl-nagoya/sup-simcse-ja-base | 70.36 | 91.84 | 64.27 | 37.48 | 70.88 | 87.34 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 69.92 | 91.17 | 65.41 | 38.39 | 70.19 | 84.42 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 69.81 | 91.51 | 67.45 | 36.04 | 68.68 | 85.38 |
+| cl-nagoya/sup-simcse-ja-large | 68.76 | 91.50 | 56.15 | 38.30 | 71.26 | 86.60 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 68.58 | 90.92 | 60.51 | 36.84 | 69.31 | 85.31 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 68.15 | 90.67 | 58.16 | 36.66 | 69.09 | 86.15 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 68.02 | 91.27 | 57.45 | 31.74 | 72.12 | 87.50 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 67.73 | 90.95 | 59.81 | 37.20 | 67.90 | 82.81 |
+| cl-nagoya/unsup-simcse-ja-large | 67.39 | 90.95 | 54.17 | 38.78 | 70.02 | 83.04 |
+| sentence-transformers/LaBSE | 67.01 | 91.47 | 67.85 | 24.62 | 69.28 | 81.84 |
+| cl-nagoya/unsup-simcse-ja-base | 66.20 | 91.18 | 51.54 | 32.19 | 69.96 | 86.12 |
+| colorfulscoop/sbert-base-ja | 59.38 | 89.97 | 37.15 | 22.21 | 65.03 | 82.55 |
+| sentence-transformers/stsb-xlm-r-multilingual | 57.93 | 89.72 | 39.21 | 18.51 | 65.36 | 76.88 |
## Clustering
-| Model | Avg. | livedoor_news<br>(v_measure_score) | mewsc16<br>(v_measure_score) |
-|:----------------------------------------------|:----------|:-------------------------------------|:-------------------------------|
-| cl-nagoya/ruri-base | **54.16** | 54.27 | **54.04** |
-| sbintuitions/sarashina-embedding-v1-1b | 53.86 | 56.42 | 51.29 |
-| OpenAI/text-embedding-3-large | 53.32 | 57.09 | 49.55 |
-| pkshatech/RoSEtta-base-ja | 53.23 | **58.62** | 47.85 |
-| cl-nagoya/ruri-large | 51.82 | 51.39 | 52.25 |
-| cl-nagoya/sup-simcse-ja-base | 51.79 | 52.67 | 50.91 |
-| jinaai/jina-embeddings-v3 | 51.46 | 54.72 | 48.19 |
-| intfloat/multilingual-e5-large | 51.24 | 57.13 | 45.34 |
-| cl-nagoya/ruri-small | 51.19 | 50.96 | 51.41 |
-| OpenAI/text-embedding-3-small | 51.06 | 54.57 | 47.55 |
-| cl-nagoya/sup-simcse-ja-large | 50.56 | 50.75 | 50.38 |
-| oshizo/sbert-jsnli-luke-japanese-base-lite | 50.33 | 46.77 | 53.89 |
-| pkshatech/GLuCoSE-base-ja | 49.78 | 49.89 | 49.68 |
-| pkshatech/GLuCoSE-base-ja-v2 | 48.65 | 51.52 | 45.78 |
-| cl-nagoya/unsup-simcse-ja-large | 48.41 | 50.90 | 45.92 |
-| OpenAI/text-embedding-ada-002 | 48.30 | 49.67 | 46.92 |
-| intfloat/multilingual-e5-base | 48.26 | 55.03 | 41.49 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 48.25 | 53.20 | 43.31 |
-| pkshatech/simcse-ja-bert-base-clcmlp | 47.53 | 44.77 | 50.30 |
-| intfloat/multilingual-e5-small | 46.91 | 54.70 | 39.12 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 46.68 | 53.02 | 40.35 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 45.81 | 48.45 | 43.17 |
-| MU-Kindai/Japanese-MixCSE-BERT-base | 44.95 | 52.62 | 37.28 |
-| sentence-transformers/LaBSE | 44.88 | 48.29 | 41.47 |
-| colorfulscoop/sbert-base-ja | 44.81 | 42.99 | 46.64 |
-| cl-nagoya/unsup-simcse-ja-base | 44.77 | 52.23 | 37.31 |
-| MU-Kindai/Japanese-DiffCSE-BERT-base | 44.22 | 49.67 | 38.77 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 43.11 | 41.04 | 45.18 |
-| sentence-transformers/stsb-xlm-r-multilingual | 27.46 | 24.49 | 30.43 |
-
-## PairClassification
-| Model | Avg. | paws_x_ja<br>(binary_f1) |
-|:----------------------------------------------|:----------|:---------------------------|
-| pkshatech/GLuCoSE-base-ja | **66.39** | **66.39** |
-| cl-nagoya/sup-simcse-ja-base | 62.57 | 62.57 |
-| cl-nagoya/sup-simcse-ja-large | 62.51 | 62.51 |
-| cl-nagoya/unsup-simcse-ja-large | 62.49 | 62.49 |
-| cl-nagoya/unsup-simcse-ja-base | 62.44 | 62.44 |
-| pkshatech/simcse-ja-bert-base-clcmlp | 62.40 | 62.40 |
-| OpenAI/text-embedding-ada-002 | 62.40 | 62.40 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 62.38 | 62.38 |
-| cl-nagoya/ruri-base | 62.38 | 62.38 |
-| oshizo/sbert-jsnli-luke-japanese-base-lite | 62.38 | 62.38 |
-| MU-Kindai/Japanese-DiffCSE-BERT-base | 62.38 | 62.38 |
-| jinaai/jina-embeddings-v3 | 62.37 | 62.37 |
-| pkshatech/GLuCoSE-base-ja-v2 | 62.37 | 62.37 |
-| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 62.37 | 62.37 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 62.35 | 62.35 |
-| OpenAI/text-embedding-3-large | 62.35 | 62.35 |
-| MU-Kindai/Japanese-MixCSE-BERT-base | 62.33 | 62.33 |
-| sentence-transformers/LaBSE | 62.33 | 62.33 |
-| colorfulscoop/sbert-base-ja | 62.31 | 62.31 |
-| cl-nagoya/ruri-large | 62.29 | 62.29 |
-| OpenAI/text-embedding-3-small | 62.27 | 62.27 |
-| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 62.27 | 62.27 |
-| intfloat/multilingual-e5-base | 62.26 | 62.26 |
-| sentence-transformers/stsb-xlm-r-multilingual | 62.20 | 62.20 |
-| intfloat/multilingual-e5-small | 62.19 | 62.19 |
-| intfloat/multilingual-e5-large | 62.15 | 62.15 |
-| cl-nagoya/ruri-small | 62.11 | 62.11 |
-| sbintuitions/sarashina-embedding-v1-1b | 62.00 | 62.00 |
-| pkshatech/RoSEtta-base-ja | 61.74 | 61.74 |
+| Model | Avg. | livedoor_news<br>(v_measure_score) | mewsc16<br>(v_measure_score) | sib200_jpn_clust<br>(v_measure_score) |
+|:----------------------------------------------|:---------:|:------------------------------------:|:------------------------------:|:---------------------------------------:|
+| cl-nagoya/sup-simcse-ja-base | **52.57** | 55.11 | **53.39** | 49.21 |
+| sbintuitions/sarashina-embedding-v2-1b | 52.56 | 57.41 | 51.67 | 48.59 |
+| pfnet/plamo-embedding-1b | 52.50 | **61.74** | 48.03 | 47.73 |
+| OpenAI/text-embedding-3-large | 51.82 | 57.09 | 49.55 | 48.83 |
+| cl-nagoya/ruri-v3-130m | 51.13 | 54.36 | 48.84 | 50.20 |
+| cl-nagoya/ruri-large-v2 | 50.88 | 55.62 | 50.97 | 46.06 |
+| intfloat/multilingual-e5-large | 50.58 | 51.58 | 46.81 | **53.35** |
+| cl-nagoya/ruri-v3-310m | 50.52 | 58.56 | 48.60 | 44.41 |
+| sbintuitions/sarashina-embedding-v1-1b | 50.30 | 56.03 | 50.69 | 44.19 |
+| cl-nagoya/sup-simcse-ja-large | 50.12 | 53.38 | 51.26 | 45.74 |
+| intfloat/multilingual-e5-base | 50.12 | 53.79 | 49.44 | 47.13 |
+| cl-nagoya/ruri-small-v2 | 49.97 | 52.61 | 49.47 | 47.82 |
+| cl-nagoya/ruri-v3-70m | 49.95 | 54.92 | 47.74 | 47.20 |
+| cl-nagoya/ruri-large | 49.93 | 54.44 | 50.59 | 44.76 |
+| cl-nagoya/ruri-v3-30m | 49.90 | 53.69 | 47.96 | 48.04 |
+| cl-nagoya/ruri-small | 49.59 | 52.90 | 49.37 | 46.51 |
+| google/embeddinggemma-300m | 49.48 | 55.33 | 50.55 | 42.55 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 49.45 | 49.11 | 47.02 | 52.21 |
+| intfloat/multilingual-e5-small | 49.29 | 51.94 | 52.34 | 43.59 |
+| cl-nagoya/ruri-base | 49.10 | 56.69 | 52.05 | 38.55 |
+| OpenAI/text-embedding-3-small | 48.91 | 54.57 | 47.55 | 44.59 |
+| OpenAI/text-embedding-ada-002 | 48.78 | 49.67 | 46.92 | 49.74 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 48.75 | 51.70 | 51.52 | 43.03 |
+| pkshatech/GLuCoSE-base-ja-v2 | 48.19 | 54.46 | 46.12 | 43.98 |
+| pkshatech/GLuCoSE-base-ja | 47.12 | 50.41 | 49.52 | 41.43 |
+| cl-nagoya/ruri-base-v2 | 46.84 | 54.38 | 50.61 | 35.53 |
+| BAAI/bge-m3 | 45.56 | 54.76 | 42.00 | 39.91 |
+| pkshatech/RoSEtta-base-ja | 44.88 | 48.89 | 45.16 | 40.61 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 44.54 | 51.30 | 46.27 | 36.04 |
+| cl-nagoya/unsup-simcse-ja-large | 43.52 | 51.48 | 44.44 | 34.65 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 43.45 | 48.56 | 43.20 | 38.60 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 42.86 | 45.84 | 44.08 | 38.67 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 42.02 | 51.59 | 42.68 | 31.78 |
+| cl-nagoya/unsup-simcse-ja-base | 41.29 | 50.65 | 39.58 | 33.63 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 39.93 | 46.01 | 39.22 | 34.56 |
+| sentence-transformers/LaBSE | 39.82 | 49.08 | 41.78 | 28.59 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 39.27 | 48.79 | 42.61 | 26.42 |
+| colorfulscoop/sbert-base-ja | 39.04 | 40.60 | 46.18 | 30.36 |
+| hotchpotch/static-embedding-japanese | 35.91 | 51.44 | 34.81 | 21.47 |
+| sentence-transformers/stsb-xlm-r-multilingual | 27.67 | 26.62 | 32.05 | 24.34 |
diff --git a/make_leaderboard.py b/make_leaderboard.py
index 0e43ccf..5d472eb 100644
--- a/make_leaderboard.py
+++ b/make_leaderboard.py
@@ -9,14 +9,44 @@
"amazon_review_classification": "amazon_review",
"massive_intent_classification": "massive_intent",
"massive_scenario_classification": "massive_scenario",
+ "japanese_sentiment_classification": "jpn_sentiment",
+ "sib200_japanese_classification": "sib200_jpn_cls",
+ "sib200_japanese_clustering": "sib200_jpn_clust",
+ "nlp_journal_abs_article": "nlp_abs_article",
+ "nlp_journal_abs_intro": "nlp_abs_intro",
+ "nlp_journal_title_abs": "nlp_title_abs",
+ "nlp_journal_title_intro": "nlp_title_intro",
}
-TASK_ORDER = ["Retrieval", "STS", "Classification", "Reranking", "Clustering", "PairClassification"]
+TASK_ORDER = ["Retrieval", "STS", "Classification", "Reranking", "Clustering"]
SUMMARY_KEY = "Summary"
"""
Collects the results from the results folder.
"""
+# Load the reference summary (sbintuitions/sarashina-embedding-v1-1b/summary.json); its layout is what the other summaries are compared against
+reference_file = Path("docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json")
+with open(reference_file) as f:
+ reference_structure = json.load(f)
+
+# Map each top-level task name in the reference to the set of keys nested directly under it
+expected_structure = {}
+for task_name, task_results in reference_structure.items():
+ expected_structure[task_name] = set(task_results.keys())
+
+
+def has_same_structure(summary: dict, expected: dict) -> bool:
+    """Tell whether ``summary`` has exactly the layout described by ``expected``.
+
+    Args:
+        summary: Parsed summary mapping of task name -> mapping keyed by dataset name.
+        expected: Mapping of task name -> set of the keys required under that task.
+
+    Returns:
+        True only if both have the same task names and, for every task, the keys
+        under ``summary[task]`` equal the expected set; False otherwise.
+    """
+    same_tasks = set(summary.keys()) == set(expected.keys())
+    # Short-circuit on a task mismatch so summary[task] is never looked up for a missing task.
+    return same_tasks and all(
+        set(summary[task].keys()) == required for task, required in expected.items()
+    )
+
+
# {task_name: {model_signature: {(dataset_name, metric_name): score}}}
all_results: dict[str, dict[str, dict[str, float]]] = defaultdict(lambda: defaultdict(dict))
for summary_file in Path("docs/results").rglob("summary.json"):
@@ -26,6 +56,13 @@
with open(summary_file) as f:
summary = json.load(f)
+ # Skip models that don't have the same structure as reference
+ if not has_same_structure(summary, expected_structure):
+ org_name = summary_file.parent.parent.name
+ model_name = summary_file.parent.name
+ print(f"Skipping {org_name}/{model_name}: different structure")
+ continue
+
org_name = summary_file.parent.parent.name
model_name = summary_file.parent.name
model_signature = f"{org_name}/{model_name}"
@@ -56,17 +93,24 @@ def format_score(score: float) -> str:
# format to markdown table
dataset_keys = list(task_results[next(iter(task_results))].keys())
if task_name == SUMMARY_KEY:
- dataset_keys = TASK_ORDER
+ # Only include existing tasks in the summary
+ dataset_keys = [task for task in TASK_ORDER if task in all_results]
header = ["Model", AVG_COLUMN_NAME, *dataset_keys]
table_list: list[list[str | float]] = []
for model_signature, dataset_scores in task_results.items():
+ # Skip models that don't have all required datasets
+ if not all(k in dataset_scores for k in dataset_keys):
+ continue
+
model_scores = [dataset_scores[k] for k in dataset_keys]
if task_name == SUMMARY_KEY:
scores_by_dataset = []
for _task_name, _task_results in all_results.items():
- if _task_name != SUMMARY_KEY:
+ if _task_name != SUMMARY_KEY and model_signature in _task_results:
scores_by_dataset.extend(list(_task_results[model_signature].values()))
+ if not scores_by_dataset: # Skip if no scores available
+ continue
average_score = sum(scores_by_dataset) / len(scores_by_dataset)
else:
average_score = sum(model_scores) / len(model_scores)
@@ -88,7 +132,9 @@ def format_score(score: float) -> str:
# add header
table_list.insert(0, ["Model", AVG_COLUMN_NAME, *dataset_keys])
- markdown_table = tabulate(table_list, headers="firstrow", tablefmt="pipe")
+ # Set alignment: left for model names, center for all numeric columns
+ col_alignment = ["left"] + ["center"] * (len(dataset_keys) + 1)
+ markdown_table = tabulate(table_list, headers="firstrow", tablefmt="pipe", colalign=col_alignment)
markdown_tables[task_name] = markdown_table
"""
@@ -100,6 +146,8 @@ def format_score(score: float) -> str:
"This leaderboard shows the results stored under `docs/results`. The scores are all multiplied by 100.\n\n"
)
for task_name in [SUMMARY_KEY, *TASK_ORDER]:
+ if task_name not in markdown_tables:
+ continue
markdown_table = markdown_tables[task_name]
f.write(f"## {task_name}\n")
diff --git a/poetry.lock b/poetry.lock
index 40fbe9f..5766c9f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
[[package]]
name = "accelerate"
@@ -6,6 +6,7 @@ version = "0.31.0"
description = "Accelerate"
optional = false
python-versions = ">=3.8.0"
+groups = ["main"]
files = [
{file = "accelerate-0.31.0-py3-none-any.whl", hash = "sha256:0fc608dc49584f64d04711a39711d73cb0ad4ef3d21cddee7ef2216e29471144"},
{file = "accelerate-0.31.0.tar.gz", hash = "sha256:b5199865b26106ccf9205acacbe8e4b3b428ad585e7c472d6a46f6fb75b6c176"},
@@ -37,6 +38,7 @@ version = "3.9.5"
description = "Async http client/server framework (asyncio)"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"},
{file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"},
@@ -125,7 +127,7 @@ multidict = ">=4.5,<7.0"
yarl = ">=1.0,<2.0"
[package.extras]
-speedups = ["Brotli", "aiodns", "brotlicffi"]
+speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
[[package]]
name = "aiosignal"
@@ -133,6 +135,7 @@ version = "1.3.1"
description = "aiosignal: a list of registered asynchronous callbacks"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
{file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
@@ -147,6 +150,7 @@ version = "0.7.0"
description = "Reusable constraint types to use with typing.Annotated"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
{file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
@@ -158,6 +162,7 @@ version = "4.4.0"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"},
{file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"},
@@ -171,7 +176,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
+test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""]
trio = ["trio (>=0.23)"]
[[package]]
@@ -180,6 +185,8 @@ version = "4.0.3"
description = "Timeout context manager for asyncio programs"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
+markers = "python_version < \"3.11\""
files = [
{file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
@@ -191,6 +198,7 @@ version = "23.2.0"
description = "Classes Without Boilerplate"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
{file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
@@ -201,8 +209,8 @@ cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
dev = ["attrs[tests]", "pre-commit"]
docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
tests = ["attrs[tests-no-zope]", "zope-interface"]
-tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"]
-tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.6) ; platform_python_implementation == \"CPython\" and python_version >= \"3.8\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.8\""]
+tests-no-zope = ["attrs[tests-mypy]", "cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
[[package]]
name = "black"
@@ -210,6 +218,7 @@ version = "23.12.1"
description = "The uncompromising code formatter."
optional = false
python-versions = ">=3.8"
+groups = ["dev"]
files = [
{file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"},
{file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"},
@@ -246,7 +255,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
[package.extras]
colorama = ["colorama (>=0.4.3)"]
-d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
+d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
uvloop = ["uvloop (>=0.15.2)"]
@@ -256,6 +265,7 @@ version = "2024.6.2"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.6"
+groups = ["main"]
files = [
{file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"},
{file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"},
@@ -267,6 +277,7 @@ version = "3.3.2"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false
python-versions = ">=3.7.0"
+groups = ["main"]
files = [
{file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"},
{file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"},
@@ -366,6 +377,7 @@ version = "8.1.7"
description = "Composable command line interface toolkit"
optional = false
python-versions = ">=3.7"
+groups = ["dev"]
files = [
{file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
{file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
@@ -380,10 +392,12 @@ version = "0.4.6"
description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["main", "dev"]
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
+markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "platform_system == \"Windows\""}
[[package]]
name = "datasets"
@@ -391,6 +405,7 @@ version = "2.19.2"
description = "HuggingFace community-driven open-source library of datasets"
optional = false
python-versions = ">=3.8.0"
+groups = ["main"]
files = [
{file = "datasets-2.19.2-py3-none-any.whl", hash = "sha256:e07ff15d75b1af75c87dd96323ba2a361128d495136652f37fd62f918d17bb4e"},
{file = "datasets-2.19.2.tar.gz", hash = "sha256:eccb82fb3bb5ee26ccc6d7a15b7f1f834e2cc4e59b7cff7733a003552bad51ef"},
@@ -417,7 +432,7 @@ xxhash = "*"
apache-beam = ["apache-beam (>=2.26.0)"]
audio = ["librosa", "soundfile (>=0.12.1)"]
benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
-dev = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
+dev = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"]
jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
@@ -425,7 +440,7 @@ quality = ["ruff (>=0.3.0)"]
s3 = ["s3fs"]
tensorflow = ["tensorflow (>=2.6.0)"]
tensorflow-gpu = ["tensorflow (>=2.6.0)"]
-tests = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
+tests = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
torch = ["torch"]
vision = ["Pillow (>=9.4.0)"]
@@ -435,6 +450,7 @@ version = "0.3.8"
description = "serialize all of Python"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"},
{file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"},
@@ -450,6 +466,7 @@ version = "1.9.0"
description = "Distro - an OS platform information API"
optional = false
python-versions = ">=3.6"
+groups = ["main"]
files = [
{file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
@@ -461,6 +478,7 @@ version = "0.1.3"
description = "Like `typing._eval_type`, but lets older Python versions use newer typing features."
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "eval_type_backport-0.1.3-py3-none-any.whl", hash = "sha256:519d2a993b3da286df9f90e17f503f66435106ad870cf26620c5720e2158ddf2"},
{file = "eval_type_backport-0.1.3.tar.gz", hash = "sha256:d83ee225331dfa009493cec1f3608a71550b515ee4749abe78da14e3c5e314f5"},
@@ -475,6 +493,8 @@ version = "1.2.1"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
+markers = "python_version < \"3.11\""
files = [
{file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
{file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
@@ -489,6 +509,7 @@ version = "3.14.0"
description = "A platform independent file lock."
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"},
{file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"},
@@ -497,7 +518,7 @@ files = [
[package.extras]
docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"]
testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
-typing = ["typing-extensions (>=4.8)"]
+typing = ["typing-extensions (>=4.8) ; python_version < \"3.11\""]
[[package]]
name = "flake8"
@@ -505,6 +526,7 @@ version = "7.0.0"
description = "the modular source code checker: pep8 pyflakes and co"
optional = false
python-versions = ">=3.8.1"
+groups = ["dev"]
files = [
{file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"},
{file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"},
@@ -521,6 +543,7 @@ version = "1.4.1"
description = "A list-like structure which implements collections.abc.MutableSequence"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"},
{file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"},
@@ -607,6 +630,7 @@ version = "2024.3.1"
description = "File-system specification"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"},
{file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"},
@@ -641,50 +665,52 @@ tqdm = ["tqdm"]
[[package]]
name = "fugashi"
-version = "1.3.2"
-description = "A Cython MeCab wrapper for fast, pythonic Japanese tokenization."
+version = "1.5.2"
+description = "Cython MeCab wrapper for fast, pythonic Japanese tokenization."
optional = false
-python-versions = ">=3.7"
-files = [
- {file = "fugashi-1.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:583e7a14e6ddf8a03b500bec30d708f72e98035ab43e2c92940dd9c36ee63de9"},
- {file = "fugashi-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6c67023cdc1b059b05751c1785c794c24d8862f37a16cdb805e33c7d7ae0c19d"},
- {file = "fugashi-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b2e21be33ed72621d9f4a601a33c00b38052df947f297d792b221a33337f094"},
- {file = "fugashi-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:af7abac3037c7421b075782897766b8f453f28ef3bbadd3e7d69c9df409a48a8"},
- {file = "fugashi-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b915d936e3eb30d50fde86889f8ab56968e5cb4d0ceeb497ac1bb6c58531f87"},
- {file = "fugashi-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:8dc57d07809fbecdfc277d50028d5b8d23fb4c0ed12e6d6f7f565709c18848a4"},
- {file = "fugashi-1.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:50243df8758f5fb90bd2801e557168e613df61fa4d488acfe364070e8a4a234c"},
- {file = "fugashi-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9c9fb77c42e6b421e5c20f74179ed479255545b40a28f9983f264a8b19a30374"},
- {file = "fugashi-1.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fdef6be3489279c670459a55b2dfa876c0856b3fc96b3590aa801f37af6b827"},
- {file = "fugashi-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b2ebe0d6722e05000a959df303e06937939009f4eef0b8692018eb019496013"},
- {file = "fugashi-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85d8e3a9e9d92f555525b2719153e7d3e4ec71d0bae0b076b5495634039b8490"},
- {file = "fugashi-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:9774bb52930fa17ebab17f8bcf2b5d20b6ef529b425ea65affb29a3307c003f8"},
- {file = "fugashi-1.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fcfb3908059f4dd15d7fda64edd3c027b4da668bf1731f147aa888f5db01bd6c"},
- {file = "fugashi-1.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b44261f2051c43a9e31816d85bb89e5563c3e4c03ff7830d1ebf5942888cf9"},
- {file = "fugashi-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a2d8aecb2a239de33bcb70806b7688001e72f68bde68961c6f2899155f15f87"},
- {file = "fugashi-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e24864e92ad3acf3c0b8f645e33d543fe569544bb6ee9728cb281325aa76d06"},
- {file = "fugashi-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea942e45214a99844146ce0e0f1ac43bff6e2ccbf6d1cbfde4f2bed9ca0951b1"},
- {file = "fugashi-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:da61498017e5cbee65c6eff88a13e17b45a5e3b0428733e99168344b8ff95da5"},
- {file = "fugashi-1.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:18cd37369c1df25e56ef55ea31b3daaa14cfaae805d0ad51ae1274f749f3748d"},
- {file = "fugashi-1.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63a6c360c1d5e8c4ffa55f1459550146a204401c5fb8cc01d4ba593586ed328a"},
- {file = "fugashi-1.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ed34c799e945f013345a02cf27a5bc97e383b76c3127afe09008cf92b5858d"},
- {file = "fugashi-1.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:213c543e00c80fd601926b03fe489ebd6140d6022a78e2398dcbae7032a9166f"},
- {file = "fugashi-1.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:107a37140c51396776810294aa47d6b92f767f834f1b9e50ca35046a63f31dfd"},
- {file = "fugashi-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:13001a977d0a87e174defaa7a7d5c512da0fd021beabe80ce8eb94694a9563a8"},
- {file = "fugashi-1.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1ad77258f97547d906cc822192c6c2c99a54290b0ca8c127368e11e0a0365245"},
- {file = "fugashi-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d3345d2c61dd9d056442e271887a189cc2831a5365c3b8bdcccd4395b54f4fe"},
- {file = "fugashi-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1cb923fbdfccc5d750accd32c9b929603852d6626b162834370b4b3245bb8c2"},
- {file = "fugashi-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:6072a18d1f8428eb19a199ee3d8f1b01c310d15baec96aa7a9fa533e1ce60673"},
- {file = "fugashi-1.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7e392f2c57068bb892c45c1b69067c3dde94b633c81c725a613ee7defe09de47"},
- {file = "fugashi-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:85de463fc30390c06d985f52fcfd422acf7ada6b13f723721ca964854b9ae435"},
- {file = "fugashi-1.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5a640da3824aba966209fc425b2b19c38d22a3da637f83b4a7df83cb94376b87"},
- {file = "fugashi-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11e95f57b78152be3a0a1a1e77d7887cfc25c30412d5f5825711b75ea6d415be"},
- {file = "fugashi-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:409b83f136a3c2da805cd999bd7e1792e7c71fa8e0637f77bdec2b6fd070a3bb"},
- {file = "fugashi-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:51eed11fee767597cfe735bd01326eb06deb2283112e29e9e5bdc954750e7a24"},
- {file = "fugashi-1.3.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bc99b6e8f003c7a0e53e0f486caa1547f0ca8f86777610ea92af6e2f40ca212a"},
- {file = "fugashi-1.3.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4c7e97655d1d3f3f5d5c5da6ac7f31f187177a39f1557f9d3f683772a2e30815"},
- {file = "fugashi-1.3.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:54865ba40c35b3180d9c7cf629a1f3e430bca626dcd6ee6288bc5245c044edea"},
- {file = "fugashi-1.3.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:21d2dac5b085632f1f9a24edf5d7ccaeb3272be672e4aa37a0b219fc7a3b0655"},
- {file = "fugashi-1.3.2.tar.gz", hash = "sha256:964980b5d227ee41af7570542aaab56b1298c44416271cba5d8ff9a58ab40748"},
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "fugashi-1.5.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:952533caa1704720989ee7f4262902219f938eac87a003d72b8a98b2a24b0299"},
+ {file = "fugashi-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5b65bdf535d6a58cbea2938dd2de7daf001c38f8821f28006b695d3ac892f521"},
+ {file = "fugashi-1.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:380e5ebe058e4243e5662b252b008782f20818c5d2d30d0e482a8911e2e68674"},
+ {file = "fugashi-1.5.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:809db19725a623b5f3f47c7c11909143bb14781569caa3211e6c813608a9a213"},
+ {file = "fugashi-1.5.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6464f747c38a1043c9a2da81975db8f2c9724ef59389754d8dae7328ed60a698"},
+ {file = "fugashi-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:894b69898b83c6d96f73134466df68682cba10d867c1ca55a93585a7d2213133"},
+ {file = "fugashi-1.5.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:072f0ba00ea38705ff43916c8438ce9560bf7ae5e67d415b80f4996f0b82b04e"},
+ {file = "fugashi-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e16ada7b953bf5a18fc9c81b2537c58f1c9929b993c6629bf972f96762b221a2"},
+ {file = "fugashi-1.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f855953ac6c98cf239d407d341e3298a54119c8de88217037f012096e41ebe7b"},
+ {file = "fugashi-1.5.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:516d61660c7b2262047e531b0a99275ce63fd2256f30282fc5066160435478a6"},
+ {file = "fugashi-1.5.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ff899e1767024ba8bc53d8a2cf90bca19a6a54b14ddf05a75d04169f7acb262c"},
+ {file = "fugashi-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c5e04cb808f5cd46fc682469702f1e34f6199a264514e5c21b1e17ea4f8313f"},
+ {file = "fugashi-1.5.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4ed199a931c1d9f7d55c606d90a06323d1a60164ec222ea70af74c0c9d236faa"},
+ {file = "fugashi-1.5.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3d2bb28cc6c6eec1c50729bb2dda44007a45599f0471b14c8fda57b0dde36d50"},
+ {file = "fugashi-1.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c1f64345a7a13b229fb755b567cbc993adb43b5b617ad4089521e5dd4d27b91"},
+ {file = "fugashi-1.5.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ffe760c93e21896cc74066bc5e7dbee6e41a26199807c850b486e2e29b8a3131"},
+ {file = "fugashi-1.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:83bc7bf08f81a3c3992bf10b8c681720898a826c6c3dffa80e1296e005f4bfb8"},
+ {file = "fugashi-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:936d710166c5b05064ec2ce0eb347fff7a0cf102c33989012fad205346943402"},
+ {file = "fugashi-1.5.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5cd0a399aad72d00a3b6b2d8c45e43a8c1e3aefd86ba153c826426b8e133e533"},
+ {file = "fugashi-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52c79cddbdcf4bbd0490212d2b2d78b6011d4cf733ff4ef9455274da9a8d54f0"},
+ {file = "fugashi-1.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2ee7b102fef6ec554bdeba51a969ce894a519cc71bade5d05a27935de4426745"},
+ {file = "fugashi-1.5.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32e01a394011270078efb6c71ef188c327255544d953692cd82f7f726d59ecc4"},
+ {file = "fugashi-1.5.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0e79d3f09d847d07eddf8e62ad9840b11331102bc31ecd66455c62581af11638"},
+ {file = "fugashi-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:cc5e5ece1f6ba1ce00f2a0a9465d2b91fe01e904888aa0c7089a20e471646c47"},
+ {file = "fugashi-1.5.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:0535dcc5a844fb196c215020a5791e5ac0b6c26ee4879cb0e63545c5e6f33642"},
+ {file = "fugashi-1.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:0805863a5268e112bc3c01e9d77e58a7c5ea079d893a18e0d381f3874f690949"},
+ {file = "fugashi-1.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:75a8f6219e26e54c95a969af6c5c67f6ea65e333aecc4e85ccc360488e4ba056"},
+ {file = "fugashi-1.5.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:79cf4b79809e7e9016dc179e35789bb6a0b9df44e03993835c23d5cb31994de2"},
+ {file = "fugashi-1.5.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71c0027aa11747adcb3753d31663290c53fea8007371f0b080c53c192918ceb9"},
+ {file = "fugashi-1.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:a3c69086650a66bfffb5dd4952d42a9274cea9b110df7b4837c74da1fe4f98f3"},
+ {file = "fugashi-1.5.2-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:41e3f388913a87826045722ab59611b27a4654a51e2037c69d6189e04f33f6f5"},
+ {file = "fugashi-1.5.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bb6e06928bd428a8a139660866f01dadd55546b6395a34dffe5602d8c1329205"},
+ {file = "fugashi-1.5.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e516bde355c2ba53b5b2ce37760cf67f6f186c79efa049f9ab3767bc843f341b"},
+ {file = "fugashi-1.5.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7e0c20abc9df511c54c90ceab118208d051a196ef5f68c63ab1c710fc1a35c6a"},
+ {file = "fugashi-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5eda8187624053a610ec09f7b6391d0411e9148c34b5fddad522b342edbcb201"},
+ {file = "fugashi-1.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a52b9a023522969f3d9e32172c1a49b0d10bfc187433f33d3ceb1e730cc65417"},
+ {file = "fugashi-1.5.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:deca2ff8310d482b802721814b61eeecc8596af396e346b70389ae3f912790c7"},
+ {file = "fugashi-1.5.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4b586b8dcdbff7bb95d36ff8c9ac7b041ed95ce4d8e734c383b3c4817e94f992"},
+ {file = "fugashi-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:954b426e7886c1c4113bcf56c1faebf11bcead7768aa764c1b0d0104073c2653"},
+ {file = "fugashi-1.5.2.tar.gz", hash = "sha256:a7959eab95bb37a6a934fc2314d3ff888664d11b88d0e1c596260a5785d5880e"},
]
[package.extras]
@@ -697,17 +723,55 @@ version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
+[[package]]
+name = "hf-xet"
+version = "1.2.0"
+description = "Fast transfer of large files with the Hugging Face Hub."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""
+files = [
+ {file = "hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649"},
+ {file = "hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813"},
+ {file = "hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc"},
+ {file = "hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5"},
+ {file = "hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f"},
+ {file = "hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832"},
+ {file = "hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382"},
+ {file = "hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e"},
+ {file = "hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8"},
+ {file = "hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0"},
+ {file = "hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090"},
+ {file = "hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a"},
+ {file = "hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f"},
+ {file = "hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc"},
+ {file = "hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848"},
+ {file = "hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4"},
+ {file = "hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd"},
+ {file = "hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c"},
+ {file = "hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737"},
+ {file = "hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865"},
+ {file = "hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69"},
+ {file = "hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f"},
+]
+
+[package.extras]
+tests = ["pytest"]
+
[[package]]
name = "httpcore"
version = "1.0.5"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
{file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
@@ -729,6 +793,7 @@ version = "0.27.0"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
{file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"},
@@ -742,25 +807,27 @@ idna = "*"
sniffio = "*"
[package.extras]
-brotli = ["brotli", "brotlicffi"]
+brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
[[package]]
name = "huggingface-hub"
-version = "0.23.3"
+version = "0.36.0"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.8.0"
+groups = ["main"]
files = [
- {file = "huggingface_hub-0.23.3-py3-none-any.whl", hash = "sha256:22222c41223f1b7c209ae5511d2d82907325a0e3cdbce5f66949d43c598ff3bc"},
- {file = "huggingface_hub-0.23.3.tar.gz", hash = "sha256:1a1118a0b3dea3bab6c325d71be16f5ffe441d32f3ac7c348d6875911b694b5b"},
+ {file = "huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d"},
+ {file = "huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25"},
]
[package.dependencies]
filelock = "*"
fsspec = ">=2023.5.0"
+hf-xet = {version = ">=1.1.3,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""}
packaging = ">=20.9"
pyyaml = ">=5.1"
requests = "*"
@@ -768,17 +835,20 @@ tqdm = ">=4.42.1"
typing-extensions = ">=3.7.4.3"
[package.extras]
-all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
cli = ["InquirerPy (==0.3.4)"]
-dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
hf-transfer = ["hf-transfer (>=0.1.4)"]
-inference = ["aiohttp", "minijinja (>=1.0)"]
-quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"]
+hf-xet = ["hf-xet (>=1.1.2,<2.0.0)"]
+inference = ["aiohttp"]
+mcp = ["aiohttp", "mcp (>=1.8.0)", "typer"]
+oauth = ["authlib (>=1.3.2)", "fastapi", "httpx", "itsdangerous"]
+quality = ["libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "ruff (>=0.9.0)", "ty"]
tensorflow = ["graphviz", "pydot", "tensorflow"]
tensorflow-testing = ["keras (<3.0)", "tensorflow"]
-testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
-torch = ["safetensors", "torch"]
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
+torch = ["safetensors[torch]", "torch"]
typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
[[package]]
@@ -787,6 +857,7 @@ version = "3.7"
description = "Internationalized Domain Names in Applications (IDNA)"
optional = false
python-versions = ">=3.5"
+groups = ["main"]
files = [
{file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
@@ -798,31 +869,19 @@ version = "2.0.0"
description = "brain-dead simple config-ini parsing"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
]
-[[package]]
-name = "intel-openmp"
-version = "2021.4.0"
-description = "Intel OpenMP* Runtime Library"
-optional = false
-python-versions = "*"
-files = [
- {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"},
- {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:3b921236a38384e2016f0f3d65af6732cf2c12918087128a9163225451e776f2"},
- {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:e2240ab8d01472fed04f3544a878cda5da16c26232b7ea1b59132dbfb48b186e"},
- {file = "intel_openmp-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:6e863d8fd3d7e8ef389d52cf97a50fe2afe1a19247e8c0d168ce021546f96fc9"},
- {file = "intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:eef4c8bcc8acefd7f5cd3b9384dbf73d59e2c99fc56545712ded913f43c4a94f"},
-]
-
[[package]]
name = "ipadic"
version = "1.0.0"
description = "IPAdic packaged for Python"
optional = false
python-versions = "*"
+groups = ["main"]
files = [
{file = "ipadic-1.0.0.tar.gz", hash = "sha256:f5923d31eca6131acaaf18ed28d8998665b1347b640d3a6476f64650e9a71c07"},
]
@@ -833,6 +892,7 @@ version = "5.13.2"
description = "A Python utility / library to sort Python imports."
optional = false
python-versions = ">=3.8.0"
+groups = ["dev"]
files = [
{file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"},
{file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"},
@@ -847,6 +907,7 @@ version = "3.1.4"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
{file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
@@ -864,6 +925,7 @@ version = "1.4.2"
description = "Lightweight pipelining with Python functions"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"},
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
@@ -875,6 +937,7 @@ version = "4.29.0"
description = "Implement minimal boilerplate CLIs derived from type hints and parse from command line, config files and environment variables."
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "jsonargparse-4.29.0-py3-none-any.whl", hash = "sha256:e093d9509996b031d156fe8d4a087e2d91adbfc654b9e2c783878d45ad0dfefe"},
{file = "jsonargparse-4.29.0.tar.gz", hash = "sha256:03d407122c856095c48b07c58107002c9d3eaeb2795d8040efad831db5817494"},
@@ -887,12 +950,12 @@ PyYAML = ">=3.13"
[package.extras]
all = ["jsonargparse[argcomplete]", "jsonargparse[fsspec]", "jsonargparse[jsonnet]", "jsonargparse[jsonschema]", "jsonargparse[omegaconf]", "jsonargparse[reconplogger]", "jsonargparse[ruyaml]", "jsonargparse[signatures]", "jsonargparse[typing-extensions]", "jsonargparse[urls]"]
-argcomplete = ["argcomplete (>=2.0.0)", "argcomplete (>=3.3.0)"]
+argcomplete = ["argcomplete (>=2.0.0) ; python_version < \"3.8\"", "argcomplete (>=3.3.0) ; python_version >= \"3.8\""]
coverage = ["jsonargparse[test-no-urls]", "pytest-cov (>=4.0.0)"]
dev = ["build (>=0.10.0)", "jsonargparse[coverage]", "jsonargparse[doc]", "jsonargparse[mypy]", "jsonargparse[test]", "pre-commit (>=2.19.0)", "tox (>=3.25.0)"]
doc = ["Sphinx (>=1.7.9)", "autodocsumm (>=0.1.10)", "sphinx-autodoc-typehints (>=1.19.5)", "sphinx-rtd-theme (>=1.2.2)"]
fsspec = ["fsspec (>=0.8.4)"]
-jsonnet = ["jsonnet (>=0.13.0)", "jsonnet-binary (>=0.17.0)"]
+jsonnet = ["jsonnet (>=0.13.0) ; os_name == \"posix\"", "jsonnet-binary (>=0.17.0) ; os_name != \"posix\""]
jsonschema = ["jsonschema (>=3.2.0)"]
maintainer = ["bump2version (>=0.5.11)", "twine (>=4.0.2)"]
omegaconf = ["omegaconf (>=2.1.1)"]
@@ -901,7 +964,7 @@ ruyaml = ["ruyaml (>=0.20.0)"]
signatures = ["docstring-parser (>=0.15)", "jsonargparse[typing-extensions]", "typeshed-client (>=2.1.0)"]
test = ["attrs (>=22.2.0)", "jsonargparse[test-no-urls]", "pydantic (>=2.3.0)", "responses (>=0.12.0)", "types-PyYAML (>=6.0.11)", "types-requests (>=2.28.9)"]
test-no-urls = ["pytest (>=6.2.5)", "pytest-subtests (>=0.8.0)"]
-typing-extensions = ["typing-extensions (>=3.10.0.0)"]
+typing-extensions = ["typing-extensions (>=3.10.0.0) ; python_version < \"3.10\""]
urls = ["requests (>=2.18.4)"]
[[package]]
@@ -910,6 +973,8 @@ version = "0.20.0"
description = "Python bindings for Jsonnet - The data templating language"
optional = false
python-versions = "*"
+groups = ["main"]
+markers = "os_name == \"posix\""
files = [
{file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"},
]
@@ -920,6 +985,8 @@ version = "0.17.0"
description = "An UNOFFICIAL Python interface to Jsonnet, available as whl packages for Mac, Linux and Windows."
optional = false
python-versions = "*"
+groups = ["main"]
+markers = "os_name != \"posix\""
files = [
{file = "jsonnet-binary-0.17.0.tar.gz", hash = "sha256:fbadf25f28161b0ccf29e0b72ef689790d14a9b23a681ab6846bd7cb12e17f1d"},
{file = "jsonnet_binary-0.17.0-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:5db15ed838b6e4d1373d5d772a8283cf3a62282056cc5a3643c65bf257efeda4"},
@@ -961,6 +1028,7 @@ version = "0.7.2"
description = "Python logging made (stupidly) simple"
optional = false
python-versions = ">=3.5"
+groups = ["main"]
files = [
{file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"},
{file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"},
@@ -971,7 +1039,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
[package.extras]
-dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]
+dev = ["Sphinx (==7.2.5) ; python_version >= \"3.9\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.2.2) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "mypy (==v1.5.1) ; python_version >= \"3.8\"", "pre-commit (==3.4.0) ; python_version >= \"3.8\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==7.4.0) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==4.1.0) ; python_version >= \"3.8\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.0.0) ; python_version >= \"3.8\"", "sphinx-autobuild (==2021.3.14) ; python_version >= \"3.9\"", "sphinx-rtd-theme (==1.3.0) ; python_version >= \"3.9\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.11.0) ; python_version >= \"3.8\""]
[[package]]
name = "markupsafe"
@@ -979,6 +1047,7 @@ version = "2.1.5"
description = "Safely add untrusted strings to HTML/XML markup."
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"},
{file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"},
@@ -1048,35 +1117,19 @@ version = "0.7.0"
description = "McCabe checker, plugin for flake8"
optional = false
python-versions = ">=3.6"
+groups = ["dev"]
files = [
{file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
]
-[[package]]
-name = "mkl"
-version = "2021.4.0"
-description = "Intel® oneAPI Math Kernel Library"
-optional = false
-python-versions = "*"
-files = [
- {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"},
- {file = "mkl-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:636d07d90e68ccc9630c654d47ce9fdeb036bb46e2b193b3a9ac8cfea683cce5"},
- {file = "mkl-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:398dbf2b0d12acaf54117a5210e8f191827f373d362d796091d161f610c1ebfb"},
- {file = "mkl-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:439c640b269a5668134e3dcbcea4350459c4a8bc46469669b2d67e07e3d330e8"},
- {file = "mkl-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:ceef3cafce4c009dd25f65d7ad0d833a0fbadc3d8903991ec92351fe5de1e718"},
-]
-
-[package.dependencies]
-intel-openmp = "==2021.*"
-tbb = "==2021.*"
-
[[package]]
name = "mpmath"
version = "1.3.0"
description = "Python library for arbitrary-precision floating-point arithmetic"
optional = false
python-versions = "*"
+groups = ["main"]
files = [
{file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
{file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
@@ -1085,7 +1138,7 @@ files = [
[package.extras]
develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
docs = ["sphinx"]
-gmpy = ["gmpy2 (>=2.1.0a4)"]
+gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""]
tests = ["pytest (>=4.6)"]
[[package]]
@@ -1094,6 +1147,7 @@ version = "6.0.5"
description = "multidict implementation"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"},
{file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"},
@@ -1193,6 +1247,7 @@ version = "0.70.16"
description = "better multiprocessing and multithreading in Python"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"},
{file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"},
@@ -1217,6 +1272,7 @@ version = "1.10.0"
description = "Optional static typing for Python"
optional = false
python-versions = ">=3.8"
+groups = ["dev"]
files = [
{file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"},
{file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"},
@@ -1264,6 +1320,7 @@ version = "1.0.0"
description = "Type system extensions for programs checked with the mypy type checker."
optional = false
python-versions = ">=3.5"
+groups = ["dev"]
files = [
{file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
@@ -1275,6 +1332,7 @@ version = "3.3"
description = "Python package for creating and manipulating graphs and networks"
optional = false
python-versions = ">=3.10"
+groups = ["main"]
files = [
{file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"},
{file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"},
@@ -1293,6 +1351,7 @@ version = "1.26.4"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.9"
+groups = ["main"]
files = [
{file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
{file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
@@ -1334,56 +1393,149 @@ files = [
[[package]]
name = "nvidia-cublas-cu12"
-version = "12.1.3.1"
+version = "12.6.4.1"
+description = "CUBLAS native runtime libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
+files = [
+ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"},
+ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"},
+ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-win_amd64.whl", hash = "sha256:9e4fa264f4d8a4eb0cdbd34beadc029f453b3bafae02401e999cf3d5a5af75f8"},
+]
+
+[[package]]
+name = "nvidia-cublas-cu12"
+version = "12.8.4.1"
description = "CUBLAS native runtime libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0"},
+ {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142"},
+ {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af"},
+]
+
+[[package]]
+name = "nvidia-cuda-cupti-cu12"
+version = "12.6.80"
+description = "CUDA profiling tools runtime libs."
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
- {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"},
+ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"},
+ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"},
+ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132"},
+ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73"},
+ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-win_amd64.whl", hash = "sha256:bbe6ae76e83ce5251b56e8c8e61a964f757175682bbad058b170b136266ab00a"},
]
[[package]]
name = "nvidia-cuda-cupti-cu12"
-version = "12.1.105"
+version = "12.8.90"
description = "CUDA profiling tools runtime libs."
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed"},
+ {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182"},
+ {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e"},
+]
+
+[[package]]
+name = "nvidia-cuda-nvrtc-cu12"
+version = "12.6.77"
+description = "NVRTC native runtime libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
- {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:f7007dbd914c56bd80ea31bc43e8e149da38f68158f423ba845fc3292684e45a"},
]
[[package]]
name = "nvidia-cuda-nvrtc-cu12"
-version = "12.1.105"
+version = "12.8.93"
description = "NVRTC native runtime libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8"},
+ {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909"},
+]
+
+[[package]]
+name = "nvidia-cuda-runtime-cu12"
+version = "12.6.77"
+description = "CUDA Runtime native Libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
- {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"},
+ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"},
+ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"},
+ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7"},
+ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8"},
+ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:86c58044c824bf3c173c49a2dbc7a6c8b53cb4e4dca50068be0bf64e9dab3f7f"},
]
[[package]]
name = "nvidia-cuda-runtime-cu12"
-version = "12.1.105"
+version = "12.8.90"
description = "CUDA Runtime native Libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d"},
+ {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90"},
+ {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8"},
+]
+
+[[package]]
+name = "nvidia-cudnn-cu12"
+version = "9.5.1.17"
+description = "cuDNN runtime libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
- {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"},
+ {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"},
+ {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"},
+ {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-win_amd64.whl", hash = "sha256:d7af0f8a4f3b4b9dbb3122f2ef553b45694ed9c384d5a75bab197b8eefb79ab8"},
]
+[package.dependencies]
+nvidia-cublas-cu12 = "*"
+
[[package]]
name = "nvidia-cudnn-cu12"
-version = "8.9.2.26"
+version = "9.10.2.21"
description = "cuDNN runtime libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
files = [
- {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"},
+ {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8"},
+ {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8"},
+ {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e"},
]
[package.dependencies]
@@ -1391,35 +1543,129 @@ nvidia-cublas-cu12 = "*"
[[package]]
name = "nvidia-cufft-cu12"
-version = "11.0.2.54"
+version = "11.3.0.4"
+description = "CUFFT native runtime libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
+files = [
+ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"},
+ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"},
+ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5"},
+ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca"},
+ {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-win_amd64.whl", hash = "sha256:6048ebddfb90d09d2707efb1fd78d4e3a77cb3ae4dc60e19aab6be0ece2ae464"},
+]
+
+[package.dependencies]
+nvidia-nvjitlink-cu12 = "*"
+
+[[package]]
+name = "nvidia-cufft-cu12"
+version = "11.3.3.83"
description = "CUFFT native runtime libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a"},
+ {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74"},
+ {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7"},
+]
+
+[package.dependencies]
+nvidia-nvjitlink-cu12 = "*"
+
+[[package]]
+name = "nvidia-cufile-cu12"
+version = "1.11.1.6"
+description = "cuFile GPUDirect libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
+files = [
+ {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"},
+ {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"},
+]
+
+[[package]]
+name = "nvidia-cufile-cu12"
+version = "1.13.1.3"
+description = "cuFile GPUDirect libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc"},
+ {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a"},
+]
+
+[[package]]
+name = "nvidia-curand-cu12"
+version = "10.3.7.77"
+description = "CURAND native runtime libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
- {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"},
+ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"},
+ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"},
+ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117"},
+ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7b2ed8e95595c3591d984ea3603dd66fe6ce6812b886d59049988a712ed06b6e"},
+ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-win_amd64.whl", hash = "sha256:6d6d935ffba0f3d439b7cd968192ff068fafd9018dbf1b85b37261b13cfc9905"},
]
[[package]]
name = "nvidia-curand-cu12"
-version = "10.3.2.106"
+version = "10.3.9.90"
description = "CURAND native runtime libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd"},
+ {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9"},
+ {file = "nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec"},
+]
+
+[[package]]
+name = "nvidia-cusolver-cu12"
+version = "11.7.1.2"
+description = "CUDA solver native runtime libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
- {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"},
+ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"},
+ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"},
+ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6"},
+ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dbbe4fc38ec1289c7e5230e16248365e375c3673c9c8bac5796e2e20db07f56e"},
+ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-win_amd64.whl", hash = "sha256:6813f9d8073f555444a8705f3ab0296d3e1cb37a16d694c5fc8b862a0d8706d7"},
]
+[package.dependencies]
+nvidia-cublas-cu12 = "*"
+nvidia-cusparse-cu12 = "*"
+nvidia-nvjitlink-cu12 = "*"
+
[[package]]
name = "nvidia-cusolver-cu12"
-version = "11.4.5.107"
+version = "11.7.3.90"
description = "CUDA solver native runtime libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
files = [
- {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
- {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"},
+ {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0"},
+ {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450"},
+ {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34"},
]
[package.dependencies]
@@ -1429,49 +1675,163 @@ nvidia-nvjitlink-cu12 = "*"
[[package]]
name = "nvidia-cusparse-cu12"
-version = "12.1.0.106"
+version = "12.5.4.2"
+description = "CUSPARSE native runtime libraries"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
+files = [
+ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"},
+ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"},
+ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73"},
+ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f"},
+ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-win_amd64.whl", hash = "sha256:4acb8c08855a26d737398cba8fb6f8f5045d93f82612b4cfd84645a2332ccf20"},
+]
+
+[package.dependencies]
+nvidia-nvjitlink-cu12 = "*"
+
+[[package]]
+name = "nvidia-cusparse-cu12"
+version = "12.5.8.93"
description = "CUSPARSE native runtime libraries"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
files = [
- {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
- {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"},
+ {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc"},
+ {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b"},
+ {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd"},
]
[package.dependencies]
nvidia-nvjitlink-cu12 = "*"
+[[package]]
+name = "nvidia-cusparselt-cu12"
+version = "0.6.3"
+description = "NVIDIA cuSPARSELt"
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
+files = [
+ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"},
+ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"},
+ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-win_amd64.whl", hash = "sha256:3b325bcbd9b754ba43df5a311488fca11a6b5dc3d11df4d190c000cf1a0765c7"},
+]
+
+[[package]]
+name = "nvidia-cusparselt-cu12"
+version = "0.7.1"
+description = "NVIDIA cuSPARSELt"
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5"},
+ {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623"},
+ {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075"},
+]
+
+[[package]]
+name = "nvidia-nccl-cu12"
+version = "2.26.2"
+description = "NVIDIA Collective Communication Library (NCCL) Runtime"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
+files = [
+ {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"},
+ {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"},
+]
+
[[package]]
name = "nvidia-nccl-cu12"
-version = "2.20.5"
+version = "2.27.5"
description = "NVIDIA Collective Communication Library (NCCL) Runtime"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a"},
+ {file = "nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457"},
+]
+
+[[package]]
+name = "nvidia-nvjitlink-cu12"
+version = "12.6.85"
+description = "Nvidia JIT LTO Library"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"},
- {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"},
+ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"},
+ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"},
+ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:e61120e52ed675747825cdd16febc6a0730537451d867ee58bee3853b1b13d1c"},
]
[[package]]
name = "nvidia-nvjitlink-cu12"
-version = "12.5.82"
+version = "12.8.93"
description = "Nvidia JIT LTO Library"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88"},
+ {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7"},
+ {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f"},
+]
+
+[[package]]
+name = "nvidia-nvshmem-cu12"
+version = "3.3.20"
+description = "NVSHMEM creates a global address space that provides efficient and scalable communication for NVIDIA GPU clusters."
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0"},
+ {file = "nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5"},
+]
+
+[[package]]
+name = "nvidia-nvtx-cu12"
+version = "12.6.77"
+description = "NVIDIA Tools Extension"
+optional = false
+python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f9b37bc5c8cf7509665cb6ada5aaa0ce65618f2332b7d3e78e9790511f111212"},
- {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-win_amd64.whl", hash = "sha256:e782564d705ff0bf61ac3e1bf730166da66dd2fe9012f111ede5fc49b64ae697"},
+ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"},
+ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"},
+ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2"},
+ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1"},
+ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"},
]
[[package]]
name = "nvidia-nvtx-cu12"
-version = "12.1.105"
+version = "12.8.90"
description = "NVIDIA Tools Extension"
optional = false
python-versions = ">=3"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
files = [
- {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
- {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"},
+ {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615"},
+ {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f"},
+ {file = "nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e"},
]
[[package]]
@@ -1480,6 +1840,7 @@ version = "1.32.0"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
+groups = ["main"]
files = [
{file = "openai-1.32.0-py3-none-any.whl", hash = "sha256:953d57669f309002044fd2f678aba9f07a43256d74b3b00cd04afb5b185568ea"},
{file = "openai-1.32.0.tar.gz", hash = "sha256:a6df15a7ab9344b1bc2bc8d83639f68b7a7e2453c0f5e50c1666547eee86f0bd"},
@@ -1503,6 +1864,7 @@ version = "24.0"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.7"
+groups = ["main", "dev"]
files = [
{file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"},
{file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
@@ -1514,6 +1876,7 @@ version = "2.2.2"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
+groups = ["main"]
files = [
{file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
{file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
@@ -1587,6 +1950,7 @@ version = "0.12.1"
description = "Utility library for gitignore style pattern matching of file paths."
optional = false
python-versions = ">=3.8"
+groups = ["dev"]
files = [
{file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
@@ -1598,6 +1962,7 @@ version = "10.3.0"
description = "Python Imaging Library (Fork)"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"},
{file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"},
@@ -1675,18 +2040,19 @@ docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline
fpx = ["olefile"]
mic = ["olefile"]
tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
-typing = ["typing-extensions"]
+typing = ["typing-extensions ; python_version < \"3.10\""]
xmp = ["defusedxml"]
[[package]]
name = "plac"
-version = "1.4.3"
+version = "1.4.5"
description = "The smartest command line arguments parser in the world"
optional = false
python-versions = "*"
+groups = ["main"]
files = [
- {file = "plac-1.4.3-py2.py3-none-any.whl", hash = "sha256:8a84fde8f950c9de6588a2d53c9deeac3ba1ddb456d887a33228460cf6549750"},
- {file = "plac-1.4.3.tar.gz", hash = "sha256:d4cb3387b2113a28aebd509433d0264a4e5d9bb7c1a86db4fbd0a8f11af74eb3"},
+ {file = "plac-1.4.5-py2.py3-none-any.whl", hash = "sha256:87187786b4e446688b1cf5112e18fed8a23ab3b316c25fe91266a10bd1736b16"},
+ {file = "plac-1.4.5.tar.gz", hash = "sha256:5f05bf85235c017fcd76c73c8101d4ff8e96beb3dc58b9a37de49cac7de82d14"},
]
[[package]]
@@ -1695,6 +2061,7 @@ version = "4.2.2"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.8"
+groups = ["dev"]
files = [
{file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"},
{file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"},
@@ -1711,6 +2078,7 @@ version = "1.5.0"
description = "plugin and hook calling mechanisms for python"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
{file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
@@ -1722,22 +2090,22 @@ testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "protobuf"
-version = "5.27.1"
+version = "6.33.0"
description = ""
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
+groups = ["main"]
files = [
- {file = "protobuf-5.27.1-cp310-abi3-win32.whl", hash = "sha256:3adc15ec0ff35c5b2d0992f9345b04a540c1e73bfee3ff1643db43cc1d734333"},
- {file = "protobuf-5.27.1-cp310-abi3-win_amd64.whl", hash = "sha256:25236b69ab4ce1bec413fd4b68a15ef8141794427e0b4dc173e9d5d9dffc3bcd"},
- {file = "protobuf-5.27.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4e38fc29d7df32e01a41cf118b5a968b1efd46b9c41ff515234e794011c78b17"},
- {file = "protobuf-5.27.1-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:917ed03c3eb8a2d51c3496359f5b53b4e4b7e40edfbdd3d3f34336e0eef6825a"},
- {file = "protobuf-5.27.1-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:ee52874a9e69a30271649be88ecbe69d374232e8fd0b4e4b0aaaa87f429f1631"},
- {file = "protobuf-5.27.1-cp38-cp38-win32.whl", hash = "sha256:7a97b9c5aed86b9ca289eb5148df6c208ab5bb6906930590961e08f097258107"},
- {file = "protobuf-5.27.1-cp38-cp38-win_amd64.whl", hash = "sha256:f6abd0f69968792da7460d3c2cfa7d94fd74e1c21df321eb6345b963f9ec3d8d"},
- {file = "protobuf-5.27.1-cp39-cp39-win32.whl", hash = "sha256:dfddb7537f789002cc4eb00752c92e67885badcc7005566f2c5de9d969d3282d"},
- {file = "protobuf-5.27.1-cp39-cp39-win_amd64.whl", hash = "sha256:39309898b912ca6febb0084ea912e976482834f401be35840a008da12d189340"},
- {file = "protobuf-5.27.1-py3-none-any.whl", hash = "sha256:4ac7249a1530a2ed50e24201d6630125ced04b30619262f06224616e0030b6cf"},
- {file = "protobuf-5.27.1.tar.gz", hash = "sha256:df5e5b8e39b7d1c25b186ffdf9f44f40f810bbcc9d2b71d9d3156fee5a9adf15"},
+ {file = "protobuf-6.33.0-cp310-abi3-win32.whl", hash = "sha256:d6101ded078042a8f17959eccd9236fb7a9ca20d3b0098bbcb91533a5680d035"},
+ {file = "protobuf-6.33.0-cp310-abi3-win_amd64.whl", hash = "sha256:9a031d10f703f03768f2743a1c403af050b6ae1f3480e9c140f39c45f81b13ee"},
+ {file = "protobuf-6.33.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:905b07a65f1a4b72412314082c7dbfae91a9e8b68a0cc1577515f8df58ecf455"},
+ {file = "protobuf-6.33.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e0697ece353e6239b90ee43a9231318302ad8353c70e6e45499fa52396debf90"},
+ {file = "protobuf-6.33.0-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:e0a1715e4f27355afd9570f3ea369735afc853a6c3951a6afe1f80d8569ad298"},
+ {file = "protobuf-6.33.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:35be49fd3f4fefa4e6e2aacc35e8b837d6703c37a2168a55ac21e9b1bc7559ef"},
+ {file = "protobuf-6.33.0-cp39-cp39-win32.whl", hash = "sha256:cd33a8e38ea3e39df66e1bbc462b076d6e5ba3a4ebbde58219d777223a7873d3"},
+ {file = "protobuf-6.33.0-cp39-cp39-win_amd64.whl", hash = "sha256:c963e86c3655af3a917962c9619e1a6b9670540351d7af9439d06064e3317cc9"},
+ {file = "protobuf-6.33.0-py3-none-any.whl", hash = "sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995"},
+ {file = "protobuf-6.33.0.tar.gz", hash = "sha256:140303d5c8d2037730c548f8c7b93b20bb1dc301be280c378b82b8894589c954"},
]
[[package]]
@@ -1746,6 +2114,7 @@ version = "6.0.0"
description = "Cross-platform lib for process and system monitoring in Python."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+groups = ["main"]
files = [
{file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"},
{file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"},
@@ -1767,7 +2136,7 @@ files = [
]
[package.extras]
-test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
+test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""]
[[package]]
name = "py"
@@ -1775,6 +2144,7 @@ version = "1.11.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["main"]
files = [
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
@@ -1786,6 +2156,7 @@ version = "16.1.0"
description = "Python library for Apache Arrow"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"},
{file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"},
@@ -1834,6 +2205,7 @@ version = "0.6"
description = ""
optional = false
python-versions = ">=3.5"
+groups = ["main"]
files = [
{file = "pyarrow_hotfix-0.6-py3-none-any.whl", hash = "sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178"},
{file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"},
@@ -1845,6 +2217,7 @@ version = "2.11.1"
description = "Python style guide checker"
optional = false
python-versions = ">=3.8"
+groups = ["dev"]
files = [
{file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"},
{file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
@@ -1856,6 +2229,7 @@ version = "2.7.3"
description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "pydantic-2.7.3-py3-none-any.whl", hash = "sha256:ea91b002777bf643bb20dd717c028ec43216b24a6001a280f83877fd2655d0b4"},
{file = "pydantic-2.7.3.tar.gz", hash = "sha256:c46c76a40bb1296728d7a8b99aa73dd70a48c3510111ff290034f860c99c419e"},
@@ -1875,6 +2249,7 @@ version = "2.18.4"
description = "Core functionality for Pydantic validation and serialization"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "pydantic_core-2.18.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f76d0ad001edd426b92233d45c746fd08f467d56100fd8f30e9ace4b005266e4"},
{file = "pydantic_core-2.18.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:59ff3e89f4eaf14050c8022011862df275b552caef8082e37b542b066ce1ff26"},
@@ -1966,6 +2341,7 @@ version = "3.2.0"
description = "passive checker of Python programs"
optional = false
python-versions = ">=3.8"
+groups = ["dev"]
files = [
{file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"},
{file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"},
@@ -1977,6 +2353,7 @@ version = "7.1.3"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "pytest-7.1.3-py3-none-any.whl", hash = "sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"},
{file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"},
@@ -2000,6 +2377,7 @@ version = "3.14.0"
description = "Thin-wrapper around the mock package for easier use with pytest"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"},
{file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"},
@@ -2017,6 +2395,7 @@ version = "2.9.0.post0"
description = "Extensions to the standard Python datetime module"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+groups = ["main"]
files = [
{file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
{file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
@@ -2031,6 +2410,7 @@ version = "2024.1"
description = "World timezone definitions, modern and historical"
optional = false
python-versions = "*"
+groups = ["main"]
files = [
{file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
{file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
@@ -2042,6 +2422,7 @@ version = "6.0.1"
description = "YAML parser and emitter for Python"
optional = false
python-versions = ">=3.6"
+groups = ["main"]
files = [
{file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
{file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
@@ -2101,6 +2482,7 @@ version = "2024.5.15"
description = "Alternative regular expression module, to replace re."
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"},
{file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"},
@@ -2189,6 +2571,7 @@ version = "2.32.3"
description = "Python HTTP for Humans."
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
{file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
@@ -2210,6 +2593,7 @@ version = "1.3.0"
description = "Yet another Python binding for Juman++/KNP/KWJA"
optional = false
python-versions = ">=3.7,<4.0"
+groups = ["main"]
files = [
{file = "rhoknp-1.3.0-py3-none-any.whl", hash = "sha256:41ee79bbd25e8e1142d555a2e714356fd810b9bf9bb610c75b3bcb704c37ac00"},
{file = "rhoknp-1.3.0.tar.gz", hash = "sha256:ccbac0bba6662b00a573f2d0361e64978901202c44c56b50b3ce2afa5dbb23b6"},
@@ -2224,6 +2608,7 @@ version = "0.4.3"
description = ""
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "safetensors-0.4.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dcf5705cab159ce0130cd56057f5f3425023c407e170bca60b4868048bae64fd"},
{file = "safetensors-0.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bb4f8c5d0358a31e9a08daeebb68f5e161cdd4018855426d3f0c23bb51087055"},
@@ -2346,6 +2731,7 @@ version = "1.5.0"
description = "A set of python modules for machine learning and data mining"
optional = false
python-versions = ">=3.9"
+groups = ["main"]
files = [
{file = "scikit_learn-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12e40ac48555e6b551f0a0a5743cc94cc5a765c9513fe708e01f0aa001da2801"},
{file = "scikit_learn-1.5.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f405c4dae288f5f6553b10c4ac9ea7754d5180ec11e296464adb5d6ac68b6ef5"},
@@ -2391,6 +2777,7 @@ version = "1.13.1"
description = "Fundamental algorithms for scientific computing in Python"
optional = false
python-versions = ">=3.9"
+groups = ["main"]
files = [
{file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"},
{file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"},
@@ -2429,97 +2816,141 @@ test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "po
[[package]]
name = "sentence-transformers"
-version = "3.0.0"
-description = "Multilingual text embeddings"
+version = "5.1.1"
+description = "Embeddings, Retrieval, and Reranking"
optional = false
-python-versions = ">=3.8.0"
+python-versions = ">=3.9"
+groups = ["main"]
files = [
- {file = "sentence_transformers-3.0.0-py3-none-any.whl", hash = "sha256:9bf851b688b796e5fb06c920921efd5e5e05ee616e85cb3026fbdfe4dcf15bf3"},
- {file = "sentence_transformers-3.0.0.tar.gz", hash = "sha256:52d4101654ed107a28e9fa5110fce399084b55e7838fd8256471353ddc299033"},
+ {file = "sentence_transformers-5.1.1-py3-none-any.whl", hash = "sha256:5ed544629eafe89ca668a8910ebff96cf0a9c5254ec14b05c66c086226c892fd"},
+ {file = "sentence_transformers-5.1.1.tar.gz", hash = "sha256:8af3f844b2ecf9a6c2dfeafc2c02938a87f61202b54329d70dfd7dfd7d17a84e"},
]
[package.dependencies]
-huggingface-hub = ">=0.15.1"
-numpy = "*"
+huggingface-hub = ">=0.20.0"
Pillow = "*"
scikit-learn = "*"
scipy = "*"
torch = ">=1.11.0"
tqdm = "*"
-transformers = ">=4.34.0,<5.0.0"
+transformers = ">=4.41.0,<5.0.0"
+typing_extensions = ">=4.5.0"
[package.extras]
-dev = ["accelerate (>=0.20.3)", "datasets", "pre-commit", "pytest", "ruff (>=0.3.0)"]
+dev = ["accelerate (>=0.20.3)", "datasets", "peft", "pre-commit", "pytest", "pytest-cov"]
+onnx = ["optimum[onnxruntime] (>=1.23.1)"]
+onnx-gpu = ["optimum[onnxruntime-gpu] (>=1.23.1)"]
+openvino = ["optimum-intel[openvino] (>=1.20.0)"]
train = ["accelerate (>=0.20.3)", "datasets"]
[[package]]
name = "sentencepiece"
-version = "0.2.0"
-description = "SentencePiece python wrapper"
+version = "0.2.1"
+description = "Unsupervised text tokenizer and detokenizer."
optional = false
-python-versions = "*"
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "sentencepiece-0.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e10fa50bdbaa5e2445dbd387979980d391760faf0ec99a09bd7780ff37eaec44"},
+ {file = "sentencepiece-0.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f27ae6deea72efdb6f361750c92f6c21fd0ad087445082770cc34015213c526"},
+ {file = "sentencepiece-0.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60937c959e6f44159fdd9f56fbdd302501f96114a5ba436829496d5f32d8de3f"},
+ {file = "sentencepiece-0.2.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8b1d91545578852f128650b8cce4ec20f93d39b378ff554ebe66290f2dabb92"},
+ {file = "sentencepiece-0.2.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27e38eee653abc3d387862e67bc5c8b6f428cd604e688b85d29170b7e725c26c"},
+ {file = "sentencepiece-0.2.1-cp310-cp310-win32.whl", hash = "sha256:251874d720ac7f28024a168501f3c7bb15d1802245f6e66de565f18bbb9b5eaa"},
+ {file = "sentencepiece-0.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:e52144670738b4b477fade6c2a9b6af71a8d0094514c9853ac9f6fc1fcfabae7"},
+ {file = "sentencepiece-0.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:9076430ac25dfa7147d9d05751dbc66a04bc1aaac371c07f84952979ea59f0d0"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6356d0986b8b8dc351b943150fcd81a1c6e6e4d439772e8584c64230e58ca987"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8f8ba89a3acb3dc1ae90f65ec1894b0b9596fdb98ab003ff38e058f898b39bc7"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02593eca45440ef39247cee8c47322a34bdcc1d8ae83ad28ba5a899a2cf8d79a"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a0d15781a171d188b661ae4bde1d998c303f6bd8621498c50c671bd45a4798e"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f5a3e0d9f445ed9d66c0fec47d4b23d12cfc858b407a03c194c1b26c2ac2a63"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-win32.whl", hash = "sha256:6d297a1748d429ba8534eebe5535448d78b8acc32d00a29b49acf28102eeb094"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:82d9ead6591015f009cb1be1cb1c015d5e6f04046dbb8c9588b931e869a29728"},
+ {file = "sentencepiece-0.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:39f8651bd10974eafb9834ce30d9bcf5b73e1fc798a7f7d2528f9820ca86e119"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57cae326c8727de58c85977b175af132a7138d84c764635d7e71bbee7e774133"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:56dd39a3c4d6493db3cdca7e8cc68c6b633f0d4195495cbadfcf5af8a22d05a6"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9381351182ff9888cc80e41c632e7e274b106f450de33d67a9e8f6043da6f76"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99f955df238021bf11f0fc37cdb54fd5e5b5f7fd30ecc3d93fb48b6815437167"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdfecef430d985f1c2bcbfff3defd1d95dae876fbd0173376012d2d7d24044b"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-win32.whl", hash = "sha256:a483fd29a34c3e34c39ac5556b0a90942bec253d260235729e50976f5dba1068"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4cdc7c36234fda305e85c32949c5211faaf8dd886096c7cea289ddc12a2d02de"},
+ {file = "sentencepiece-0.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:daeb5e9e9fcad012324807856113708614d534f596d5008638eb9b40112cd9e4"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dcd8161eee7b41aae57ded06272905dbd680a0a04b91edd0f64790c796b2f706"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c6c8f42949f419ff8c7e9960dbadcfbc982d7b5efc2f6748210d3dd53a7de062"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:097f3394e99456e9e4efba1737c3749d7e23563dd1588ce71a3d007f25475fff"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b670879c370d350557edabadbad1f6561a9e6968126e6debca4029e5547820"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7f0fd2f2693309e6628aeeb2e2faf6edd221134dfccac3308ca0de01f8dab47"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-win32.whl", hash = "sha256:92b3816aa2339355fda2c8c4e021a5de92180b00aaccaf5e2808972e77a4b22f"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:10ed3dab2044c47f7a2e7b4969b0c430420cdd45735d78c8f853191fa0e3148b"},
+ {file = "sentencepiece-0.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac650534e2251083c5f75dde4ff28896ce7c8904133dc8fef42780f4d5588fcd"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8dd4b477a7b069648d19363aad0cab9bad2f4e83b2d179be668efa672500dc94"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c0f672da370cc490e4c59d89e12289778310a0e71d176c541e4834759e1ae07"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad8493bea8432dae8d6830365352350f3b4144415a1d09c4c8cb8d30cf3b6c3c"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b81a24733726e3678d2db63619acc5a8dccd074f7aa7a54ecd5ca33ca6d2d596"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a81799d0a68d618e89063fb423c3001a034c893069135ffe51fee439ae474d6"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-win32.whl", hash = "sha256:89a3ea015517c42c0341d0d962f3e6aaf2cf10d71b1932d475c44ba48d00aa2b"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:33f068c9382dc2e7c228eedfd8163b52baa86bb92f50d0488bf2b7da7032e484"},
+ {file = "sentencepiece-0.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:b3616ad246f360e52c85781e47682d31abfb6554c779e42b65333d4b5f44ecc0"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5d0350b686c320068702116276cfb26c066dc7e65cfef173980b11bb4d606719"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c7f54a31cde6fa5cb030370566f68152a742f433f8d2be458463d06c208aef33"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c83b85ab2d6576607f31df77ff86f28182be4a8de6d175d2c33ca609925f5da1"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1855f57db07b51fb51ed6c9c452f570624d2b169b36f0f79ef71a6e6c618cd8b"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01e6912125cb45d3792f530a4d38f8e21bf884d6b4d4ade1b2de5cf7a8d2a52b"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-win32.whl", hash = "sha256:c415c9de1447e0a74ae3fdb2e52f967cb544113a3a5ce3a194df185cbc1f962f"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:881b2e44b14fc19feade3cbed314be37de639fc415375cefaa5bc81a4be137fd"},
+ {file = "sentencepiece-0.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:2005242a16d2dc3ac5fe18aa7667549134d37854823df4c4db244752453b78a8"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a19adcec27c524cb7069a1c741060add95f942d1cbf7ad0d104dffa0a7d28a2b"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e37e4b4c4a11662b5db521def4e44d4d30ae69a1743241412a93ae40fdcab4bb"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:477c81505db072b3ab627e7eab972ea1025331bd3a92bacbf798df2b75ea86ec"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:010f025a544ef770bb395091d57cb94deb9652d8972e0d09f71d85d5a0816c8c"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:733e59ff1794d26db706cd41fc2d7ca5f6c64a820709cb801dc0ea31780d64ab"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-win32.whl", hash = "sha256:d3233770f78e637dc8b1fda2cd7c3b99ec77e7505041934188a4e7fe751de3b0"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5e4366c97b68218fd30ea72d70c525e6e78a6c0a88650f57ac4c43c63b234a9d"},
+ {file = "sentencepiece-0.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:105e36e75cbac1292642045458e8da677b2342dcd33df503e640f0b457cb6751"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:afefe50a0cdcb4f2fd9733cb52001a2c164181ee2d82c32d38f5b1b326a8528c"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:891ade6503dd93d418c03993f7d6a8aa20260c422cefff5096b9068185e67642"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:814978ac05130dd5812b4b03215c766bc6abaef13e7bd72bc534e4d1e12e9a4c"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:017f97b274d4b0baa84b2dc743bf4517be81156f413bb24f12aacacde378e5ab"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c4ebcb3c6ab1496ab1c37c79ef7bb563b8726f29548c30773b7a4cb152df1a"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-win32.whl", hash = "sha256:caa4e560c72c151da80036aecc2159e51a7fd8ae9efebefd96860460ce6bd025"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:2af5a1fb05013332ad94343b8b5f3973e006a2dde2dfba55a819549e054e2f0f"},
+ {file = "sentencepiece-0.2.1-cp39-cp39-win_arm64.whl", hash = "sha256:3d165fbb9bf8fba35f1946ba2617c3f9995679f07438325f07c026d53f33e746"},
+ {file = "sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad"},
+]
+
+[package.extras]
+test = ["pytest"]
+testpaths = ["test"]
+
+[[package]]
+name = "setuptools"
+version = "80.9.0"
+description = "Easily download, build, install, upgrade, and uninstall Python packages"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+markers = "python_version >= \"3.12\""
files = [
- {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227"},
- {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452"},
- {file = "sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7b67e724bead13f18db6e1d10b6bbdc454af574d70efbb36f27d90387be1ca3"},
- {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fde4b08cfe237be4484c6c7c2e2c75fb862cfeab6bd5449ce4caeafd97b767a"},
- {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c378492056202d1c48a4979650981635fd97875a00eabb1f00c6a236b013b5e"},
- {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1380ce6540a368de2ef6d7e6ba14ba8f3258df650d39ba7d833b79ee68a52040"},
- {file = "sentencepiece-0.2.0-cp310-cp310-win32.whl", hash = "sha256:a1151d6a6dd4b43e552394aed0edfe9292820272f0194bd56c7c1660a0c06c3d"},
- {file = "sentencepiece-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:d490142b0521ef22bc1085f061d922a2a6666175bb6b42e588ff95c0db6819b2"},
- {file = "sentencepiece-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17982700c4f6dbb55fa3594f3d7e5dd1c8659a274af3738e33c987d2a27c9d5c"},
- {file = "sentencepiece-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7c867012c0e8bcd5bdad0f791609101cb5c66acb303ab3270218d6debc68a65e"},
- {file = "sentencepiece-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd6071249c74f779c5b27183295b9202f8dedb68034e716784364443879eaa6"},
- {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f90c55a65013cbb8f4d7aab0599bf925cde4adc67ae43a0d323677b5a1c6cb"},
- {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b293734059ef656dcd65be62ff771507bea8fed0a711b6733976e1ed3add4553"},
- {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e58b47f933aca74c6a60a79dcb21d5b9e47416256c795c2d58d55cec27f9551d"},
- {file = "sentencepiece-0.2.0-cp311-cp311-win32.whl", hash = "sha256:c581258cf346b327c62c4f1cebd32691826306f6a41d8c4bec43b010dee08e75"},
- {file = "sentencepiece-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:0993dbc665f4113017892f1b87c3904a44d0640eda510abcacdfb07f74286d36"},
- {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2"},
- {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c"},
- {file = "sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f"},
- {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08"},
- {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7"},
- {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109"},
- {file = "sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = "sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251"},
- {file = "sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f"},
- {file = "sentencepiece-0.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4547683f330289ec4f093027bfeb87f9ef023b2eb6f879fdc4a8187c7e0ffb90"},
- {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cd6175f7eaec7142d2bf6f6597ce7db4c9ac89acf93fcdb17410c3a8b781eeb"},
- {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:859ba1acde782609a0910a26a60e16c191a82bf39b5621107552c0cd79fad00f"},
- {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbbef6cc277f8f18f36959e305f10b1c620442d75addc79c21d7073ae581b50"},
- {file = "sentencepiece-0.2.0-cp36-cp36m-win32.whl", hash = "sha256:536b934e244829e3fe6c4f198652cd82da48adb9aa145c9f00889542726dee3d"},
- {file = "sentencepiece-0.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:0a91aaa3c769b52440df56fafda683b3aa48e3f2169cf7ee5b8c8454a7f3ae9b"},
- {file = "sentencepiece-0.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:787e480ca4c1d08c9985a7eb1eae4345c107729c99e9b5a9a00f2575fc7d4b4b"},
- {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4d158189eb2ecffea3a51edf6d25e110b3678ec47f1a40f2d541eafbd8f6250"},
- {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1e5ca43013e8935f25457a4fca47e315780172c3e821b4b13a890668911c792"},
- {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7140d9e5a74a0908493bb4a13f1f16a401297bd755ada4c707e842fbf6f0f5bf"},
- {file = "sentencepiece-0.2.0-cp37-cp37m-win32.whl", hash = "sha256:6cf333625234f247ab357b0bd9836638405ea9082e1543d5b8408f014979dcbf"},
- {file = "sentencepiece-0.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ff88712338b01031910e8e61e7239aff3ce8869ee31a47df63cb38aadd591bea"},
- {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20813a68d4c221b1849c62c30e1281ea81687894d894b8d4a0f4677d9311e0f5"},
- {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:926ef920ae2e8182db31d3f5d081ada57804e3e1d3a8c4ef8b117f9d9fb5a945"},
- {file = "sentencepiece-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:89f65f69636b7e9c015b79dff9c9985a9bc7d19ded6f79ef9f1ec920fdd73ecf"},
- {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f67eae0dbe6f2d7d6ba50a354623d787c99965f068b81e145d53240198021b0"},
- {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:98501e075f35dd1a1d5a20f65be26839fcb1938752ec61539af008a5aa6f510b"},
- {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3d1d2cc4882e8d6a1adf9d5927d7716f80617fc693385661caff21888972269"},
- {file = "sentencepiece-0.2.0-cp38-cp38-win32.whl", hash = "sha256:b99a308a2e5e569031ab164b74e6fab0b6f37dfb493c32f7816225f4d411a6dd"},
- {file = "sentencepiece-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:cdb701eec783d3ec86b7cd4c763adad8eaf6b46db37ee1c36e5e6c44b3fe1b5f"},
- {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1e0f9c4d0a6b0af59b613175f019916e28ade076e21242fd5be24340d8a2f64a"},
- {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:298f21cc1366eb60311aedba3169d30f885c363ddbf44214b0a587d2908141ad"},
- {file = "sentencepiece-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f1ec95aa1e5dab11f37ac7eff190493fd87770f7a8b81ebc9dd768d1a3c8704"},
- {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b06b70af54daa4b4904cbb90b4eb6d35c9f3252fdc86c9c32d5afd4d30118d8"},
- {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22e37bac44dd6603388cb598c64ff7a76e41ca774646f21c23aadfbf5a2228ab"},
- {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0461324897735512a32d222e3d886e24ad6a499761952b6bda2a9ee6e4313ea5"},
- {file = "sentencepiece-0.2.0-cp39-cp39-win32.whl", hash = "sha256:38aed822fb76435fa1f12185f10465a94ab9e51d5e8a9159e9a540ce926f0ffd"},
- {file = "sentencepiece-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:d8cf876516548b5a1d6ac4745d8b554f5c07891d55da557925e5c13ff0b4e6ad"},
- {file = "sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843"},
+ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
+ {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
]
+[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""]
+core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
+type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"]
+
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+groups = ["main"]
files = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
@@ -2531,6 +2962,7 @@ version = "7.0.4"
description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)"
optional = false
python-versions = "<4.0,>=3.7"
+groups = ["main"]
files = [
{file = "smart_open-7.0.4-py3-none-any.whl", hash = "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47"},
{file = "smart_open-7.0.4.tar.gz", hash = "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524"},
@@ -2556,6 +2988,7 @@ version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@@ -2563,13 +2996,14 @@ files = [
[[package]]
name = "sudachidict-core"
-version = "20240409"
+version = "20251022"
description = "Sudachi Dictionary for SudachiPy - Core Edition"
optional = false
python-versions = "*"
+groups = ["main"]
files = [
- {file = "SudachiDict-core-20240409.tar.gz", hash = "sha256:341eb2fdf1ce3a0db329213b01e0dea2f0e3db26ea1f5244c43c4a1cd739d41e"},
- {file = "SudachiDict_core-20240409-py3-none-any.whl", hash = "sha256:99b165574f9fe7a42c9caee2f4f274d22f8c99602eaba2863575bbc09020a2fb"},
+ {file = "sudachidict_core-20251022-py3-none-any.whl", hash = "sha256:ca67fe366c4cf3a35f4feef019fdb98a0c17129c66e5c0ececeae30dc318c016"},
+ {file = "sudachidict_core-20251022.tar.gz", hash = "sha256:2b25ffb00c7018c9d4af312f11d833fb710b680c4bfe8e3545da1e6fc18713fb"},
]
[package.dependencies]
@@ -2577,48 +3011,52 @@ SudachiPy = ">=0.5,<0.7"
[[package]]
name = "sudachipy"
-version = "0.6.8"
+version = "0.6.10"
description = "Python version of Sudachi, the Japanese Morphological Analyzer"
optional = false
python-versions = "*"
-files = [
- {file = "SudachiPy-0.6.8-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:85f91a6ac347d2fbf478ae96e0e08efe7b8e47fb7cdfb770e90611be5669cabb"},
- {file = "SudachiPy-0.6.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:361ef3e3333ef4165b517668878dd80fbed6d3c443659b9dc3236132ea8f7fbb"},
- {file = "SudachiPy-0.6.8-cp310-cp310-win_amd64.whl", hash = "sha256:081c52918bdae35f564637db146389f0a48b3b5263f215859b4d1ae311a7a474"},
- {file = "SudachiPy-0.6.8-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:0a6cb506e402933023ea07035fc3e81d65880392afcdb2f09676027882b09e73"},
- {file = "SudachiPy-0.6.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d19db58be100b05362d00d0ad5cd29aff6da31807967b302f35bd43dd59e141f"},
- {file = "SudachiPy-0.6.8-cp311-cp311-win_amd64.whl", hash = "sha256:27833ae472220dc46f934edd9a8839b0134279c0113f7da01d67e424bfe2d0ab"},
- {file = "SudachiPy-0.6.8-cp312-cp312-macosx_10_12_universal2.whl", hash = "sha256:7f75d4627fa141bc02951c5ce17ec7055faf2e9424d10c697e923c27b7936369"},
- {file = "SudachiPy-0.6.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33afa2efa4d98ae3cbea0ab8cc09c71b0405d188074d0c4cef2b2080a51caafe"},
- {file = "SudachiPy-0.6.8-cp312-cp312-win_amd64.whl", hash = "sha256:2a2f22605093ed7994eb7edced2a21c8ac71b9ecc9877e94539414b1a60d172a"},
- {file = "SudachiPy-0.6.8-cp37-cp37m-macosx_10_12_universal2.whl", hash = "sha256:6ab54826d151dcf69dfd168e784887d2701c553cf3f455d28b171e64584a404d"},
- {file = "SudachiPy-0.6.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d9aa1890b3f43af0ff691f6de8f770ab9ea58506d9e1ee3c8bb9aae460c58d2"},
- {file = "SudachiPy-0.6.8-cp37-cp37m-win_amd64.whl", hash = "sha256:686a890a376589e78b606548f9d5427a43ce8492edc46bcd09c104d9df594f7c"},
- {file = "SudachiPy-0.6.8-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:8d19395daf8c96e4a14df18c4df634e1f7caa7790917ab089c174ffcbdcaf4c0"},
- {file = "SudachiPy-0.6.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cae943138ef2e9d0126a5a4110dca5d6e5d8f35dc3f909e3ef1aeff3aa565b"},
- {file = "SudachiPy-0.6.8-cp38-cp38-win_amd64.whl", hash = "sha256:e8de107715dcd1d566837c91c6a10572efc171d4969a505176ecb37efe65cb48"},
- {file = "SudachiPy-0.6.8-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:d52ddc5001b0125375419409adee012f8957b15ad1a4017e18f30c54ba69f9b7"},
- {file = "SudachiPy-0.6.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a7c98f75567bd8488a1597c83f8f6abb4c15c577d0b5f92fa0c31c8304dae4"},
- {file = "SudachiPy-0.6.8-cp39-cp39-win_amd64.whl", hash = "sha256:1ae6e533f98e510e751d7355ec512aff3a7dac73539abb61c731cdcc316a183f"},
- {file = "SudachiPy-0.6.8.tar.gz", hash = "sha256:3d1c9086ff09afacc34d02fdb2112aab7cff1d78f0d4b81f78b9ba01c36d4888"},
+groups = ["main"]
+files = [
+ {file = "SudachiPy-0.6.10-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:418899c5794ec8fd86341d690bdd23bb85f35890540520624a001c751bcfdff0"},
+ {file = "SudachiPy-0.6.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99aeaf4a7bbf4c473929f5a9812226123dac1457fb0d549c5e95192eda3f0859"},
+ {file = "SudachiPy-0.6.10-cp310-cp310-win_amd64.whl", hash = "sha256:efd9c7584ed6dadf9f7d2f4ea616d06207b0d8a805861f9762072733b611b0db"},
+ {file = "SudachiPy-0.6.10-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:e947d907542c8086b7e6d18669f45599b3964eec4e954ad7dd85e4acdaa94793"},
+ {file = "SudachiPy-0.6.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1c1d8c579cc3af591a6511bffba9f88662eedf5ba32868ca8e3ba3c1051d60"},
+ {file = "SudachiPy-0.6.10-cp311-cp311-win_amd64.whl", hash = "sha256:8af8b3c91a9aaf0f300901967f85805d73e83297da6c56db50002dde3a4514fe"},
+ {file = "SudachiPy-0.6.10-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:efb43fb3b46696ca4510b7dd4c3e490de8dbb7950d7172140dc27a4e69cd5811"},
+ {file = "SudachiPy-0.6.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f8fd0ce37961401c9bdd78c126b2119a0a1669d376feb0b2427c35894ef1428"},
+ {file = "SudachiPy-0.6.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a79b92b0776613481481c1ed0d2e92994b233ed5d29aa365789a1ba521de0a4"},
+ {file = "SudachiPy-0.6.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:cc97b5d48f46f9989d97e105f7dd6419da2174888fcc42e55c0e4cd46597ed3b"},
+ {file = "SudachiPy-0.6.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9794b73fffd8099d93e07882ba87eee5edbed0e4f1b94761db8f22c8e5da9904"},
+ {file = "SudachiPy-0.6.10-cp313-cp313-win_amd64.whl", hash = "sha256:0fc5b60920a439c534688237e2651e15e4eaadc166a63182d6e24ac7ef3e4779"},
+ {file = "SudachiPy-0.6.10-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:7455e5cbb4c2cf9294c82345c9d46b344774b4eb23eca917f305ed716d8d5168"},
+ {file = "SudachiPy-0.6.10-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38d0de9e840ac8d199e714a40506792ea5237d0db0c966da16d51fbc74a508d6"},
+ {file = "SudachiPy-0.6.10-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:de4fc5c155479f873f5f7cfb04989ffb41e6a187c566c59efdb7946fc87498fe"},
+ {file = "SudachiPy-0.6.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5e2664dc436798d967c0fd92ae5186a175822eb38d294e2da7dad4417b8625c"},
+ {file = "SudachiPy-0.6.10-cp39-cp39-win_amd64.whl", hash = "sha256:af941d5393b8389acbaf9ec5f50e7b2ef48cb0a875594d9d4347e78e86cf842a"},
+ {file = "sudachipy-0.6.10.tar.gz", hash = "sha256:b8910a4610de98b2c3cb6dc3362fea93e3ba5059f1eb445a68baa9585278f31b"},
]
[package.extras]
-tests = ["sudachidict-core", "tokenizers"]
+tests = ["sudachidict_core", "tokenizers"]
[[package]]
name = "sympy"
-version = "1.12.1"
+version = "1.14.0"
description = "Computer algebra system (CAS) in Python"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
+groups = ["main"]
files = [
- {file = "sympy-1.12.1-py3-none-any.whl", hash = "sha256:9b2cbc7f1a640289430e13d2a56f02f867a1da0190f2f99d8968c2f74da0e515"},
- {file = "sympy-1.12.1.tar.gz", hash = "sha256:2877b03f998cd8c08f07cd0de5b767119cd3ef40d09f41c30d722f6686b0fb88"},
+ {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"},
+ {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"},
]
[package.dependencies]
-mpmath = ">=1.1.0,<1.4.0"
+mpmath = ">=1.1.0,<1.4"
+
+[package.extras]
+dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"]
[[package]]
name = "tabulate"
@@ -2626,6 +3064,7 @@ version = "0.9.0"
description = "Pretty-print tabular data"
optional = false
python-versions = ">=3.7"
+groups = ["main", "dev"]
files = [
{file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
{file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
@@ -2634,25 +3073,13 @@ files = [
[package.extras]
widechars = ["wcwidth"]
-[[package]]
-name = "tbb"
-version = "2021.13.0"
-description = "Intel® oneAPI Threading Building Blocks (oneTBB)"
-optional = false
-python-versions = "*"
-files = [
- {file = "tbb-2021.13.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:a2567725329639519d46d92a2634cf61e76601dac2f777a05686fea546c4fe4f"},
- {file = "tbb-2021.13.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:aaf667e92849adb012b8874d6393282afc318aca4407fc62f912ee30a22da46a"},
- {file = "tbb-2021.13.0-py3-none-win32.whl", hash = "sha256:6669d26703e9943f6164c6407bd4a237a45007e79b8d3832fe6999576eaaa9ef"},
- {file = "tbb-2021.13.0-py3-none-win_amd64.whl", hash = "sha256:3528a53e4bbe64b07a6112b4c5a00ff3c61924ee46c9c68e004a1ac7ad1f09c3"},
-]
-
[[package]]
name = "threadpoolctl"
version = "3.5.0"
description = "threadpoolctl"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"},
{file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"},
@@ -2664,6 +3091,7 @@ version = "0.6.0"
description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "tiktoken-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:277de84ccd8fa12730a6b4067456e5cf72fef6300bea61d506c09e45658d41ac"},
{file = "tiktoken-0.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c44433f658064463650d61387623735641dcc4b6c999ca30bc0f8ba3fccaf5c"},
@@ -2712,120 +3140,36 @@ blobfile = ["blobfile (>=2)"]
[[package]]
name = "tokenizers"
-version = "0.19.1"
+version = "0.22.1"
description = ""
optional = false
-python-versions = ">=3.7"
-files = [
- {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"},
- {file = "tokenizers-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82c8b8063de6c0468f08e82c4e198763e7b97aabfe573fd4cf7b33930ca4df77"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f03727225feaf340ceeb7e00604825addef622d551cbd46b7b775ac834c1e1c4"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:453e4422efdfc9c6b6bf2eae00d5e323f263fff62b29a8c9cd526c5003f3f642"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02e81bf089ebf0e7f4df34fa0207519f07e66d8491d963618252f2e0729e0b46"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b07c538ba956843833fee1190cf769c60dc62e1cf934ed50d77d5502194d63b1"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28cab1582e0eec38b1f38c1c1fb2e56bce5dc180acb1724574fc5f47da2a4fe"},
- {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b01afb7193d47439f091cd8f070a1ced347ad0f9144952a30a41836902fe09e"},
- {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7fb297edec6c6841ab2e4e8f357209519188e4a59b557ea4fafcf4691d1b4c98"},
- {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2e8a3dd055e515df7054378dc9d6fa8c8c34e1f32777fb9a01fea81496b3f9d3"},
- {file = "tokenizers-0.19.1-cp310-none-win32.whl", hash = "sha256:7ff898780a155ea053f5d934925f3902be2ed1f4d916461e1a93019cc7250837"},
- {file = "tokenizers-0.19.1-cp310-none-win_amd64.whl", hash = "sha256:bea6f9947e9419c2fda21ae6c32871e3d398cba549b93f4a65a2d369662d9403"},
- {file = "tokenizers-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5c88d1481f1882c2e53e6bb06491e474e420d9ac7bdff172610c4f9ad3898059"},
- {file = "tokenizers-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddf672ed719b4ed82b51499100f5417d7d9f6fb05a65e232249268f35de5ed14"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dadc509cc8a9fe460bd274c0e16ac4184d0958117cf026e0ea8b32b438171594"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfedf31824ca4915b511b03441784ff640378191918264268e6923da48104acc"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac11016d0a04aa6487b1513a3a36e7bee7eec0e5d30057c9c0408067345c48d2"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76951121890fea8330d3a0df9a954b3f2a37e3ec20e5b0530e9a0044ca2e11fe"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b342d2ce8fc8d00f376af068e3274e2e8649562e3bc6ae4a67784ded6b99428d"},
- {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d16ff18907f4909dca9b076b9c2d899114dd6abceeb074eca0c93e2353f943aa"},
- {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:706a37cc5332f85f26efbe2bdc9ef8a9b372b77e4645331a405073e4b3a8c1c6"},
- {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16baac68651701364b0289979ecec728546133e8e8fe38f66fe48ad07996b88b"},
- {file = "tokenizers-0.19.1-cp311-none-win32.whl", hash = "sha256:9ed240c56b4403e22b9584ee37d87b8bfa14865134e3e1c3fb4b2c42fafd3256"},
- {file = "tokenizers-0.19.1-cp311-none-win_amd64.whl", hash = "sha256:ad57d59341710b94a7d9dbea13f5c1e7d76fd8d9bcd944a7a6ab0b0da6e0cc66"},
- {file = "tokenizers-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:621d670e1b1c281a1c9698ed89451395d318802ff88d1fc1accff0867a06f153"},
- {file = "tokenizers-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d924204a3dbe50b75630bd16f821ebda6a5f729928df30f582fb5aade90c818a"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f3fefdc0446b1a1e6d81cd4c07088ac015665d2e812f6dbba4a06267d1a2c95"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9620b78e0b2d52ef07b0d428323fb34e8ea1219c5eac98c2596311f20f1f9266"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04ce49e82d100594715ac1b2ce87d1a36e61891a91de774755f743babcd0dd52"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5c2ff13d157afe413bf7e25789879dd463e5a4abfb529a2d8f8473d8042e28f"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3174c76efd9d08f836bfccaca7cfec3f4d1c0a4cf3acbc7236ad577cc423c840"},
- {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9d5b6c0e7a1e979bec10ff960fae925e947aab95619a6fdb4c1d8ff3708ce3"},
- {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a179856d1caee06577220ebcfa332af046d576fb73454b8f4d4b0ba8324423ea"},
- {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:952b80dac1a6492170f8c2429bd11fcaa14377e097d12a1dbe0ef2fb2241e16c"},
- {file = "tokenizers-0.19.1-cp312-none-win32.whl", hash = "sha256:01d62812454c188306755c94755465505836fd616f75067abcae529c35edeb57"},
- {file = "tokenizers-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:b70bfbe3a82d3e3fb2a5e9b22a39f8d1740c96c68b6ace0086b39074f08ab89a"},
- {file = "tokenizers-0.19.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:bb9dfe7dae85bc6119d705a76dc068c062b8b575abe3595e3c6276480e67e3f1"},
- {file = "tokenizers-0.19.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:1f0360cbea28ea99944ac089c00de7b2e3e1c58f479fb8613b6d8d511ce98267"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:71e3ec71f0e78780851fef28c2a9babe20270404c921b756d7c532d280349214"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b82931fa619dbad979c0ee8e54dd5278acc418209cc897e42fac041f5366d626"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8ff5b90eabdcdaa19af697885f70fe0b714ce16709cf43d4952f1f85299e73a"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e742d76ad84acbdb1a8e4694f915fe59ff6edc381c97d6dfdd054954e3478ad4"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8c5d59d7b59885eab559d5bc082b2985555a54cda04dda4c65528d90ad252ad"},
- {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b2da5c32ed869bebd990c9420df49813709e953674c0722ff471a116d97b22d"},
- {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:638e43936cc8b2cbb9f9d8dde0fe5e7e30766a3318d2342999ae27f68fdc9bd6"},
- {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:78e769eb3b2c79687d9cb0f89ef77223e8e279b75c0a968e637ca7043a84463f"},
- {file = "tokenizers-0.19.1-cp37-none-win32.whl", hash = "sha256:72791f9bb1ca78e3ae525d4782e85272c63faaef9940d92142aa3eb79f3407a3"},
- {file = "tokenizers-0.19.1-cp37-none-win_amd64.whl", hash = "sha256:f3bbb7a0c5fcb692950b041ae11067ac54826204318922da754f908d95619fbc"},
- {file = "tokenizers-0.19.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:07f9295349bbbcedae8cefdbcfa7f686aa420be8aca5d4f7d1ae6016c128c0c5"},
- {file = "tokenizers-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10a707cc6c4b6b183ec5dbfc5c34f3064e18cf62b4a938cb41699e33a99e03c1"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6309271f57b397aa0aff0cbbe632ca9d70430839ca3178bf0f06f825924eca22"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad23d37d68cf00d54af184586d79b84075ada495e7c5c0f601f051b162112dc"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:427c4f0f3df9109314d4f75b8d1f65d9477033e67ffaec4bca53293d3aca286d"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e83a31c9cf181a0a3ef0abad2b5f6b43399faf5da7e696196ddd110d332519ee"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c27b99889bd58b7e301468c0838c5ed75e60c66df0d4db80c08f43462f82e0d3"},
- {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bac0b0eb952412b0b196ca7a40e7dce4ed6f6926489313414010f2e6b9ec2adf"},
- {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6298bde623725ca31c9035a04bf2ef63208d266acd2bed8c2cb7d2b7d53ce6"},
- {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:08a44864e42fa6d7d76d7be4bec62c9982f6f6248b4aa42f7302aa01e0abfd26"},
- {file = "tokenizers-0.19.1-cp38-none-win32.whl", hash = "sha256:1de5bc8652252d9357a666e609cb1453d4f8e160eb1fb2830ee369dd658e8975"},
- {file = "tokenizers-0.19.1-cp38-none-win_amd64.whl", hash = "sha256:0bcce02bf1ad9882345b34d5bd25ed4949a480cf0e656bbd468f4d8986f7a3f1"},
- {file = "tokenizers-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0b9394bd204842a2a1fd37fe29935353742be4a3460b6ccbaefa93f58a8df43d"},
- {file = "tokenizers-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4692ab92f91b87769d950ca14dbb61f8a9ef36a62f94bad6c82cc84a51f76f6a"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6258c2ef6f06259f70a682491c78561d492e885adeaf9f64f5389f78aa49a051"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c85cf76561fbd01e0d9ea2d1cbe711a65400092bc52b5242b16cfd22e51f0c58"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670b802d4d82bbbb832ddb0d41df7015b3e549714c0e77f9bed3e74d42400fbe"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85aa3ab4b03d5e99fdd31660872249df5e855334b6c333e0bc13032ff4469c4a"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbf001afbbed111a79ca47d75941e9e5361297a87d186cbfc11ed45e30b5daba"},
- {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c89aa46c269e4e70c4d4f9d6bc644fcc39bb409cb2a81227923404dd6f5227"},
- {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:39c1ec76ea1027438fafe16ecb0fb84795e62e9d643444c1090179e63808c69d"},
- {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c2a0d47a89b48d7daa241e004e71fb5a50533718897a4cd6235cb846d511a478"},
- {file = "tokenizers-0.19.1-cp39-none-win32.whl", hash = "sha256:61b7fe8886f2e104d4caf9218b157b106207e0f2a4905c9c7ac98890688aabeb"},
- {file = "tokenizers-0.19.1-cp39-none-win_amd64.whl", hash = "sha256:f97660f6c43efd3e0bfd3f2e3e5615bf215680bad6ee3d469df6454b8c6e8256"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b11853f17b54c2fe47742c56d8a33bf49ce31caf531e87ac0d7d13d327c9334"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d26194ef6c13302f446d39972aaa36a1dda6450bc8949f5eb4c27f51191375bd"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e8d1ed93beda54bbd6131a2cb363a576eac746d5c26ba5b7556bc6f964425594"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca407133536f19bdec44b3da117ef0d12e43f6d4b56ac4c765f37eca501c7bda"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce05fde79d2bc2e46ac08aacbc142bead21614d937aac950be88dc79f9db9022"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:35583cd46d16f07c054efd18b5d46af4a2f070a2dd0a47914e66f3ff5efb2b1e"},
- {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b4399b59d1af5645bcee2072a463318114c39b8547437a7c2d6a186a1b5a0e2d"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6852c5b2a853b8b0ddc5993cd4f33bfffdca4fcc5d52f89dd4b8eada99379285"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd266ae85c3d39df2f7e7d0e07f6c41a55e9a3123bb11f854412952deacd828"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecb2651956eea2aa0a2d099434134b1b68f1c31f9a5084d6d53f08ed43d45ff2"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b279ab506ec4445166ac476fb4d3cc383accde1ea152998509a94d82547c8e2a"},
- {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:89183e55fb86e61d848ff83753f64cded119f5d6e1f553d14ffee3700d0a4a49"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2edbc75744235eea94d595a8b70fe279dd42f3296f76d5a86dde1d46e35f574"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:0e64bfde9a723274e9a71630c3e9494ed7b4c0f76a1faacf7fe294cd26f7ae7c"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b5ca92bfa717759c052e345770792d02d1f43b06f9e790ca0a1db62838816f3"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f8a20266e695ec9d7a946a019c1d5ca4eddb6613d4f466888eee04f16eedb85"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c38f45d8f2a2ec0f3a20073cccb335b9f99f73b3c69483cd52ebc75369d8a1"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dd26e3afe8a7b61422df3176e06664503d3f5973b94f45d5c45987e1cb711876"},
- {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:eddd5783a4a6309ce23432353cdb36220e25cbb779bfa9122320666508b44b88"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:56ae39d4036b753994476a1b935584071093b55c7a72e3b8288e68c313ca26e7"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f9939ca7e58c2758c01b40324a59c034ce0cebad18e0d4563a9b1beab3018243"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c330c0eb815d212893c67a032e9dc1b38a803eccb32f3e8172c19cc69fbb439"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec11802450a2487cdf0e634b750a04cbdc1c4d066b97d94ce7dd2cb51ebb325b"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b718f316b596f36e1dae097a7d5b91fc5b85e90bf08b01ff139bd8953b25af"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ed69af290c2b65169f0ba9034d1dc39a5db9459b32f1dd8b5f3f32a3fcf06eab"},
- {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f8a9c828277133af13f3859d1b6bf1c3cb6e9e1637df0e45312e6b7c2e622b1f"},
- {file = "tokenizers-0.19.1.tar.gz", hash = "sha256:ee59e6680ed0fdbe6b724cf38bd70400a0c1dd623b07ac729087270caeac88e3"},
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "tokenizers-0.22.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59fdb013df17455e5f950b4b834a7b3ee2e0271e6378ccb33aa74d178b513c73"},
+ {file = "tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8d4e484f7b0827021ac5f9f71d4794aaef62b979ab7608593da22b1d2e3c4edc"},
+ {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d2962dd28bc67c1f205ab180578a78eef89ac60ca7ef7cbe9635a46a56422a"},
+ {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38201f15cdb1f8a6843e6563e6e79f4abd053394992b9bbdf5213ea3469b4ae7"},
+ {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1cbe5454c9a15df1b3443c726063d930c16f047a3cc724b9e6e1a91140e5a21"},
+ {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7d094ae6312d69cc2a872b54b91b309f4f6fbce871ef28eb27b52a98e4d0214"},
+ {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afd7594a56656ace95cdd6df4cca2e4059d294c5cfb1679c57824b605556cb2f"},
+ {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2ef6063d7a84994129732b47e7915e8710f27f99f3a3260b8a38fc7ccd083f4"},
+ {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba0a64f450b9ef412c98f6bcd2a50c6df6e2443b560024a09fa6a03189726879"},
+ {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:331d6d149fa9c7d632cde4490fb8bbb12337fa3a0232e77892be656464f4b446"},
+ {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:607989f2ea68a46cb1dfbaf3e3aabdf3f21d8748312dbeb6263d1b3b66c5010a"},
+ {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a0f307d490295717726598ef6fa4f24af9d484809223bbc253b201c740a06390"},
+ {file = "tokenizers-0.22.1-cp39-abi3-win32.whl", hash = "sha256:b5120eed1442765cd90b903bb6cfef781fd8fe64e34ccaecbae4c619b7b12a82"},
+ {file = "tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138"},
+ {file = "tokenizers-0.22.1.tar.gz", hash = "sha256:61de6522785310a309b3407bac22d99c4db5dba349935e99e4d15ea2226af2d9"},
]
[package.dependencies]
-huggingface-hub = ">=0.16.4,<1.0"
+huggingface-hub = ">=0.16.4,<2.0"
[package.extras]
dev = ["tokenizers[testing]"]
docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"]
-testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"]
+testing = ["black (==22.3)", "datasets", "numpy", "pytest", "pytest-asyncio", "requests", "ruff"]
[[package]]
name = "tomli"
@@ -2833,64 +3177,143 @@ version = "2.0.1"
description = "A lil' TOML parser"
optional = false
python-versions = ">=3.7"
+groups = ["main", "dev"]
files = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
+markers = {dev = "python_version < \"3.11\""}
[[package]]
name = "torch"
-version = "2.3.1"
+version = "2.7.1"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
optional = false
-python-versions = ">=3.8.0"
-files = [
- {file = "torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:605a25b23944be5ab7c3467e843580e1d888b8066e5aaf17ff7bf9cc30001cc3"},
- {file = "torch-2.3.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f2357eb0965583a0954d6f9ad005bba0091f956aef879822274b1bcdb11bd308"},
- {file = "torch-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:32b05fe0d1ada7f69c9f86c14ff69b0ef1957a5a54199bacba63d22d8fab720b"},
- {file = "torch-2.3.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7c09a94362778428484bcf995f6004b04952106aee0ef45ff0b4bab484f5498d"},
- {file = "torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b2ec81b61bb094ea4a9dee1cd3f7b76a44555375719ad29f05c0ca8ef596ad39"},
- {file = "torch-2.3.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:490cc3d917d1fe0bd027057dfe9941dc1d6d8e3cae76140f5dd9a7e5bc7130ab"},
- {file = "torch-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5802530783bd465fe66c2df99123c9a54be06da118fbd785a25ab0a88123758a"},
- {file = "torch-2.3.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:a7dd4ed388ad1f3d502bf09453d5fe596c7b121de7e0cfaca1e2017782e9bbac"},
- {file = "torch-2.3.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:a486c0b1976a118805fc7c9641d02df7afbb0c21e6b555d3bb985c9f9601b61a"},
- {file = "torch-2.3.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:224259821fe3e4c6f7edf1528e4fe4ac779c77addaa74215eb0b63a5c474d66c"},
- {file = "torch-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5fdccbf6f1334b2203a61a0e03821d5845f1421defe311dabeae2fc8fbeac2d"},
- {file = "torch-2.3.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:3c333dc2ebc189561514eda06e81df22bf8fb64e2384746b2cb9f04f96d1d4c8"},
- {file = "torch-2.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:07e9ba746832b8d069cacb45f312cadd8ad02b81ea527ec9766c0e7404bb3feb"},
- {file = "torch-2.3.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:462d1c07dbf6bb5d9d2f3316fee73a24f3d12cd8dacf681ad46ef6418f7f6626"},
- {file = "torch-2.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff60bf7ce3de1d43ad3f6969983f321a31f0a45df3690921720bcad6a8596cc4"},
- {file = "torch-2.3.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:bee0bd33dc58aa8fc8a7527876e9b9a0e812ad08122054a5bff2ce5abf005b10"},
- {file = "torch-2.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:aaa872abde9a3d4f91580f6396d54888620f4a0b92e3976a6034759df4b961ad"},
- {file = "torch-2.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:3d7a7f7ef21a7520510553dc3938b0c57c116a7daee20736a9e25cbc0e832bdc"},
- {file = "torch-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:4777f6cefa0c2b5fa87223c213e7b6f417cf254a45e5829be4ccd1b2a4ee1011"},
- {file = "torch-2.3.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:2bb5af780c55be68fe100feb0528d2edebace1d55cb2e351de735809ba7391eb"},
+python-versions = ">=3.9.0"
+groups = ["main"]
+markers = "python_version >= \"3.12\""
+files = [
+ {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a103b5d782af5bd119b81dbcc7ffc6fa09904c423ff8db397a1e6ea8fd71508f"},
+ {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:fe955951bdf32d182ee8ead6c3186ad54781492bf03d547d31771a01b3d6fb7d"},
+ {file = "torch-2.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:885453d6fba67d9991132143bf7fa06b79b24352f4506fd4d10b309f53454162"},
+ {file = "torch-2.7.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d72acfdb86cee2a32c0ce0101606f3758f0d8bb5f8f31e7920dc2809e963aa7c"},
+ {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:236f501f2e383f1cb861337bdf057712182f910f10aeaf509065d54d339e49b2"},
+ {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:06eea61f859436622e78dd0cdd51dbc8f8c6d76917a9cf0555a333f9eac31ec1"},
+ {file = "torch-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:8273145a2e0a3c6f9fd2ac36762d6ee89c26d430e612b95a99885df083b04e52"},
+ {file = "torch-2.7.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aea4fc1bf433d12843eb2c6b2204861f43d8364597697074c8d38ae2507f8730"},
+ {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ea1e518df4c9de73af7e8a720770f3628e7f667280bce2be7a16292697e3fa"},
+ {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c33360cfc2edd976c2633b3b66c769bdcbbf0e0b6550606d188431c81e7dd1fc"},
+ {file = "torch-2.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:d8bf6e1856ddd1807e79dc57e54d3335f2b62e6f316ed13ed3ecfe1fc1df3d8b"},
+ {file = "torch-2.7.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:787687087412c4bd68d315e39bc1223f08aae1d16a9e9771d95eabbb04ae98fb"},
+ {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:03563603d931e70722dce0e11999d53aa80a375a3d78e6b39b9f6805ea0a8d28"},
+ {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d632f5417b6980f61404a125b999ca6ebd0b8b4bbdbb5fbbba44374ab619a412"},
+ {file = "torch-2.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:23660443e13995ee93e3d844786701ea4ca69f337027b05182f5ba053ce43b38"},
+ {file = "torch-2.7.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0da4f4dba9f65d0d203794e619fe7ca3247a55ffdcbd17ae8fb83c8b2dc9b585"},
+ {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e08d7e6f21a617fe38eeb46dd2213ded43f27c072e9165dc27300c9ef9570934"},
+ {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:30207f672328a42df4f2174b8f426f354b2baa0b7cca3a0adb3d6ab5daf00dc8"},
+ {file = "torch-2.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:79042feca1c634aaf6603fe6feea8c6b30dfa140a6bbc0b973e2260c7e79a22e"},
+ {file = "torch-2.7.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:988b0cbc4333618a1056d2ebad9eb10089637b659eb645434d0809d8d937b946"},
+ {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:e0d81e9a12764b6f3879a866607c8ae93113cbcad57ce01ebde63eb48a576369"},
+ {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8394833c44484547ed4a47162318337b88c97acdb3273d85ea06e03ffff44998"},
+ {file = "torch-2.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:df41989d9300e6e3c19ec9f56f856187a6ef060c3662fe54f4b6baf1fc90bd19"},
+ {file = "torch-2.7.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a737b5edd1c44a5c1ece2e9f3d00df9d1b3fb9541138bee56d83d38293fb6c9d"},
]
[package.dependencies]
filelock = "*"
fsspec = "*"
jinja2 = "*"
-mkl = {version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""}
networkx = "*"
-nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-sympy = "*"
-triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""}
-typing-extensions = ">=4.8.0"
+nvidia-cublas-cu12 = {version = "12.6.4.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-cupti-cu12 = {version = "12.6.80", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-nvrtc-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-runtime-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cudnn-cu12 = {version = "9.5.1.17", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cufft-cu12 = {version = "11.3.0.4", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cufile-cu12 = {version = "1.11.1.6", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-curand-cu12 = {version = "10.3.7.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusolver-cu12 = {version = "11.7.1.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusparse-cu12 = {version = "12.5.4.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusparselt-cu12 = {version = "0.6.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nccl-cu12 = {version = "2.26.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nvjitlink-cu12 = {version = "12.6.85", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nvtx-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+setuptools = {version = "*", markers = "python_version >= \"3.12\""}
+sympy = ">=1.13.3"
+triton = {version = "3.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+typing-extensions = ">=4.10.0"
+
+[package.extras]
+opt-einsum = ["opt-einsum (>=3.3)"]
+optree = ["optree (>=0.13.0)"]
+
+[[package]]
+name = "torch"
+version = "2.9.0"
+description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+markers = "python_version <= \"3.11\""
+files = [
+ {file = "torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd"},
+ {file = "torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c"},
+ {file = "torch-2.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:3f6aad4d2f0ee2248bac25339d74858ff846c3969b27d14ac235821f055af83d"},
+ {file = "torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:413e1654c9203733138858780e184d9fc59442f0b3b209e16f39354eb893db9b"},
+ {file = "torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677"},
+ {file = "torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67"},
+ {file = "torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e"},
+ {file = "torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec"},
+ {file = "torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642"},
+ {file = "torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6"},
+ {file = "torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b"},
+ {file = "torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695"},
+ {file = "torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e"},
+ {file = "torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642"},
+ {file = "torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece"},
+ {file = "torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5"},
+ {file = "torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637"},
+ {file = "torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e"},
+ {file = "torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8"},
+ {file = "torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a"},
+ {file = "torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb"},
+ {file = "torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb"},
+ {file = "torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f"},
+ {file = "torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125"},
+ {file = "torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc"},
+ {file = "torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e"},
+ {file = "torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b"},
+ {file = "torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75"},
+]
+
+[package.dependencies]
+filelock = "*"
+fsspec = ">=0.8.5"
+jinja2 = "*"
+networkx = ">=2.5.1"
+nvidia-cublas-cu12 = {version = "12.8.4.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-cupti-cu12 = {version = "12.8.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-nvrtc-cu12 = {version = "12.8.93", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cuda-runtime-cu12 = {version = "12.8.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cudnn-cu12 = {version = "9.10.2.21", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cufft-cu12 = {version = "11.3.3.83", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cufile-cu12 = {version = "1.13.1.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-curand-cu12 = {version = "10.3.9.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusolver-cu12 = {version = "11.7.3.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusparse-cu12 = {version = "12.5.8.93", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-cusparselt-cu12 = {version = "0.7.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nccl-cu12 = {version = "2.27.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nvjitlink-cu12 = {version = "12.8.93", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nvshmem-cu12 = {version = "3.3.20", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nvtx-cu12 = {version = "12.8.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+sympy = ">=1.13.3"
+triton = {version = "3.5.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+typing-extensions = ">=4.10.0"
[package.extras]
opt-einsum = ["opt-einsum (>=3.3)"]
-optree = ["optree (>=0.9.1)"]
+optree = ["optree (>=0.13.0)"]
+pyyaml = ["pyyaml"]
[[package]]
name = "tqdm"
@@ -2898,6 +3321,7 @@ version = "4.66.4"
description = "Fast, Extensible Progress Meter"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"},
{file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"},
@@ -2914,19 +3338,20 @@ telegram = ["requests"]
[[package]]
name = "transformers"
-version = "4.41.2"
+version = "4.57.1"
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
optional = false
-python-versions = ">=3.8.0"
+python-versions = ">=3.9.0"
+groups = ["main"]
files = [
- {file = "transformers-4.41.2-py3-none-any.whl", hash = "sha256:05555d20e43f808de1ef211ab64803cdb513170cef70d29a888b589caebefc67"},
- {file = "transformers-4.41.2.tar.gz", hash = "sha256:80a4db216533d573e9cc7388646c31ed9480918feb7c55eb211249cb23567f87"},
+ {file = "transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267"},
+ {file = "transformers-4.57.1.tar.gz", hash = "sha256:f06c837959196c75039809636cd964b959f6604b75b8eeec6fdfc0440b89cc55"},
]
[package.dependencies]
filelock = "*"
fugashi = {version = ">=1.0", optional = true, markers = "extra == \"ja\""}
-huggingface-hub = ">=0.23.0,<1.0"
+huggingface-hub = ">=0.34.0,<1.0"
ipadic = {version = ">=1.0.0,<2.0", optional = true, markers = "extra == \"ja\""}
numpy = ">=1.17"
packaging = ">=20.0"
@@ -2935,80 +3360,120 @@ pyyaml = ">=5.1"
regex = "!=2019.12.17"
requests = "*"
rhoknp = {version = ">=1.1.0,<1.3.1", optional = true, markers = "extra == \"ja\""}
-safetensors = ">=0.4.1"
+safetensors = ">=0.4.3"
sentencepiece = {version = ">=0.1.91,<0.1.92 || >0.1.92", optional = true, markers = "extra == \"sentencepiece\""}
-sudachidict-core = {version = ">=20220729", optional = true, markers = "extra == \"ja\""}
+sudachidict_core = {version = ">=20220729", optional = true, markers = "extra == \"ja\""}
sudachipy = {version = ">=0.6.6", optional = true, markers = "extra == \"ja\""}
-tokenizers = ">=0.19,<0.20"
+tokenizers = ">=0.22.0,<=0.23.0"
tqdm = ">=4.27"
unidic = {version = ">=1.0.2", optional = true, markers = "extra == \"ja\""}
-unidic-lite = {version = ">=1.0.7", optional = true, markers = "extra == \"ja\""}
+unidic_lite = {version = ">=1.0.7", optional = true, markers = "extra == \"ja\""}
[package.extras]
-accelerate = ["accelerate (>=0.21.0)"]
-agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"]
-all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision"]
+accelerate = ["accelerate (>=0.26.0)"]
+all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"]
audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
-codecarbon = ["codecarbon (==1.2.0)"]
-deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"]
-deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
-dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.19,<0.20)", "urllib3 (<2.0.0)"]
-dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+benchmark = ["optimum-benchmark (>=0.3.0)"]
+chat-template = ["jinja2 (>=3.1.0)"]
+codecarbon = ["codecarbon (>=2.8.1)"]
+deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"]
+deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"]
+dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "openai (>=1.98.0)", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
+dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "openai (>=1.98.0)", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "urllib3 (<2.0.0)", "uvicorn"]
+dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<=0.9)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"]
flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"]
flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
ftfy = ["ftfy"]
-integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"]
-ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"]
+hf-xet = ["hf_xet"]
+hub-kernels = ["kernels (>=0.6.1,<=0.9)"]
+integrations = ["kernels (>=0.6.1,<=0.9)", "optuna", "ray[tune] (>=2.7.0)"]
+ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)"]
+mistral-common = ["mistral-common[opencv] (>=1.6.3)"]
modelcreation = ["cookiecutter (==1.7.3)"]
natten = ["natten (>=0.14.6,<0.15.0)"]
+num2words = ["num2words"]
onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"]
onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
+open-telemetry = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"]
optuna = ["optuna"]
-quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<2.0.0)"]
+quality = ["GitPython (<3.1.19)", "datasets (>=2.15.0)", "libcst", "pandas (<2.3.0)", "rich", "ruff (==0.13.1)", "urllib3 (<2.0.0)"]
ray = ["ray[tune] (>=2.7.0)"]
-retrieval = ["datasets (!=2.5.0)", "faiss-cpu"]
+retrieval = ["datasets (>=2.15.0)", "faiss-cpu"]
+ruff = ["ruff (==0.13.1)"]
sagemaker = ["sagemaker (>=2.31.0)"]
sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"]
-serving = ["fastapi", "pydantic", "starlette", "uvicorn"]
+serving = ["accelerate (>=0.26.0)", "fastapi", "openai (>=1.98.0)", "pydantic (>=2)", "starlette", "torch (>=2.2)", "uvicorn"]
sigopt = ["sigopt"]
sklearn = ["scikit-learn"]
speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
-testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
-tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
-tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
+testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"]
+tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
+tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"]
tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
-timm = ["timm"]
-tokenizers = ["tokenizers (>=0.19,<0.20)"]
-torch = ["accelerate (>=0.21.0)", "torch"]
+tiktoken = ["blobfile", "tiktoken"]
+timm = ["timm (!=1.0.18,<=1.0.19)"]
+tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"]
+torch = ["accelerate (>=0.26.0)", "torch (>=2.2)"]
torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
-torchhub = ["filelock", "huggingface-hub (>=0.23.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.19,<0.20)", "torch", "tqdm (>=4.27)"]
-video = ["av (==9.2.0)", "decord (==0.6.0)"]
+torchhub = ["filelock", "huggingface-hub (>=0.34.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "tqdm (>=4.27)"]
+video = ["av"]
vision = ["Pillow (>=10.0.1,<=15.0)"]
[[package]]
name = "triton"
-version = "2.3.1"
+version = "3.3.1"
description = "A language and compiler for custom Deep Learning operations"
optional = false
python-versions = "*"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\""
files = [
- {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"},
- {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"},
- {file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"},
- {file = "triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"},
- {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"},
- {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"},
+ {file = "triton-3.3.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b74db445b1c562844d3cfad6e9679c72e93fdfb1a90a24052b03bb5c49d1242e"},
+ {file = "triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b31e3aa26f8cb3cc5bf4e187bf737cbacf17311e1112b781d4a059353dfd731b"},
+ {file = "triton-3.3.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9999e83aba21e1a78c1f36f21bce621b77bcaa530277a50484a7cb4a822f6e43"},
+ {file = "triton-3.3.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b89d846b5a4198317fec27a5d3a609ea96b6d557ff44b56c23176546023c4240"},
+ {file = "triton-3.3.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3198adb9d78b77818a5388bff89fa72ff36f9da0bc689db2f0a651a67ce6a42"},
+ {file = "triton-3.3.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6139aeb04a146b0b8e0fbbd89ad1e65861c57cfed881f21d62d3cb94a36bab7"},
]
[package.dependencies]
-filelock = "*"
+setuptools = ">=40.8.0"
[package.extras]
build = ["cmake (>=3.20)", "lit"]
-tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"]
-tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
+tests = ["autopep8", "isort", "llnl-hatchet", "numpy", "pytest", "pytest-forked", "pytest-xdist", "scipy (>=1.7.1)"]
+tutorials = ["matplotlib", "pandas", "tabulate"]
+
+[[package]]
+name = "triton"
+version = "3.5.0"
+description = "A language and compiler for custom Deep Learning operations"
+optional = false
+python-versions = "<3.15,>=3.10"
+groups = ["main"]
+markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\""
+files = [
+ {file = "triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3"},
+ {file = "triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0"},
+ {file = "triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c"},
+ {file = "triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec"},
+ {file = "triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b"},
+ {file = "triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833"},
+ {file = "triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3"},
+ {file = "triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12"},
+ {file = "triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae"},
+ {file = "triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9"},
+ {file = "triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac"},
+ {file = "triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289"},
+ {file = "triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686"},
+ {file = "triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7"},
+]
+
+[package.extras]
+build = ["cmake (>=3.20,<4.0)", "lit"]
+tests = ["autopep8", "isort", "llnl-hatchet", "numpy", "pytest", "pytest-forked", "pytest-xdist", "scipy (>=1.7.1)"]
+tutorials = ["matplotlib", "pandas", "tabulate"]
[[package]]
name = "typing-extensions"
@@ -3016,6 +3481,7 @@ version = "4.12.1"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
+groups = ["main", "dev"]
files = [
{file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"},
{file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"},
@@ -3027,6 +3493,7 @@ version = "2024.1"
description = "Provider of IANA time zone data"
optional = false
python-versions = ">=2"
+groups = ["main"]
files = [
{file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
{file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},
@@ -3038,6 +3505,7 @@ version = "1.1.0"
description = "UniDic packaged for Python"
optional = false
python-versions = ">=3.5"
+groups = ["main"]
files = [
{file = "unidic-1.1.0.tar.gz", hash = "sha256:0ab91c05de342c84d2a6314901fd3afb9061ecd7534dd4a0431dccbb87d921b7"},
]
@@ -3054,6 +3522,7 @@ version = "1.0.8"
description = "A small version of UniDic packaged for Python"
optional = false
python-versions = "*"
+groups = ["main"]
files = [
{file = "unidic-lite-1.0.8.tar.gz", hash = "sha256:db9d4572d9fdd4d00a97949d4b0741ec480ee05a7e7e2e32f547500dae27b245"},
]
@@ -3064,13 +3533,14 @@ version = "2.2.1"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.8"
+groups = ["main"]
files = [
{file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
{file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
]
[package.extras]
-brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
h2 = ["h2 (>=4,<5)"]
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]
@@ -3081,6 +3551,7 @@ version = "0.10.1"
description = "A lightweight console printing and formatting toolkit"
optional = false
python-versions = "*"
+groups = ["main"]
files = [
{file = "wasabi-0.10.1-py3-none-any.whl", hash = "sha256:fe862cc24034fbc9f04717cd312ab884f71f51a8ecabebc3449b751c2a649d83"},
{file = "wasabi-0.10.1.tar.gz", hash = "sha256:c8e372781be19272942382b14d99314d175518d7822057cb7a97010c4259d249"},
@@ -3092,13 +3563,15 @@ version = "1.1.0"
description = "A small Python utility to set file creation time on Windows"
optional = false
python-versions = ">=3.5"
+groups = ["main"]
+markers = "sys_platform == \"win32\""
files = [
{file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
{file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
]
[package.extras]
-dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
+dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"]
[[package]]
name = "wrapt"
@@ -3106,6 +3579,7 @@ version = "1.16.0"
description = "Module for decorators, wrappers and monkey patching."
optional = false
python-versions = ">=3.6"
+groups = ["main"]
files = [
{file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"},
{file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"},
@@ -3185,6 +3659,7 @@ version = "3.4.1"
description = "Python binding for xxHash"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "xxhash-3.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f"},
{file = "xxhash-3.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2"},
@@ -3302,6 +3777,7 @@ version = "1.9.4"
description = "Yet another URL library"
optional = false
python-versions = ">=3.7"
+groups = ["main"]
files = [
{file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"},
{file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"},
@@ -3400,6 +3876,6 @@ idna = ">=2.0"
multidict = ">=4.0"
[metadata]
-lock-version = "2.0"
+lock-version = "2.1"
python-versions = ">=3.10,<4.0"
-content-hash = "a2c9ed2cef63429fda1482752acb674fe3b39b94498bbe2c177d0b8ac9558c44"
+content-hash = "f4ea38369c3560805eaf80e8b74de4909777dc51cba333401221fa6787c391bc"
diff --git a/pyproject.toml b/pyproject.toml
index b5d2296..28cea05 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,11 +19,11 @@ python = ">=3.10,<4.0"
jsonargparse = {extras = ["jsonnet"], version = "^4.27.5"}
loguru = "^0.7.2"
scikit-learn = "^1.3.2"
-transformers = {extras = ["ja", "sentencepiece"], version = "^4.38.1"}
+transformers = {version = "^4.57.1", extras = ["ja", "sentencepiece"]}
datasets = ">=2.17"
-sentence-transformers = "^3.0.0"
+sentence-transformers = "5.1.1"
pytest = "7.1.3"
-torch = "^2.3"
+torch = "^2.6"
pydantic = "^2.6.3"
eval-type-backport = "^0.1.3"
smart-open = "^7.0.1"
diff --git a/src/jmteb/__main__.py b/src/jmteb/__main__.py
index ff10884..2dc9478 100644
--- a/src/jmteb/__main__.py
+++ b/src/jmteb/__main__.py
@@ -119,6 +119,7 @@ def main(
)
if args.log_predictions:
+ logger.info("Prediction logging activated.")
for k, v in args.evaluators.items():
if hasattr(v, "log_predictions"):
args.evaluators[k].log_predictions = True
diff --git a/src/jmteb/configs/jmteb.jsonnet b/src/jmteb/configs/jmteb.jsonnet
index 66fd2dc..b27d021 100644
--- a/src/jmteb/configs/jmteb.jsonnet
+++ b/src/jmteb/configs/jmteb.jsonnet
@@ -3,14 +3,16 @@
(import './tasks/amazon_counterfactual_classification.jsonnet') +
(import './tasks/massive_intent_classification.jsonnet') +
(import './tasks/massive_scenario_classification.jsonnet') +
+(import './tasks/japanese_sentiment_classification.jsonnet') +
+(import './tasks/sib200_japanese_classification.jsonnet') +
+(import './tasks/wrime_classification.jsonnet') +
// Clustering
(import './tasks/livedoor_news.jsonnet') +
(import './tasks/mewsc16.jsonnet') +
+(import './tasks/sib200_japanese_clustering.jsonnet') +
// STS
(import './tasks/jsts.jsonnet') +
(import './tasks/jsick.jsonnet') +
-// Pair Classification
-(import './tasks/paws_x_ja.jsonnet') +
// Retrieval
(import './tasks/jagovfaqs_22k.jsonnet') +
(import './tasks/mrtydi.jsonnet') +
@@ -18,5 +20,14 @@
(import './tasks/nlp_journal_title_abs.jsonnet') +
(import './tasks/nlp_journal_title_intro.jsonnet') +
(import './tasks/nlp_journal_abs_intro.jsonnet') +
+(import './tasks/nlp_journal_abs_article.jsonnet') +
+(import './tasks/jacwir_retrieval.jsonnet') +
+(import './tasks/miracl_retrieval.jsonnet') +
+(import './tasks/mldr_retrieval.jsonnet') +
+(import './tasks/mintaka_retrieval.jsonnet') +
// Reranking
-(import './tasks/esci.jsonnet')
\ No newline at end of file
+(import './tasks/esci.jsonnet') +
+(import './tasks/jqara.jsonnet') +
+(import './tasks/jacwir_reranking.jsonnet') +
+(import './tasks/miracl_reranking.jsonnet') +
+(import './tasks/mldr_reranking.jsonnet')
\ No newline at end of file
diff --git a/src/jmteb/configs/tasks/jacwir_reranking.jsonnet b/src/jmteb/configs/tasks/jacwir_reranking.jsonnet
new file mode 100644
index 0000000..eb41d67
--- /dev/null
+++ b/src/jmteb/configs/tasks/jacwir_reranking.jsonnet
@@ -0,0 +1,31 @@
+{
+ jacwir_reranking: {
+ class_path: 'RerankingEvaluator',
+ init_args: {
+ val_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'jacwir-reranking-query',
+ },
+ },
+ test_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'jacwir-reranking-query',
+ },
+ },
+ doc_dataset: {
+ class_path: 'HfRerankingDocDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'corpus',
+ name: 'jacwir-reranking-corpus',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/jacwir_retrieval.jsonnet b/src/jmteb/configs/tasks/jacwir_retrieval.jsonnet
new file mode 100644
index 0000000..8cdb416
--- /dev/null
+++ b/src/jmteb/configs/tasks/jacwir_retrieval.jsonnet
@@ -0,0 +1,32 @@
+{
+ jacwir_retrieval: {
+ class_path: 'RetrievalEvaluator',
+ init_args: {
+ val_query_dataset: {
+ class_path: 'HfRetrievalQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'jacwir-retrieval-query',
+ },
+ },
+ test_query_dataset: {
+ class_path: 'HfRetrievalQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'jacwir-retrieval-query',
+ },
+ },
+ doc_dataset: {
+ class_path: 'HfRetrievalDocDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'corpus',
+ name: 'jacwir-retrieval-corpus',
+ },
+ },
+ "doc_chunk_size":10000
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/japanese_sentiment_classification.jsonnet b/src/jmteb/configs/tasks/japanese_sentiment_classification.jsonnet
new file mode 100644
index 0000000..f5a847c
--- /dev/null
+++ b/src/jmteb/configs/tasks/japanese_sentiment_classification.jsonnet
@@ -0,0 +1,31 @@
+{
+ japanese_sentiment_classification: {
+ class_path: 'ClassificationEvaluator',
+ init_args: {
+ train_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'train',
+ name: 'japanese_sentiment_classification',
+ },
+ },
+ val_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'japanese_sentiment_classification',
+ },
+ },
+ test_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'japanese_sentiment_classification',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/jqara.jsonnet b/src/jmteb/configs/tasks/jqara.jsonnet
new file mode 100644
index 0000000..1c0ba64
--- /dev/null
+++ b/src/jmteb/configs/tasks/jqara.jsonnet
@@ -0,0 +1,31 @@
+{
+ jqara: {
+ class_path: 'RerankingEvaluator',
+ init_args: {
+ val_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'jqara-query',
+ },
+ },
+ test_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'jqara-query',
+ },
+ },
+ doc_dataset: {
+ class_path: 'HfRerankingDocDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'corpus',
+ name: 'jqara-corpus',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/mintaka_retrieval.jsonnet b/src/jmteb/configs/tasks/mintaka_retrieval.jsonnet
new file mode 100644
index 0000000..6b17949
--- /dev/null
+++ b/src/jmteb/configs/tasks/mintaka_retrieval.jsonnet
@@ -0,0 +1,32 @@
+{
+  mintaka_retrieval: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'mintaka-retrieval-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'mintaka-retrieval-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'mintaka-retrieval-corpus',
+        },
+      },
+      doc_chunk_size: 10000,
+    },
+  },
+}
diff --git a/src/jmteb/configs/tasks/miracl_reranking.jsonnet b/src/jmteb/configs/tasks/miracl_reranking.jsonnet
new file mode 100644
index 0000000..b91a341
--- /dev/null
+++ b/src/jmteb/configs/tasks/miracl_reranking.jsonnet
@@ -0,0 +1,31 @@
+{
+ miracl_reranking: {
+ class_path: 'RerankingEvaluator',
+ init_args: {
+ val_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'miracl-reranking-query',
+ },
+ },
+ test_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'miracl-reranking-query',
+ },
+ },
+ doc_dataset: {
+ class_path: 'HfRerankingDocDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'corpus',
+ name: 'miracl-reranking-corpus',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/miracl_retrieval.jsonnet b/src/jmteb/configs/tasks/miracl_retrieval.jsonnet
new file mode 100644
index 0000000..9b73f4f
--- /dev/null
+++ b/src/jmteb/configs/tasks/miracl_retrieval.jsonnet
@@ -0,0 +1,32 @@
+{
+  miracl_retrieval: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'miracl-retrieval-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'miracl-retrieval-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'miracl-retrieval-corpus',
+        },
+      },
+      doc_chunk_size: 10000,
+    },
+  },
+}
diff --git a/src/jmteb/configs/tasks/mldr_reranking.jsonnet b/src/jmteb/configs/tasks/mldr_reranking.jsonnet
new file mode 100644
index 0000000..1cbc025
--- /dev/null
+++ b/src/jmteb/configs/tasks/mldr_reranking.jsonnet
@@ -0,0 +1,31 @@
+{
+ mldr_reranking: {
+ class_path: 'RerankingEvaluator',
+ init_args: {
+ val_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'mldr-reranking-query',
+ },
+ },
+ test_query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'mldr-reranking-query',
+ },
+ },
+ doc_dataset: {
+ class_path: 'HfRerankingDocDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'corpus',
+ name: 'mldr-reranking-corpus',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/mldr_retrieval.jsonnet b/src/jmteb/configs/tasks/mldr_retrieval.jsonnet
new file mode 100644
index 0000000..71c0bee
--- /dev/null
+++ b/src/jmteb/configs/tasks/mldr_retrieval.jsonnet
@@ -0,0 +1,32 @@
+{
+  mldr_retrieval: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'mldr-retrieval-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'mldr-retrieval-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'mldr-retrieval-corpus',
+        },
+      },
+      doc_chunk_size: 10000,
+    },
+  },
+}
diff --git a/src/jmteb/configs/tasks/nlp_journal_abs_article.jsonnet b/src/jmteb/configs/tasks/nlp_journal_abs_article.jsonnet
new file mode 100644
index 0000000..f2c175f
--- /dev/null
+++ b/src/jmteb/configs/tasks/nlp_journal_abs_article.jsonnet
@@ -0,0 +1,31 @@
+{
+ nlp_journal_abs_article: {
+ class_path: 'RetrievalEvaluator',
+ init_args: {
+ val_query_dataset: {
+ class_path: 'HfRetrievalQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'nlp_journal_abs_article-query',
+ },
+ },
+ test_query_dataset: {
+ class_path: 'HfRetrievalQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'nlp_journal_abs_article-query',
+ },
+ },
+ doc_dataset: {
+ class_path: 'HfRetrievalDocDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'corpus',
+ name: 'nlp_journal_abs_article-corpus',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/sib200_japanese_classification.jsonnet b/src/jmteb/configs/tasks/sib200_japanese_classification.jsonnet
new file mode 100644
index 0000000..852505f
--- /dev/null
+++ b/src/jmteb/configs/tasks/sib200_japanese_classification.jsonnet
@@ -0,0 +1,31 @@
+{
+ sib200_japanese_classification: {
+ class_path: 'ClassificationEvaluator',
+ init_args: {
+ train_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'train',
+ name: 'sib200_japanese_classification',
+ },
+ },
+ val_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'sib200_japanese_classification',
+ },
+ },
+ test_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'sib200_japanese_classification',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/configs/tasks/paws_x_ja.jsonnet b/src/jmteb/configs/tasks/sib200_japanese_clustering.jsonnet
similarity index 53%
rename from src/jmteb/configs/tasks/paws_x_ja.jsonnet
rename to src/jmteb/configs/tasks/sib200_japanese_clustering.jsonnet
index ee57b72..762d34a 100644
--- a/src/jmteb/configs/tasks/paws_x_ja.jsonnet
+++ b/src/jmteb/configs/tasks/sib200_japanese_clustering.jsonnet
@@ -1,21 +1,21 @@
{
- paws_x_ja: {
- class_path: 'PairClassificationEvaluator',
+ sib200_japanese_clustering: {
+ class_path: 'ClusteringEvaluator',
init_args: {
val_dataset: {
- class_path: 'HfPairClassificationDataset',
+ class_path: 'HfClusteringDataset',
init_args: {
path: 'sbintuitions/JMTEB',
split: 'validation',
- name: 'paws_x_ja',
+ name: 'sib200_japanese_clustering',
},
},
test_dataset: {
- class_path: 'HfPairClassificationDataset',
+ class_path: 'HfClusteringDataset',
init_args: {
path: 'sbintuitions/JMTEB',
split: 'test',
- name: 'paws_x_ja',
+ name: 'sib200_japanese_clustering',
},
},
},
diff --git a/src/jmteb/configs/tasks/wrime_classification.jsonnet b/src/jmteb/configs/tasks/wrime_classification.jsonnet
new file mode 100644
index 0000000..7fb68b7
--- /dev/null
+++ b/src/jmteb/configs/tasks/wrime_classification.jsonnet
@@ -0,0 +1,31 @@
+{
+ wrime_classification: {
+ class_path: 'ClassificationEvaluator',
+ init_args: {
+ train_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'train',
+ name: 'wrime_classification',
+ },
+ },
+ val_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'validation',
+ name: 'wrime_classification',
+ },
+ },
+ test_dataset: {
+ class_path: 'HfClassificationDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'wrime_classification',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/embedders/__init__.py b/src/jmteb/embedders/__init__.py
index f28f038..5a3e19c 100644
--- a/src/jmteb/embedders/__init__.py
+++ b/src/jmteb/embedders/__init__.py
@@ -2,6 +2,8 @@
from jmteb.embedders.data_parallel_sbert_embedder import (
DataParallelSentenceBertEmbedder,
)
+from jmteb.embedders.gemma_embedder import GemmaEmbedder
from jmteb.embedders.openai_embedder import OpenAIEmbedder
+from jmteb.embedders.plamo_embedder import PlamoEmbedder
from jmteb.embedders.sbert_embedder import SentenceBertEmbedder
from jmteb.embedders.transformers_embedder import TransformersEmbedder
diff --git a/src/jmteb/embedders/base.py b/src/jmteb/embedders/base.py
index ea078f1..42a5d54 100644
--- a/src/jmteb/embedders/base.py
+++ b/src/jmteb/embedders/base.py
@@ -144,3 +144,17 @@ def set_output_tensor(self):
def set_output_numpy(self):
self.convert_to_numpy = True
self.convert_to_tensor = False
+
+ def set_max_seq_length(self, max_seq_length: int | None = None) -> None:
+ if hasattr(self, "max_seq_length"):
+ self.max_seq_length = max_seq_length
+ else:
+ logger.warning("Embedder doesn't have a `max_seq_length` attribute!")
+
+ def reset_max_seq_length(self):
+ orig_max_seq_length = getattr(self, "_orig_max_length", None)
+ if not orig_max_seq_length:
+ logger.warning("Failed to reset `max_seq_length`!")
+ else:
+ logger.info(f"Set `max_seq_length` to model default: {orig_max_seq_length}")
+ self.max_seq_length = orig_max_seq_length
diff --git a/src/jmteb/embedders/data_parallel_sbert_embedder.py b/src/jmteb/embedders/data_parallel_sbert_embedder.py
index 7416fe4..5d932e2 100644
--- a/src/jmteb/embedders/data_parallel_sbert_embedder.py
+++ b/src/jmteb/embedders/data_parallel_sbert_embedder.py
@@ -201,6 +201,7 @@ def __init__(
)
self.dp_model = DPSentenceTransformer(sbert_model=model)
self.model = self.dp_model.sbert
+ self._orig_max_length = self.model.max_seq_length
if max_seq_length:
self.model.max_seq_length = max_seq_length
self.initital_batch_size = batch_size
@@ -258,3 +259,7 @@ def _add_eos_func(self, text: str | list[str]) -> str | list[str]:
def get_output_dim(self) -> int:
return self.model.get_sentence_embedding_dimension()
+
+ def reset_max_seq_length(self):
+ logger.info(f"Reset `max_seq_length` to {self._orig_max_length}")
+ self.model.max_seq_length = self._orig_max_length
diff --git a/src/jmteb/embedders/gemma_embedder.py b/src/jmteb/embedders/gemma_embedder.py
new file mode 100644
index 0000000..5949845
--- /dev/null
+++ b/src/jmteb/embedders/gemma_embedder.py
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+import numpy as np
+import torch
+from loguru import logger
+from sentence_transformers import SentenceTransformer
+
+from jmteb.embedders.base import TextEmbedder
+
+
+class GemmaEmbedder(TextEmbedder):
+ """
+ Google EmbeddingGemma model embedder using SentenceTransformers.
+
+ This class supports the EmbeddingGemma models from Google (e.g., embeddinggemma-300m).
+ It uses SentenceTransformers to load the model and provides specialized encode_query
+ and encode_document methods for optimal performance in different use cases.
+ """
+
+ def __init__(
+ self,
+ model_name_or_path: str = "google/embeddinggemma-300m",
+ batch_size: int = 32,
+ device: str | None = None,
+ normalize_embeddings: bool = True,
+ max_seq_length: int | None = None,
+ query_mode: bool = False,
+ add_eos: bool = False,
+ truncate_dim: int | None = None,
+ model_kwargs: dict | None = None,
+ tokenizer_kwargs: dict | None = None,
+ ) -> None:
+ """
+ Initialize the EmbeddingGemma embedder using SentenceTransformers.
+
+ Args:
+ model_name_or_path: Path or name of the EmbeddingGemma model
+ batch_size: Batch size for encoding
+ device: Device to use ('cuda', 'cpu', or None for auto)
+ normalize_embeddings: Whether to normalize embeddings (recommended for EmbeddingGemma)
+ max_seq_length: Maximum sequence length (default: model's max, typically 2048)
+ query_mode: Whether to use query encoding mode by default
+ add_eos: Whether to add EOS token to inputs
+ truncate_dim: Truncate embeddings to this dimension (supports 768, 512, 256, 128)
+ model_kwargs: Additional kwargs for model loading
+ tokenizer_kwargs: Additional kwargs for tokenizer loading
+ """
+ model_kwargs = self._model_kwargs_parser(model_kwargs or {})
+
+ # Initialize SentenceTransformer
+ self.model = SentenceTransformer(
+ model_name_or_path,
+ trust_remote_code=True,
+ truncate_dim=truncate_dim,
+ model_kwargs=model_kwargs,
+ tokenizer_kwargs=tokenizer_kwargs or {},
+ )
+
+ # Store original max length and set new one if provided
+ self._orig_max_length = self.model.max_seq_length
+ if max_seq_length:
+ self.model.max_seq_length = max_seq_length
+
+ self.batch_size = batch_size
+ self.device = device
+ self.normalize_embeddings = normalize_embeddings
+ self.max_seq_length = getattr(self.model, "max_seq_length", None)
+ self.add_eos = add_eos
+ self.query_mode = query_mode
+
+ # Set output format based on model kwargs
+ if model_kwargs and "torch_dtype" in model_kwargs:
+ self.set_output_tensor()
+ else:
+ self.set_output_numpy()
+
+ logger.info(f"Loaded EmbeddingGemma model: {model_name_or_path}")
+ logger.info(f"Model device: {self.model.device}, Max seq length: {self.max_seq_length}")
+
+    def encode(self, text: str | list[str], prefix: str | None = None, **kwargs) -> np.ndarray | torch.Tensor:
+        """
+        Encode text into embeddings, preferring the model's query/document encoders.
+
+        Args:
+            text: A single string or a list of strings to encode.
+            prefix: Optional string prepended to every text before encoding.
+            **kwargs: Extra encoding arguments. `query_mode` (bool) is consumed here and
+                overrides the instance default; all remaining entries are forwarded to
+                the underlying encode call.
+
+        Returns:
+            Embeddings as a numpy array or torch tensor, following the configured output
+            format. A single string input yields one embedding instead of a batch of one.
+        """
+        text_was_str = isinstance(text, str)
+        if text_was_str:
+            text = [text]
+
+        # Pop (rather than get) `query_mode`: it is an option of this class only and must
+        # not leak into the kwargs forwarded to the model's encode call.
+        use_query_mode = kwargs.pop("query_mode", self.query_mode)
+
+        if prefix:
+            text = [prefix + t for t in text]
+
+        if self.add_eos:
+            text = self._add_eos_func(text)
+
+        # Use the specialized encoders when the loaded model provides them, otherwise
+        # fall back to the generic `encode`.
+        if hasattr(self.model, "encode_query") and hasattr(self.model, "encode_document"):
+            encode_fn = self.model.encode_query if use_query_mode else self.model.encode_document
+        else:
+            encode_fn = self.model.encode
+
+        # Every path receives the configured batch size, device and normalization, so
+        # the constructor options are honored regardless of which encoder is used.
+        embeddings = encode_fn(
+            text,
+            convert_to_numpy=self.convert_to_numpy,
+            convert_to_tensor=self.convert_to_tensor,
+            batch_size=self.batch_size,
+            device=self.device,
+            normalize_embeddings=self.normalize_embeddings,
+            **kwargs,
+        )
+
+        # Defensive conversion in case the encoder did not return the requested container.
+        if self.convert_to_numpy and isinstance(embeddings, torch.Tensor):
+            embeddings = embeddings.cpu().numpy()
+        elif not self.convert_to_numpy and isinstance(embeddings, np.ndarray):
+            embeddings = torch.from_numpy(embeddings)
+
+        # Unwrap the batch dimension that was added for a single string input.
+        if text_was_str and isinstance(embeddings, (np.ndarray, torch.Tensor)) and embeddings.ndim > 1:
+            embeddings = embeddings[0]
+
+        return embeddings
+
+ def encode_queries(
+ self, queries: str | list[str], prefix: str | None = None, **kwargs
+ ) -> np.ndarray | torch.Tensor:
+ """
+ Convenience method to encode queries using query mode.
+
+ Args:
+ queries: Query text(s) to encode
+ prefix: Prefix to add
+ **kwargs: Additional arguments
+
+ Returns:
+ Query embeddings
+ """
+ return self.encode(queries, prefix=prefix, query_mode=True, **kwargs)
+
+ def encode_documents(
+ self, documents: str | list[str], prefix: str | None = None, **kwargs
+ ) -> np.ndarray | torch.Tensor:
+ """
+ Convenience method to encode documents using document mode.
+
+ Args:
+ documents: Document text(s) to encode
+ prefix: Prefix to add
+ **kwargs: Additional arguments
+
+ Returns:
+ Document embeddings
+ """
+ return self.encode(documents, prefix=prefix, query_mode=False, **kwargs)
+
+ def set_query_mode(self, query_mode: bool = True) -> None:
+ """
+ Set the default encoding mode.
+
+ Args:
+ query_mode: True for query mode, False for document mode
+ """
+ self.query_mode = query_mode
+ logger.info(f"Set default encoding mode to {'query' if query_mode else 'document'}")
+
+ def _add_eos_func(self, text: str | list[str]) -> str | list[str]:
+ """Add EOS token to text if available."""
+ try:
+ eos_token = getattr(self.model.tokenizer, "eos_token")
+ except AttributeError:
+ return text
+
+ if isinstance(text, str):
+ return text + eos_token
+ elif isinstance(text, list):
+ return [t + eos_token for t in text]
+ return text
+
+ def get_output_dim(self) -> int:
+ """Get the dimensionality of output embeddings."""
+ return self.model.get_sentence_embedding_dimension()
+
+    def set_max_seq_length(self, max_seq_length: int | None = None) -> None:
+        """
+        Set the maximum sequence length on both the wrapped model and this embedder.
+
+        Args:
+            max_seq_length: New maximum length. A falsy value (None or 0) leaves the
+                current setting untouched.
+        """
+        if not max_seq_length:
+            # Nothing was changed, so do not claim in the log that a new value was set.
+            return
+        self.model.max_seq_length = max_seq_length
+        self.max_seq_length = max_seq_length
+        logger.info(f"Set max_seq_length to {max_seq_length}")
+
+ def reset_max_seq_length(self) -> None:
+ """Reset max sequence length to model's original value."""
+ try:
+ logger.info(f"Reset max_seq_length to {self._orig_max_length}")
+ self.model.max_seq_length = self._orig_max_length
+ self.max_seq_length = self._orig_max_length
+ except AttributeError:
+ logger.warning("Failed to reset max_seq_length - original value not available")
+
+    def __repr__(self) -> str:
+        # `model_name` is not guaranteed to exist on the wrapped model; fall back to a
+        # placeholder so that building the repr never raises.
+        model_name = getattr(self.model, "model_name", "unknown")
+        return f"GemmaEmbedder(model='{model_name}', device='{self.model.device}')"
diff --git a/src/jmteb/embedders/openai_embedder.py b/src/jmteb/embedders/openai_embedder.py
index 6ea8b8f..631f0c6 100644
--- a/src/jmteb/embedders/openai_embedder.py
+++ b/src/jmteb/embedders/openai_embedder.py
@@ -1,9 +1,12 @@
from __future__ import annotations
from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
import numpy as np
import tiktoken
+import tqdm
from loguru import logger
from openai import OpenAI
@@ -14,7 +17,7 @@
class OpenAIEmbedderConfig:
max_output_dim: int
encoder_name: str
- max_token_length: int
+ max_seq_length: int
OPENAI_EMBEDDERS = {
@@ -28,7 +31,12 @@ class OpenAIEmbedderConfig:
class OpenAIEmbedder(TextEmbedder):
"""Embedder via OpenAI API."""
- def __init__(self, model: str = "text-embedding-3-small", dim: int | None = None) -> None:
+ def __init__(
+ self,
+ model: str = "text-embedding-3-small",
+ dim: int | None = None,
+ max_seq_length: int | None = None,
+ ) -> None:
"""Setup.
model and dim: see https://platform.openai.com/docs/models/embeddings
`text-embedding-3-large` model: max 3072 dim
@@ -44,13 +52,19 @@ def __init__(self, model: str = "text-embedding-3-small", dim: int | None = None
Args:
model (str, optional): Name of an OpenAI embedding model. Defaults to "text-embedding-3-small".
dim (int, optional): Output dimension. Defaults to 1536.
+ max_seq_length (int, optional): Maximum length of sequences. Default to None.
"""
self.client = OpenAI() # API key written in .env
assert model in OPENAI_EMBEDDERS.keys(), f"`model` must be one of {list(OPENAI_EMBEDDERS.keys())}!"
self.model = model
model_config = OPENAI_EMBEDDERS[model]
self.encoding = tiktoken.get_encoding(model_config.encoder_name)
- self.max_token_length = model_config.max_token_length
+ self._orig_max_length = model_config.max_seq_length
+ if max_seq_length:
+ self.max_seq_length = max_seq_length
+ else:
+ self.max_seq_length = model_config.max_seq_length
+
if not dim or model == "text-embedding-ada-002":
self.dim = model_config.max_output_dim
else:
@@ -70,16 +84,22 @@ def encode(self, text: str | list[str], prefix: str | None = None) -> np.ndarray
token_ids: list[int] = self.encode_and_truncate_text(text, prefix)
else:
token_ids: list[list[int]] = [self.encode_and_truncate_text(t, prefix) for t in text]
- result = np.asarray(
- [
- data.embedding
- for data in self.client.embeddings.create(
- input=token_ids,
- model=self.model,
- **kwargs,
- ).data
- ]
- )
+ try:
+ result = np.asarray(
+ [
+ data.embedding
+ for data in self.client.embeddings.create(
+ input=token_ids,
+ model=self.model,
+ **kwargs,
+ ).data
+ ]
+ )
+ except Exception as e:
+ logger.error(f"{len(text)=}")
+ logger.error(f"{len(token_ids)=}")
+ raise e
+
if result.shape[0] == 1:
return result.reshape(-1)
return result
@@ -94,4 +114,86 @@ def encode_and_truncate_text(self, text: str, prefix: str | None = None) -> list
text = " "
logger.warning("Found empty string!")
# Ignore prefix in OpenAIEmbedder
- return self.encoding.encode(text)[: self.max_token_length]
+ return self.encoding.encode(text)[: self.max_seq_length]
+
+    def _batch_encode_and_save_on_disk(
+        self,
+        text_list: list[str],
+        save_path: str | PathLike[str],
+        prefix: str | None = None,
+        batch_size: int = 256,
+        dtype: str = "float32",
+        **kwargs,
+    ) -> np.memmap:
+        """
+        Encode a list of texts and save the embeddings on disk using memmap.
+
+        Args:
+            text_list (list[str]): list of texts
+            save_path (str): path to save the embeddings
+            prefix (str, optional): the prefix to use for encoding. Default to None.
+            batch_size (int): requested number of texts per `encode` call. Defaults to 256.
+                The value is clamped to the range [1, 512].
+            dtype (str, optional): data type. Defaults to "float32".
+            **kwargs: forwarded to `encode`.
+
+        Returns:
+            np.memmap: read-only view of the saved embeddings with shape
+                (len(text_list), output_dim).
+        """
+        # `batch_encode_with_cache` passes the embedder-wide chunk size here, so cap the value
+        # instead of trusting it; smaller caller-supplied values are honored.
+        max_request_batch_size = 512
+        batch_size = max(1, min(batch_size, max_request_batch_size))
+
+        num_samples = len(text_list)
+        output_dim = self.get_output_dim()
+        embeddings = np.memmap(save_path, dtype=dtype, mode="w+", shape=(num_samples, output_dim))
+
+        with tqdm.tqdm(total=num_samples, desc="Encoding") as pbar:
+            for i in range(0, num_samples, batch_size):
+                batch = text_list[i : i + batch_size]
+                try:
+                    batch_embeddings: np.ndarray = self.encode(batch, prefix=prefix, **kwargs)
+                except Exception:
+                    # Any failure is retried once in smaller pieces; a second failure propagates.
+                    logger.error(f"{batch_size=}, {len(batch)=}")
+                    logger.warning("Batch too large, retrying with batch size 16")
+                    small_batch_size = 16
+                    batch_embeddings = np.vstack(
+                        [
+                            self.encode(batch[j : j + small_batch_size], prefix=prefix, **kwargs)
+                            for j in range(0, len(batch), small_batch_size)
+                        ]
+                    )
+                # Slice by the real batch length so the final, shorter batch is explicit.
+                embeddings[i : i + len(batch)] = batch_embeddings
+                pbar.update(len(batch))
+
+        embeddings.flush()
+        return np.memmap(save_path, dtype=dtype, mode="r", shape=(num_samples, output_dim))
+
+ def batch_encode_with_cache(
+ self,
+ text_list: list[str],
+ prefix: str | None = None,
+ cache_path: str | PathLike[str] | None = None,
+ overwrite_cache: bool = False,
+ dtype: str = "float32",
+ **kwargs,
+ ) -> np.ndarray:
+ """
+ Encode a list of texts and save the embeddings on disk using memmap if cache_path is provided.
+
+ Args:
+ text_list (list[str]): list of texts
+ prefix (str, optional): the prefix to use for encoding. Default to None.
+ cache_path (str, optional): path to save the embeddings. Defaults to None.
+ overwrite_cache (bool, optional): whether to overwrite the cache. Defaults to False.
+ dtype (str, optional): data type. Defaults to "float32".
+ """
+
+ logger.warning(f"Encoding with OpenAI embedder. {kwargs=}")
+ if cache_path is None:
+ logger.info("Encoding embeddings")
+ return self.encode(text_list, prefix=prefix, **kwargs)
+
+ if Path(cache_path).exists() and not overwrite_cache:
+ logger.info(f"Loading embeddings from {cache_path}")
+ return np.memmap(cache_path, dtype=dtype, mode="r", shape=(len(text_list), self.get_output_dim()))
+
+ logger.info(f"Encoding and saving embeddings to {cache_path}")
+ embeddings = self._batch_encode_and_save_on_disk(
+ text_list, cache_path, prefix=prefix, batch_size=self._chunk_size, dtype=dtype, **kwargs
+ )
+ return embeddings
diff --git a/src/jmteb/embedders/plamo_embedder.py b/src/jmteb/embedders/plamo_embedder.py
new file mode 100644
index 0000000..f2c6755
--- /dev/null
+++ b/src/jmteb/embedders/plamo_embedder.py
@@ -0,0 +1,251 @@
+import numpy as np
+import torch
+from loguru import logger
+from transformers import AutoModel, AutoTokenizer, PreTrainedModel, PreTrainedTokenizer
+
+from jmteb.embedders.base import TextEmbedder
+
+
+class PlamoEmbedder(TextEmbedder):
+ """
+ PLaMO embedding model embedder with multi-GPU support.
+
+ This class supports the PLaMO-Embedding-1B model from Preferred Networks.
+ It uses the model's specialized encode_query and encode_document methods
+ for optimal performance in different use cases.
+ """
+
+    def __init__(
+        self,
+        model_name_or_path: str = "pfnet/plamo-embedding-1b",
+        batch_size: int = 2,
+        device: str | None = None,
+        normalize_embeddings: bool = False,
+        max_seq_length: int | None = None,
+        query_mode: bool = False,
+        model_kwargs: dict | None = None,
+        tokenizer_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Initialize the PLaMO embedder.
+
+        Args:
+            model_name_or_path: Path or name of the PLaMO model
+            batch_size: Batch size for encoding
+            device: Device to use ('cuda', 'cpu', or None for auto)
+            normalize_embeddings: Whether to normalize embeddings
+            max_seq_length: Maximum sequence length (default: model's max).
+                NOTE(review): the value is stored on the instance, but the encoding path
+                of this class does not pass it to the model - confirm whether truncation
+                is expected to happen elsewhere.
+            query_mode: Whether to use query encoding mode by default
+            model_kwargs: Additional kwargs for model loading (None means no extra kwargs)
+            tokenizer_kwargs: Additional kwargs for tokenizer loading (None means no extra kwargs)
+        """
+        # Use None defaults instead of shared mutable `{}` defaults, and normalize them here.
+        model_kwargs = self._model_kwargs_parser(model_kwargs or {})
+        tokenizer_kwargs = tokenizer_kwargs or {}
+
+        # Load model and tokenizer with trust_remote_code=True for PLaMO
+        self.model: PreTrainedModel = AutoModel.from_pretrained(
+            model_name_or_path, trust_remote_code=True, **model_kwargs
+        )
+        self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
+            model_name_or_path, trust_remote_code=True, **tokenizer_kwargs
+        )
+
+        self.batch_size = batch_size
+        self.normalize_embeddings = normalize_embeddings
+        self.query_mode = query_mode
+
+        # Set up device: prefer CUDA when no device was requested and one is available
+        if not device and torch.cuda.is_available():
+            self.device = "cuda"
+        else:
+            self.device = device or "cpu"
+
+        # Move model to device
+        self.model.to(self.device)
+
+        # Enable simple multi-GPU support with DataParallel if multiple GPUs available
+        if torch.cuda.device_count() > 1 and self.device == "cuda":
+            logger.info(f"Using {torch.cuda.device_count()} GPUs with DataParallel")
+            self.model = torch.nn.DataParallel(self.model)
+            self.is_data_parallel = True
+            self.distributed_state = True  # For compatibility with tests
+        else:
+            self.is_data_parallel = False
+            self.distributed_state = None
+
+        # Store the device for easy access
+        self.model_device = next(self.model.parameters()).device
+        logger.info(f"Model device: {self.model_device}, GPU count: {torch.cuda.device_count()}")
+
+        # The config lives on the wrapped module when DataParallel is in use
+        config = self.model.module.config if self.is_data_parallel else self.model.config
+
+        # Set up sequence length, falling back to 4096 when the config does not define it
+        self._orig_max_length = getattr(config, "max_position_embeddings", 4096)
+        self.max_seq_length = max_seq_length or self._orig_max_length
+
+        # PLaMO-Embedding-1B has 2048 embedding dimensions
+        self.output_dim = getattr(config, "hidden_size", 2048)
+
+        # Set output format based on model kwargs
+        if "torch_dtype" in model_kwargs:
+            self.set_output_tensor()
+        else:
+            self.set_output_numpy()
+
+ def get_output_dim(self) -> int:
+ """Get the dimensionality of output embeddings."""
+ return self.output_dim
+
+ def encode(self, text: str | list[str], prefix: str | None = None, **kwargs) -> np.ndarray | torch.Tensor:
+ """
+ Encode text into embeddings using PLaMO's specialized methods.
+
+ This method is compatible with the base TextEmbedder interface and works
+ seamlessly with batch_encode_with_cache.
+
+ Args:
+ text: Input text(s) to encode
+ prefix: Prefix to add to texts
+ **kwargs: Additional arguments (supports query_mode for specialized encoding)
+
+ Returns:
+ Embeddings as numpy array or torch tensor
+ """
+ if isinstance(text, str):
+ text = [text]
+ text_was_str = True
+ else:
+ text_was_str = False
+
+ # Check for query_mode in kwargs, otherwise use instance default
+ use_query_mode = kwargs.get("query_mode", self.query_mode)
+
+ # Apply prefix if provided
+ if prefix:
+ text = [prefix + t for t in text]
+
+ # Encode using PLaMO's specialized methods
+ with torch.inference_mode():
+ embeddings = self._encode_batch(text, use_query_mode)
+
+ # Apply normalization if requested
+ if self.normalize_embeddings:
+ embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+
+ if text_was_str:
+ res = embeddings.view(-1)
+ else:
+ res = embeddings
+
+ if self.convert_to_numpy:
+ return res.cpu().numpy() if res.is_cuda else res.numpy()
+ else:
+ return res
+
+ def _encode_batch(self, text: list[str], query_mode: bool = False) -> torch.Tensor:
+ """
+ Encode a batch of texts using PLaMO's specialized methods with memory optimization.
+
+ Args:
+ text: List of texts to encode
+ query_mode: Whether to use query or document encoding
+
+ Returns:
+ Batch embeddings as torch tensor
+ """
+ if len(text) == 0:
+ return torch.empty(0, self.output_dim, device=self.model_device)
+
+ # Process in reasonable chunks for PLaMO
+ chunk_size = self.batch_size
+ all_embeddings = []
+
+ # Get the actual model (handle DataParallel wrapper)
+ actual_model = self.model.module if self.is_data_parallel else self.model
+
+ with torch.inference_mode():
+ for i in range(0, len(text), chunk_size):
+ chunk = text[i : i + chunk_size]
+
+ try:
+ if query_mode:
+ # Use PLaMO's encode_query method for queries
+ chunk_embeddings = actual_model.encode_query(chunk, self.tokenizer)
+ else:
+ # Use PLaMO's encode_document method for documents
+ chunk_embeddings = actual_model.encode_document(chunk, self.tokenizer)
+
+ # Keep embeddings on device
+ all_embeddings.append(chunk_embeddings)
+
+ except torch.cuda.OutOfMemoryError:
+ # If still OOM, try processing one by one
+ logger.warning(f"OOM with chunk size {len(chunk)}, falling back to single item processing")
+ torch.cuda.empty_cache()
+
+ for single_text in chunk:
+ if query_mode:
+ single_embedding = actual_model.encode_query([single_text], self.tokenizer)
+ else:
+ single_embedding = actual_model.encode_document([single_text], self.tokenizer)
+ all_embeddings.append(single_embedding)
+ torch.cuda.empty_cache()
+
+ # Concatenate all embeddings
+ if all_embeddings:
+ return torch.cat(all_embeddings, dim=0)
+ else:
+ return torch.empty(0, self.output_dim, device=self.model_device)
+
+ def encode_queries(
+ self, queries: str | list[str], prefix: str | None = None, **kwargs
+ ) -> np.ndarray | torch.Tensor:
+ """
+ Convenience method to encode queries using query mode.
+
+ Args:
+ queries: Query text(s) to encode
+ prefix: Prefix to add
+ **kwargs: Additional arguments
+
+ Returns:
+ Query embeddings
+ """
+ return self.encode(queries, prefix=prefix, query_mode=True, **kwargs)
+
+ def encode_documents(
+ self, documents: str | list[str], prefix: str | None = None, **kwargs
+ ) -> np.ndarray | torch.Tensor:
+ """
+ Convenience method to encode documents using document mode.
+
+ Args:
+ documents: Document text(s) to encode
+ prefix: Prefix to add
+ **kwargs: Additional arguments
+
+ Returns:
+ Document embeddings
+ """
+ return self.encode(documents, prefix=prefix, query_mode=False, **kwargs)
+
+ def set_query_mode(self, query_mode: bool = True) -> None:
+ """
+ Set the default encoding mode.
+
+ Args:
+ query_mode: True for query mode, False for document mode
+ """
+ self.query_mode = query_mode
+ logger.info(f"Set default encoding mode to {'query' if query_mode else 'document'}")
+
+ def reset_max_seq_length(self) -> None:
+ """Reset max sequence length to model's original value."""
+ if hasattr(self, "_orig_max_length") and self._orig_max_length:
+ self.max_seq_length = self._orig_max_length
+ logger.info(f"Reset max_seq_length to {self._orig_max_length}")
+ else:
+ logger.warning("Failed to reset max_seq_length - original value not available")
diff --git a/src/jmteb/embedders/sbert_embedder.py b/src/jmteb/embedders/sbert_embedder.py
index ba33a36..892f703 100644
--- a/src/jmteb/embedders/sbert_embedder.py
+++ b/src/jmteb/embedders/sbert_embedder.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import numpy as np
+from loguru import logger
from sentence_transformers import SentenceTransformer
from jmteb.embedders.base import TextEmbedder
@@ -29,6 +30,7 @@ def __init__(
model_kwargs=model_kwargs, # https://github.com/UKPLab/sentence-transformers/blob/84f69fee6dcde023f46a8807e89bc99a7700ba82/sentence_transformers/SentenceTransformer.py#L81-L105 # noqa: E501
tokenizer_kwargs=tokenizer_kwargs,
)
+ self._orig_max_length = self.model.max_seq_length
if max_seq_length:
self.model.max_seq_length = max_seq_length
@@ -70,3 +72,10 @@ def _add_eos_func(self, text: str | list[str]) -> str | list[str]:
def get_output_dim(self) -> int:
return self.model.get_sentence_embedding_dimension()
+
+    def reset_max_seq_length(self) -> None:
+        """Restore the wrapped model's `max_seq_length` to the value recorded in `_orig_max_length`.
+
+        Best-effort: if the original value (or the model attribute) is unavailable,
+        the current limit is left untouched and a warning is logged instead of raising.
+        """
+        try:
+            self.model.max_seq_length = self._orig_max_length
+        except AttributeError as e:
+            # Keep the call non-fatal, but make the failure visible rather than silent.
+            logger.warning(f"Failed to reset `max_seq_length`: {e}")
+            return
+        # Log only after the assignment succeeded, so the message is never misleading.
+        logger.info(f"Reset `max_seq_length` to {self._orig_max_length}")
diff --git a/src/jmteb/embedders/transformers_embedder.py b/src/jmteb/embedders/transformers_embedder.py
index 0592061..721e0c9 100644
--- a/src/jmteb/embedders/transformers_embedder.py
+++ b/src/jmteb/embedders/transformers_embedder.py
@@ -48,6 +48,7 @@ def __init__(
logger.info(f"{self.model.device=}, {torch.cuda.device_count()=}")
self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(model_name_or_path, **tokenizer_kwargs)
+ self._orig_max_length = getattr(self.model, "max_seq_length", None)
self.max_seq_length = getattr(self.model, "max_seq_length", None)
if max_seq_length:
self.max_seq_length = max_seq_length
@@ -135,7 +136,9 @@ def _encode_batch(self, text: list[str], prefix: str | None = None) -> torch.Ten
if self.add_eos:
text = self._add_eos_func(text)
- encoded_input = self.tokenizer(text, padding=True, truncation=True, return_tensors="pt").to(self.model.device)
+ encoded_input = self.tokenizer(
+ text, padding=True, truncation=True, return_tensors="pt", max_length=self.max_seq_length
+ ).to(self.model.device)
model_output = self.model(**encoded_input)
last_hidden_states = model_output["last_hidden_state"]
features = {
diff --git a/src/jmteb/evaluators/classification/evaluator.py b/src/jmteb/evaluators/classification/evaluator.py
index c2b8836..bb3a4ca 100644
--- a/src/jmteb/evaluators/classification/evaluator.py
+++ b/src/jmteb/evaluators/classification/evaluator.py
@@ -66,13 +66,22 @@ def __call__(
if cache_dir is not None:
Path(cache_dir).mkdir(parents=True, exist_ok=True)
+ # Work on a copy so the default added below never mutates self.encode_kwargs
+ encode_kwargs = self.encode_kwargs.copy()
+
+ # For PlamoEmbedder / GemmaEmbedder, default to document mode unless query_mode was given explicitly
+ if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"):
+ if "query_mode" not in encode_kwargs:
+ encode_kwargs["query_mode"] = False # Use document mode for classification texts
+ logger.info(f"Auto-optimized {model.__class__.__name__}: query_mode=False for classification texts")
+
logger.info("Encoding training and validation sentences...")
X_train = model.batch_encode_with_cache(
[item.text for item in self.train_dataset],
prefix=self.prefix,
cache_path=Path(cache_dir) / "train_embeddings.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
y_train = [item.label for item in self.train_dataset]
@@ -81,7 +90,7 @@ def __call__(
prefix=self.prefix,
cache_path=Path(cache_dir) / "val_embeddings.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
y_val = [item.label for item in self.val_dataset]
@@ -95,7 +104,7 @@ def __call__(
prefix=self.prefix,
cache_path=Path(cache_dir) / "test_embeddings.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
y_test = [item.label for item in self.test_dataset]
diff --git a/src/jmteb/evaluators/clustering/evaluator.py b/src/jmteb/evaluators/clustering/evaluator.py
index 2b8cdf2..bbce269 100644
--- a/src/jmteb/evaluators/clustering/evaluator.py
+++ b/src/jmteb/evaluators/clustering/evaluator.py
@@ -14,6 +14,7 @@
MiniBatchKMeans,
)
from sklearn.metrics import homogeneity_completeness_v_measure
+from sklearn.preprocessing import normalize
from jmteb.embedders.base import TextEmbedder
from jmteb.evaluators.base import EmbeddingEvaluator, EvaluationResults
@@ -57,13 +58,22 @@ def __call__(
if cache_dir is not None:
Path(cache_dir).mkdir(parents=True, exist_ok=True)
+ # Work on a copy so the default added below never mutates self.encode_kwargs
+ encode_kwargs = self.encode_kwargs.copy()
+
+ # For PlamoEmbedder / GemmaEmbedder, default to document mode unless query_mode was given explicitly
+ if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"):
+ if "query_mode" not in encode_kwargs:
+ encode_kwargs["query_mode"] = False # Use document mode for clustering texts
+ logger.info(f"Auto-optimized {model.__class__.__name__}: query_mode=False for clustering texts")
+
logger.info("Converting validation data to embeddings...")
val_embeddings = model.batch_encode_with_cache(
[item.text for item in self.val_dataset],
prefix=self.prefix,
cache_path=Path(cache_dir) / "val_embeddings.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
val_labels = [item.label for item in self.val_dataset]
@@ -77,7 +87,7 @@ def __call__(
prefix=self.prefix,
cache_path=Path(cache_dir) / "test_embeddings.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
test_labels = [item.label for item in self.test_dataset]
@@ -127,7 +137,19 @@ def __call__(
def _evaluate_clustering_model(
embeddings: np.ndarray, y_true: list[int], clustering_model: ClusterMixin
) -> tuple[dict[str, float], list[int]]:
- y_pred = clustering_model.fit_predict(embeddings)
+ try:
+ # First try without normalization to preserve original behavior when possible
+ y_pred = clustering_model.fit_predict(embeddings)
+ except ValueError as e:
+ # If overflow error occurs, apply normalization and retry
+ if "infinity" in str(e).lower() or "too large" in str(e).lower():
+ logger.warning(f"Overflow detected in clustering, applying L2 normalization: {e}")
+ embeddings_normalized = normalize(embeddings, norm="l2")
+ y_pred = clustering_model.fit_predict(embeddings_normalized)
+ else:
+ # Re-raise if it's a different ValueError
+ raise e
+
h_score, c_score, v_score = homogeneity_completeness_v_measure(
labels_pred=y_pred, labels_true=np.array(y_true)
)
diff --git a/src/jmteb/evaluators/pair_classification/evaluator.py b/src/jmteb/evaluators/pair_classification/evaluator.py
index ef466bf..8fba017 100644
--- a/src/jmteb/evaluators/pair_classification/evaluator.py
+++ b/src/jmteb/evaluators/pair_classification/evaluator.py
@@ -49,8 +49,19 @@ def __call__(
if cache_dir is not None:
Path(cache_dir).mkdir(parents=True, exist_ok=True)
+ # Work on a copy so the default added below never mutates self.encode_kwargs
+ encode_kwargs = self.encode_kwargs.copy()
+
+ # For PlamoEmbedder / GemmaEmbedder, default to document mode unless query_mode was given explicitly
+ if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"):
+ if "query_mode" not in encode_kwargs:
+ encode_kwargs["query_mode"] = False # Use document mode for pair classification texts
+ from loguru import logger
+
+ logger.info(f"Auto-optimized {model.__class__.__name__}: query_mode=False for pair classification texts")
+
val_embeddings1, val_embeddings2, val_golden_labels = self._convert_to_embeddings(
- model, self.val_dataset, "dev", overwrite_cache, cache_dir
+ model, self.val_dataset, "dev", overwrite_cache, cache_dir, encode_kwargs
)
if self.val_dataset == self.test_dataset:
test_embeddings1, test_embeddings2, test_golden_labels = (
@@ -60,7 +71,7 @@ def __call__(
)
else:
test_embeddings1, test_embeddings2, test_golden_labels = self._convert_to_embeddings(
- model, self.test_dataset, "test", overwrite_cache, cache_dir
+ model, self.test_dataset, "test", overwrite_cache, cache_dir, encode_kwargs
)
val_results = {}
@@ -119,20 +130,24 @@ def _convert_to_embeddings(
split: str = "test",
overwrite_cache: bool = False,
cache_dir: str | None = None,
+ encode_kwargs: dict | None = None,
) -> tuple[np.ndarray, np.ndarray, list[float]]:
+ if encode_kwargs is None:
+ encode_kwargs = self.encode_kwargs
+
embeddings1 = model.batch_encode_with_cache(
[item.sentence1 for item in dataset],
prefix=self.sentence1_prefix,
cache_path=Path(cache_dir) / f"{split}_embeddings1.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
embeddings2 = model.batch_encode_with_cache(
[item.sentence2 for item in dataset],
prefix=self.sentence2_prefix,
cache_path=Path(cache_dir) / f"{split}_embeddings2.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
golden_labels = [item.label for item in dataset]
return embeddings1, embeddings2, golden_labels
diff --git a/src/jmteb/evaluators/reranking/evaluator.py b/src/jmteb/evaluators/reranking/evaluator.py
index 144ed36..0d1be95 100644
--- a/src/jmteb/evaluators/reranking/evaluator.py
+++ b/src/jmteb/evaluators/reranking/evaluator.py
@@ -38,6 +38,8 @@ class RerankingEvaluator(EmbeddingEvaluator):
query_prefix (str | None): prefix for queries. Defaults to None.
doc_prefix (str | None): prefix for documents. Defaults to None.
log_predictions (bool): whether to log predictions of each datapoint. Defaults to False.
+ force_max_length (bool): whether to overwrite the global max_length with model's maximum token length.
+ Defaults to False.
top_n_docs_to_log (int): log only top n documents. Defaults to 5.
query_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding queries. Defaults to {}.
doc_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding documents. Defaults to {}.
@@ -53,6 +55,7 @@ def __init__(
doc_prefix: str | None = None,
log_predictions: bool = False,
top_n_docs_to_log: int = 5,
+ force_max_length: bool = False,
query_encode_kwargs: dict = {},
doc_encode_kwargs: dict = {},
) -> None:
@@ -65,6 +68,7 @@ def __init__(
self.doc_prefix = doc_prefix
self.log_predictions = log_predictions
self.top_n_docs_to_log = top_n_docs_to_log
+ self.force_max_length = force_max_length
self.query_encode_kwargs = query_encode_kwargs
self.doc_encode_kwargs = doc_encode_kwargs
@@ -75,15 +79,33 @@ def __call__(
overwrite_cache: bool = False,
) -> EvaluationResults:
model.set_output_tensor()
+ if self.force_max_length:
+ model.reset_max_seq_length()
+
if cache_dir is not None:
Path(cache_dir).mkdir(parents=True, exist_ok=True)
+        # Work on copies so the defaults added below never mutate the evaluator's own kwargs.
+        query_kwargs = self.query_encode_kwargs.copy()
+        doc_kwargs = self.doc_encode_kwargs.copy()
+
+        # PlamoEmbedder / GemmaEmbedder take a `query_mode` kwarg: default queries to
+        # query mode and documents to document mode unless the caller set it explicitly.
+        if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"):
+            query_kwargs.setdefault("query_mode", True)
+            doc_kwargs.setdefault("query_mode", False)
+            # Report the effective values (an explicit override may differ from the defaults).
+            logger.info(
+                f"Auto-optimized {model.__class__.__name__}: "
+                f"query_mode={query_kwargs['query_mode']} for queries, "
+                f"query_mode={doc_kwargs['query_mode']} for documents"
+            )
+
val_query_embeddings = model.batch_encode_with_cache(
text_list=[item.query for item in self.val_query_dataset],
prefix=self.query_prefix,
cache_path=Path(cache_dir) / "val_query.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.query_encode_kwargs,
+ **query_kwargs,
)
if self.val_query_dataset == self.test_query_dataset:
test_query_embeddings = val_query_embeddings
@@ -93,14 +115,14 @@ def __call__(
prefix=self.query_prefix,
cache_path=Path(cache_dir) / "test_query.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.query_encode_kwargs,
+ **query_kwargs,
)
doc_embeddings = model.batch_encode_with_cache(
text_list=[item.text for item in self.doc_dataset],
prefix=self.doc_prefix,
cache_path=Path(cache_dir) / "corpus.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.doc_encode_kwargs,
+ **doc_kwargs,
)
logger.info("Start reranking")
@@ -211,8 +233,6 @@ def _format_predictions(
pred_docs: list[RerankingDoc] = [
doc_dataset[doc_dataset.docid_to_idx[pred_docid]] for pred_docid in pred_docids
]
- logger.info(f"{golden_docs=}")
- logger.info(f"{pred_docs=}")
prediction = RerankingPrediction(
query=q.query,
relevant_docs=golden_docs,
diff --git a/src/jmteb/evaluators/retrieval/evaluator.py b/src/jmteb/evaluators/retrieval/evaluator.py
index 2fd6a21..fc7476e 100644
--- a/src/jmteb/evaluators/retrieval/evaluator.py
+++ b/src/jmteb/evaluators/retrieval/evaluator.py
@@ -41,6 +41,8 @@ class RetrievalEvaluator(EmbeddingEvaluator):
query_prefix (str | None): prefix for queries. Defaults to None.
doc_prefix (str | None): prefix for documents. Defaults to None.
log_predictions (bool): whether to log predictions of each datapoint. Defaults to False.
+ force_max_length (bool): whether to overwrite the global max_length with model's maximum token length.
+ Defaults to False.
top_n_docs_to_log (int): log only top n documents that are predicted as relevant. Defaults to 5.
query_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding queries. Defaults to {}.
doc_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding documents. Defaults to {}.
@@ -58,6 +60,7 @@ def __init__(
doc_prefix: str | None = None,
log_predictions: bool = False,
top_n_docs_to_log: int = 5,
+ force_max_length: bool = False,
query_encode_kwargs: dict = {},
doc_encode_kwargs: dict = {},
) -> None:
@@ -67,7 +70,7 @@ def __init__(
self.doc_chunk_size = doc_chunk_size
- self.accuracy_at_k = accuracy_at_k or [1, 3, 5, 10]
+ self.accuracy_at_k = accuracy_at_k or [1, 3, 5, 10, 20, 30, 50]
self.ndcg_at_k = ndcg_at_k or [10]
self.max_top_k = max(sum([self.accuracy_at_k, self.ndcg_at_k], []))
self.main_metric = f"ndcg@{self.ndcg_at_k[0]}"
@@ -76,6 +79,7 @@ def __init__(
self.doc_prefix = doc_prefix
self.log_predictions = log_predictions
self.top_n_docs_to_log = top_n_docs_to_log
+ self.force_max_length = force_max_length
self.query_encode_kwargs = query_encode_kwargs
self.doc_encode_kwargs = doc_encode_kwargs
@@ -86,15 +90,32 @@ def __call__(
overwrite_cache: bool = False,
) -> EvaluationResults:
model.set_output_tensor()
+ if self.force_max_length:
+ model.reset_max_seq_length()
if cache_dir is not None:
Path(cache_dir).mkdir(parents=True, exist_ok=True)
+        # Work on copies so the defaults added below never mutate the evaluator's own kwargs.
+        query_kwargs = self.query_encode_kwargs.copy()
+        doc_kwargs = self.doc_encode_kwargs.copy()
+
+        # PlamoEmbedder / GemmaEmbedder take a `query_mode` kwarg: default queries to
+        # query mode and documents to document mode unless the caller set it explicitly.
+        if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"):
+            query_kwargs.setdefault("query_mode", True)
+            doc_kwargs.setdefault("query_mode", False)
+            # Report the effective values (an explicit override may differ from the defaults).
+            logger.info(
+                f"Auto-optimized {model.__class__.__name__}: "
+                f"query_mode={query_kwargs['query_mode']} for queries, "
+                f"query_mode={doc_kwargs['query_mode']} for documents"
+            )
+
val_query_embeddings = model.batch_encode_with_cache(
text_list=[item.query for item in self.val_query_dataset],
prefix=self.query_prefix,
cache_path=Path(cache_dir) / "val_query.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.query_encode_kwargs,
+ **query_kwargs,
)
if self.val_query_dataset == self.test_query_dataset:
test_query_embeddings = val_query_embeddings
@@ -104,7 +125,7 @@ def __call__(
prefix=self.query_prefix,
cache_path=Path(cache_dir) / "test_query.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.query_encode_kwargs,
+ **query_kwargs,
)
doc_embeddings = model.batch_encode_with_cache(
@@ -112,7 +133,7 @@ def __call__(
prefix=self.doc_prefix,
cache_path=Path(cache_dir) / "corpus.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.doc_encode_kwargs,
+ **doc_kwargs,
)
logger.info("Start retrieval")
diff --git a/src/jmteb/evaluators/sts/evaluator.py b/src/jmteb/evaluators/sts/evaluator.py
index 380ceea..f4d4359 100644
--- a/src/jmteb/evaluators/sts/evaluator.py
+++ b/src/jmteb/evaluators/sts/evaluator.py
@@ -52,8 +52,17 @@ def __call__(
if cache_dir is not None:
Path(cache_dir).mkdir(parents=True, exist_ok=True)
+ # Copy the configured kwargs; no query_mode default is applied for STS (the block below is disabled)
+ encode_kwargs = self.encode_kwargs.copy()
+
+ # # Disabled: PlamoEmbedder document-mode default for STS texts
+ # if model.__class__.__name__ == "PlamoEmbedder":
+ # if "query_mode" not in encode_kwargs:
+ # encode_kwargs["query_mode"] = False # Use document mode for STS texts
+ # logger.info("Auto-optimized PlamoEmbedder: query_mode=False for STS texts")
+
val_embeddings1, val_embeddings2, val_golden_scores = self._convert_to_embeddings(
- model, self.val_dataset, "dev", overwrite_cache, cache_dir
+ model, self.val_dataset, "dev", overwrite_cache, cache_dir, encode_kwargs
)
if self.val_dataset == self.test_dataset:
test_embeddings1, test_embeddings2, test_golden_scores = (
@@ -62,7 +71,7 @@ def __call__(
val_golden_scores,
)
test_embeddings1, test_embeddings2, test_golden_scores = self._convert_to_embeddings(
- model, self.test_dataset, "test", overwrite_cache, cache_dir
+ model, self.test_dataset, "test", overwrite_cache, cache_dir, encode_kwargs
)
similarity_functions = {
@@ -146,20 +155,24 @@ def _convert_to_embeddings(
split: str = "test",
overwrite_cache: bool = False,
cache_dir: str | None = None,
+ encode_kwargs: dict | None = None,
) -> tuple[Tensor, Tensor, list[float]]:
+ if encode_kwargs is None:
+ encode_kwargs = self.encode_kwargs
+
embeddings1 = model.batch_encode_with_cache(
[item.sentence1 for item in dataset],
prefix=self.sentence1_prefix,
cache_path=Path(cache_dir) / f"{split}_embeddings1.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
embeddings2 = model.batch_encode_with_cache(
[item.sentence2 for item in dataset],
prefix=self.sentence2_prefix,
cache_path=Path(cache_dir) / f"{split}_embeddings2.bin" if cache_dir is not None else None,
overwrite_cache=overwrite_cache,
- **self.encode_kwargs,
+ **encode_kwargs,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
embeddings1 = convert_to_tensor(embeddings1, device)
diff --git a/src/jmteb/utils/score_recorder.py b/src/jmteb/utils/score_recorder.py
index afbf22c..361c809 100644
--- a/src/jmteb/utils/score_recorder.py
+++ b/src/jmteb/utils/score_recorder.py
@@ -56,8 +56,21 @@ def record_predictions(self, results: EvaluationResults, dataset_name: str, task
def record_summary(self):
if not self.save_dir:
return
- summary: dict[str, dict[str, dict[str, float]]] = defaultdict(dict)
+
+ summary_path = Path(self.save_dir) / "summary.json"
+
+ # Start from the summary already on disk, if any, so results from earlier runs are kept
+ if summary_path.exists():
+ with open(summary_path, "r") as fin:
+ summary = json.load(fin)
+ else:
+ summary = {}
+
+ # Merge this run's scores in; an existing task/dataset entry is replaced by the new metric
for task_name, task_scores in self.scores.items():
+ if task_name not in summary:
+ summary[task_name] = {}
for dataset_name, results in self.scores[task_name].items():
summary[task_name][dataset_name] = {results.metric_name: results.metric_value}
- self.save_to_json(summary, Path(self.save_dir) / "summary.json")
+
+ self.save_to_json(summary, summary_path)
diff --git a/tests/embedders/test_openai.py b/tests/embedders/test_openai.py
index 448dfaf..c1429da 100644
--- a/tests/embedders/test_openai.py
+++ b/tests/embedders/test_openai.py
@@ -75,9 +75,7 @@ def test_token_count(self):
def test_truncate(self):
assert len(self.model.encode_and_truncate_text(TEXT)) == 6
- assert (
- len(self.model.encode_and_truncate_text(TEXT * self.model.max_token_length)) == self.model.max_token_length
- )
+ assert len(self.model.encode_and_truncate_text(TEXT * self.model.max_seq_length)) == self.model.max_seq_length
def test_nonexistent_model(self):
with pytest.raises(AssertionError):
@@ -89,9 +87,9 @@ def test_model_dim(self):
assert OpenAIEmbedder(model="text-embedding-ada-002").dim == 1536
def test_model_max_token_length(self):
- assert OpenAIEmbedder(model="text-embedding-3-large").max_token_length == 8191
- assert OpenAIEmbedder(model="text-embedding-3-small").max_token_length == 8191
- assert OpenAIEmbedder(model="text-embedding-ada-002").max_token_length == 8191
+ assert OpenAIEmbedder(model="text-embedding-3-large").max_seq_length == 8191
+ assert OpenAIEmbedder(model="text-embedding-3-small").max_seq_length == 8191
+ assert OpenAIEmbedder(model="text-embedding-ada-002").max_seq_length == 8191
def test_model_encoder(self):
assert OpenAIEmbedder(model="text-embedding-3-large").encoding.name == "cl100k_base"