diff --git a/docs/results/BAAI/bge-m3/summary.json b/docs/results/BAAI/bge-m3/summary.json new file mode 100644 index 0000000..72a5ee8 --- /dev/null +++ b/docs/results/BAAI/bge-m3/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.718621425743256 + }, + "amazon_review_classification": { + "macro_f1": 0.5664555524508175 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9441075327867781 + }, + "massive_intent_classification": { + "macro_f1": 0.7868184551588373 + }, + "massive_scenario_classification": { + "macro_f1": 0.8970320222457714 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8424907003170607 + }, + "wrime_classification": { + "macro_f1": 0.4316630478439933 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9327323748768209 + }, + "jacwir_reranking": { + "ndcg@10": 0.8955144849023412 + }, + "jqara": { + "ndcg@10": 0.5391637817603238 + }, + "miracl_reranking": { + "ndcg@10": 0.8596271423829606 + }, + "mldr_reranking": { + "ndcg@10": 0.9778261029468881 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.851348898788452 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.6906829361885021 + }, + "jaqket": { + "ndcg@10": 0.5659460589444328 + }, + "mintaka_retrieval": { + "ndcg@10": 0.32175483024897333 + }, + "miracl_retrieval": { + "ndcg@10": 0.734809783755516 + }, + "mldr_retrieval": { + "ndcg@10": 0.5126063501865914 + }, + "mrtydi": { + "ndcg@10": 0.45179452203971654 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9521915103722084 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9752948774973371 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9602075886902439 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9197525363243463 + } + }, + "STS": { + "jsick": { + "spearman": 0.7926524802982091 + }, + "jsts": { + "spearman": 0.8020865982595183 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5475619174246511 + }, + "mewsc16": { + "v_measure_score": 
0.4200457612686986 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3991288954568376 + } + } +} \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json index 1b99a44..beacb01 100644 --- a/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json +++ b/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7809527709426081 + "macro_f1": 0.7769528027441275 }, "amazon_review_classification": { - "macro_f1": 0.5155899232320224 + "macro_f1": 0.5146406875677701 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8844781754440035 }, "massive_intent_classification": { - "macro_f1": 0.7879373479249787 + "macro_f1": 0.7872353730798753 }, "massive_scenario_classification": { - "macro_f1": 0.8662625888023707 + "macro_f1": 0.8639715373498098 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8350488266987821 + }, + "wrime_classification": { + "macro_f1": 0.3815230965003785 } }, "Reranking": { "esci": { - "ndcg@10": 0.9095168116460639 + "ndcg@10": 0.909518320556229 + }, + "jacwir_reranking": { + "ndcg@10": 0.5981293078380808 + }, + "jqara": { + "ndcg@10": 0.3719557553111225 + }, + "miracl_reranking": { + "ndcg@10": 0.6789908587925922 + }, + "mldr_reranking": { + "ndcg@10": 0.8281088898171538 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.4085978545476503 + }, "jagovfaqs_22k": { - "ndcg@10": 0.42314124780036416 + "ndcg@10": 0.43879890119990833 }, "jaqket": { - "ndcg@10": 0.36199154051747723 + "ndcg@10": 0.3555985699236658 + }, + "mintaka_retrieval": { + "ndcg@10": 0.1997740482697841 + }, + "miracl_retrieval": { + "ndcg@10": 0.16521386136598404 + }, + "mldr_retrieval": { + "ndcg@10": 0.12060735418211223 }, "mrtydi": { - "ndcg@10": 0.07810683176415421 + "ndcg@10": 0.07107405961190999 + }, + "nlp_journal_abs_article": { 
+ "ndcg@10": 0.5430415601583998 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.6077212544951452 + "ndcg@10": 0.5585881454407594 }, "nlp_journal_title_abs": { - "ndcg@10": 0.6433890489201118 + "ndcg@10": 0.629620778788499 }, "nlp_journal_title_intro": { - "ndcg@10": 0.39317174536190913 + "ndcg@10": 0.3517328767423871 } }, "STS": { "jsick": { - "spearman": 0.754165277432144 + "spearman": 0.7775668305928584 }, "jsts": { - "spearman": 0.7558202366183716 + "spearman": 0.7563460117163054 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.4966545453348478 + "v_measure_score": 0.4601335671191492 }, "mewsc16": { - "v_measure_score": 0.3877356318022785 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6237623762376237 + "v_measure_score": 0.3922006290468797 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3456006554316726 } } } \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json index ea227c2..6a83eb2 100644 --- a/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json +++ b/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.776174162517931 + "macro_f1": 0.7779156199278396 }, "amazon_review_classification": { - "macro_f1": 0.5085781180553806 + "macro_f1": 0.5111451768867725 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8782111274457993 }, "massive_intent_classification": { - "macro_f1": 0.7718541530739129 + "macro_f1": 0.7796973463634825 }, "massive_scenario_classification": { - "macro_f1": 0.8592571786794985 + "macro_f1": 0.8634142669499835 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8506408877596591 + }, + "wrime_classification": { + "macro_f1": 0.3656175961601361 } }, "Reranking": { "esci": { - "ndcg@10": 0.9100551950168166 + "ndcg@10": 0.9092446252246911 + }, + 
"jacwir_reranking": { + "ndcg@10": 0.605113846464576 + }, + "jqara": { + "ndcg@10": 0.36840730960684165 + }, + "miracl_reranking": { + "ndcg@10": 0.693114284522583 + }, + "mldr_reranking": { + "ndcg@10": 0.8530771666734125 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.42431895793525753 + }, "jagovfaqs_22k": { - "ndcg@10": 0.42368135774043536 + "ndcg@10": 0.43601956332213093 }, "jaqket": { - "ndcg@10": 0.37721850397542034 + "ndcg@10": 0.37354035206874886 + }, + "mintaka_retrieval": { + "ndcg@10": 0.2518443007449429 + }, + "miracl_retrieval": { + "ndcg@10": 0.14756204576714857 + }, + "mldr_retrieval": { + "ndcg@10": 0.16862391555076126 }, "mrtydi": { - "ndcg@10": 0.07878085186566607 + "ndcg@10": 0.07770347901718931 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.5689006657309228 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.636999375405723 + "ndcg@10": 0.5911474254499767 }, "nlp_journal_title_abs": { - "ndcg@10": 0.6413498649875696 + "ndcg@10": 0.618101892252404 }, "nlp_journal_title_intro": { - "ndcg@10": 0.397250919496823 + "ndcg@10": 0.3287673013916751 } }, "STS": { "jsick": { - "spearman": 0.7756925231422259 + "spearman": 0.7893346270810556 }, "jsts": { - "spearman": 0.7652968548841591 + "spearman": 0.7657111966582518 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5262387436934941 + "v_measure_score": 0.48558605187442483 }, "mewsc16": { - "v_measure_score": 0.37277574537292835 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.623321554770318 + "v_measure_score": 0.4319848997472401 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3860004176729398 } } } \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json index dbed068..ebc1037 100644 --- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json +++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json @@ 
-1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7619809437515043 + "macro_f1": 0.7430232193667698 }, "amazon_review_classification": { - "macro_f1": 0.5205592432502059 + "macro_f1": 0.5196833867285527 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8969457721352727 }, "massive_intent_classification": { - "macro_f1": 0.7789367871593064 + "macro_f1": 0.7782504182162112 }, "massive_scenario_classification": { - "macro_f1": 0.8490320705866646 + "macro_f1": 0.8459551634050977 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8382321236746973 + }, + "wrime_classification": { + "macro_f1": 0.3814631725334783 } }, "Reranking": { "esci": { - "ndcg@10": 0.9065584234991577 + "ndcg@10": 0.906706098295787 + }, + "jacwir_reranking": { + "ndcg@10": 0.581551030502223 + }, + "jqara": { + "ndcg@10": 0.3666097794082717 + }, + "miracl_reranking": { + "ndcg@10": 0.6908907697836885 + }, + "mldr_reranking": { + "ndcg@10": 0.8615323536010276 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.39917758524262303 + }, "jagovfaqs_22k": { - "ndcg@10": 0.4411487123884245 + "ndcg@10": 0.4460371569059824 }, "jaqket": { - "ndcg@10": 0.39613283459361814 + "ndcg@10": 0.3845053301501902 + }, + "mintaka_retrieval": { + "ndcg@10": 0.2239147895010841 + }, + "miracl_retrieval": { + "ndcg@10": 0.13942471586306499 + }, + "mldr_retrieval": { + "ndcg@10": 0.139069576010256 }, "mrtydi": { - "ndcg@10": 0.08154879873415645 + "ndcg@10": 0.07299085059942924 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.5835049460335981 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.6276035246534508 + "ndcg@10": 0.5863133806218087 }, "nlp_journal_title_abs": { - "ndcg@10": 0.5838785018803183 + "ndcg@10": 0.5743459511193183 }, "nlp_journal_title_intro": { - "ndcg@10": 0.3489329387182086 + "ndcg@10": 0.32465205260710006 } }, "STS": { "jsick": { - "spearman": 0.7463567093877269 + "spearman": 0.7525289500265361 }, "jsts": { - "spearman": 0.7468283806971927 
+ "spearman": 0.7466329702466956 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.41041888940251137 + "v_measure_score": 0.45840176801621957 }, "mewsc16": { - "v_measure_score": 0.45175891401665724 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6236711552090717 + "v_measure_score": 0.4407932537977668 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.38669286929581886 } } } \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json index 9528312..46f5e26 100644 --- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json +++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7619809437515043 + "macro_f1": 0.7640029182013914 }, "amazon_review_classification": { - "macro_f1": 0.5152108946679324 + "macro_f1": 0.5165133824101508 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8785996540635361 }, "massive_intent_classification": { - "macro_f1": 0.7895128475562229 + "macro_f1": 0.7815141648175687 }, "massive_scenario_classification": { - "macro_f1": 0.865430249169577 + "macro_f1": 0.8643739735863134 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8179797886754027 + }, + "wrime_classification": { + "macro_f1": 0.37929751450328747 } }, "Reranking": { "esci": { - "ndcg@10": 0.9115815294581953 + "ndcg@10": 0.9116742957456255 + }, + "jacwir_reranking": { + "ndcg@10": 0.6540921936468603 + }, + "jqara": { + "ndcg@10": 0.3839109493881204 + }, + "miracl_reranking": { + "ndcg@10": 0.7018821974047713 + }, + "mldr_reranking": { + "ndcg@10": 0.8442037101394532 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.4895140949755706 + }, "jagovfaqs_22k": { - "ndcg@10": 0.47387768939865055 + "ndcg@10": 0.48413330907538854 }, "jaqket": { - "ndcg@10": 
0.3956683977353904 + "ndcg@10": 0.3872950509227257 + }, + "mintaka_retrieval": { + "ndcg@10": 0.25723625707011927 + }, + "miracl_retrieval": { + "ndcg@10": 0.2159968215066114 + }, + "mldr_retrieval": { + "ndcg@10": 0.18105368261359917 }, "mrtydi": { - "ndcg@10": 0.1144234568266308 + "ndcg@10": 0.11016096912346693 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.5890880676571459 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.6416096544574569 + "ndcg@10": 0.6005134171957127 }, "nlp_journal_title_abs": { - "ndcg@10": 0.7023477497744102 + "ndcg@10": 0.691482229451667 }, "nlp_journal_title_intro": { - "ndcg@10": 0.4536720868647063 + "ndcg@10": 0.377200379602747 } }, "STS": { "jsick": { - "spearman": 0.781770693640686 + "spearman": 0.7914302448138066 }, "jsts": { - "spearman": 0.7680617109850311 + "spearman": 0.7677275529386515 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5301620892693397 + "v_measure_score": 0.4879255424919774 }, "mewsc16": { - "v_measure_score": 0.4034776723308173 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6238078417520311 + "v_measure_score": 0.42611073323310256 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.2641681900458691 } } } \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json index b36686c..dad1d0c 100644 --- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json +++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7725250131648236 + "macro_f1": 0.7767065011282246 }, "amazon_review_classification": { - "macro_f1": 0.5341627023771393 + "macro_f1": 0.5348080733659045 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8928165629175933 }, "massive_intent_classification": { - "macro_f1": 0.7682863192709365 + "macro_f1": 
0.7678594675802368 }, "massive_scenario_classification": { - "macro_f1": 0.8639396658321546 + "macro_f1": 0.8624414954250645 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8376983111767246 + }, + "wrime_classification": { + "macro_f1": 0.4088843388537483 } }, "Reranking": { "esci": { - "ndcg@10": 0.9094717381883379 + "ndcg@10": 0.9093431066849924 + }, + "jacwir_reranking": { + "ndcg@10": 0.6144762455614383 + }, + "jqara": { + "ndcg@10": 0.42466871751866847 + }, + "miracl_reranking": { + "ndcg@10": 0.7065312090166875 + }, + "mldr_reranking": { + "ndcg@10": 0.8742363417086798 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.4627911424268102 + }, "jagovfaqs_22k": { - "ndcg@10": 0.47038430326303626 + "ndcg@10": 0.4824617060944974 }, "jaqket": { - "ndcg@10": 0.44101304795602897 + "ndcg@10": 0.4416882664197474 + }, + "mintaka_retrieval": { + "ndcg@10": 0.28888654887615833 + }, + "miracl_retrieval": { + "ndcg@10": 0.1951539369285861 + }, + "mldr_retrieval": { + "ndcg@10": 0.18656064853165188 }, "mrtydi": { - "ndcg@10": 0.11429128335865787 + "ndcg@10": 0.11438786651077741 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.439694854198857 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.43434267808785576 + "ndcg@10": 0.40326645532241284 }, "nlp_journal_title_abs": { - "ndcg@10": 0.6240651697600803 + "ndcg@10": 0.6048895627840009 }, "nlp_journal_title_intro": { - "ndcg@10": 0.3651687833824759 + "ndcg@10": 0.36508949429446635 } }, "STS": { "jsick": { - "spearman": 0.787528927058734 + "spearman": 0.7876474308902304 }, "jsts": { - "spearman": 0.7781413957931619 + "spearman": 0.7782114794698556 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.48448646364489634 + "v_measure_score": 0.5129910499369752 }, "mewsc16": { - "v_measure_score": 0.43168522818790694 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6235418875927891 + "v_measure_score": 0.46267377071476495 + }, + "sib200_japanese_clustering": { + "v_measure_score": 
0.3603960521680572 } } } \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json index f620d50..3101473 100644 --- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json +++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7635642561809131 + "macro_f1": 0.7655145272700131 }, "amazon_review_classification": { - "macro_f1": 0.5275222511867922 + "macro_f1": 0.5273281594091623 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8821782850442395 }, "massive_intent_classification": { - "macro_f1": 0.7688060073049678 + "macro_f1": 0.772169445045981 }, "massive_scenario_classification": { - "macro_f1": 0.8651446837233107 + "macro_f1": 0.8625146467158739 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8145447793317748 + }, + "wrime_classification": { + "macro_f1": 0.40382215327142257 } }, "Reranking": { "esci": { - "ndcg@10": 0.9129851570116734 + "ndcg@10": 0.9130235242422614 + }, + "jacwir_reranking": { + "ndcg@10": 0.6513884390883999 + }, + "jqara": { + "ndcg@10": 0.44959095699445484 + }, + "miracl_reranking": { + "ndcg@10": 0.7121442551193732 + }, + "mldr_reranking": { + "ndcg@10": 0.8679395106334268 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.5316167737103407 + }, "jagovfaqs_22k": { - "ndcg@10": 0.5014367709991477 + "ndcg@10": 0.5120263378587457 }, "jaqket": { - "ndcg@10": 0.4583812630740073 + "ndcg@10": 0.45810454318653493 + }, + "mintaka_retrieval": { + "ndcg@10": 0.30420713299186014 + }, + "miracl_retrieval": { + "ndcg@10": 0.260782337674165 + }, + "mldr_retrieval": { + "ndcg@10": 0.23652695166828322 }, "mrtydi": { - "ndcg@10": 0.13003320802922363 + "ndcg@10": 0.1306190778426387 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.5464834936384055 }, "nlp_journal_abs_intro": { 
- "ndcg@10": 0.5508587506679636 + "ndcg@10": 0.5213267121181618 }, "nlp_journal_title_abs": { - "ndcg@10": 0.7497069192695408 + "ndcg@10": 0.7412764112062588 }, "nlp_journal_title_intro": { - "ndcg@10": 0.4524300499843447 + "ndcg@10": 0.4220927003134505 } }, "STS": { "jsick": { - "spearman": 0.7984403024596518 + "spearman": 0.7985649981589037 }, "jsts": { - "spearman": 0.7813685476201204 + "spearman": 0.7813825399856615 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5319881995988209 + "v_measure_score": 0.5159318544938132 }, "mewsc16": { - "v_measure_score": 0.4330807170988368 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6226614895870103 + "v_measure_score": 0.4267958807672512 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3178045302473092 } } } \ No newline at end of file diff --git a/docs/results/OpenAI/text-embedding-3-large/summary.json b/docs/results/OpenAI/text-embedding-3-large/summary.json index 0029b0d..cf5b699 100644 --- a/docs/results/OpenAI/text-embedding-3-large/summary.json +++ b/docs/results/OpenAI/text-embedding-3-large/summary.json @@ -6,28 +6,64 @@ "amazon_review_classification": { "macro_f1": 0.6043632319384946 }, + "japanese_sentiment_classification": { + "macro_f1": 0.9689111460113327 + }, "massive_intent_classification": { "macro_f1": 0.8090871295952566 }, "massive_scenario_classification": { "macro_f1": 0.9108443051510002 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8785070638424861 + }, + "wrime_classification": { + "macro_f1": 0.45837220696591946 } }, "Reranking": { "esci": { "ndcg@10": 0.9358042266852659 + }, + "jacwir_reranking": { + "ndcg@10": 0.8678014849879991 + }, + "jqara": { + "ndcg@10": 0.5688951496540466 + }, + "miracl_reranking": { + "ndcg@10": 0.8379796888542357 + }, + "mldr_reranking": { + "ndcg@10": 0.9423911330344104 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8290267731484572 + }, "jagovfaqs_22k": { "ndcg@10": 0.7240937077183436 }, "jaqket": 
{ "ndcg@10": 0.48208863565793814 }, + "mintaka_retrieval": { + "ndcg@10": 0.6351669096573943 + }, + "miracl_retrieval": { + "ndcg@10": 0.6056623188124566 + }, + "mldr_retrieval": { + "ndcg@10": 0.4526315025094686 + }, "mrtydi": { "ndcg@10": 0.3488438390945784 }, + "nlp_journal_abs_article": { + "ndcg@10": 0.923732838888777 + }, "nlp_journal_abs_intro": { "ndcg@10": 0.9932811349540317 }, @@ -36,7 +72,7 @@ }, "nlp_journal_title_intro": { "ndcg@10": 0.9547126796600445 - } + } }, "STS": { "jsick": { @@ -52,11 +88,9 @@ }, "mewsc16": { "v_measure_score": 0.4955424351458981 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6234502302515055 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4882897499806697 } } } \ No newline at end of file diff --git a/docs/results/OpenAI/text-embedding-3-small/summary.json b/docs/results/OpenAI/text-embedding-3-small/summary.json index 3391826..ccfdccb 100644 --- a/docs/results/OpenAI/text-embedding-3-small/summary.json +++ b/docs/results/OpenAI/text-embedding-3-small/summary.json @@ -6,28 +6,64 @@ "amazon_review_classification": { "macro_f1": 0.5592259673654241 }, + "japanese_sentiment_classification": { + "macro_f1": 0.8997314741995592 + }, "massive_intent_classification": { "macro_f1": 0.7766119663088307 }, "massive_scenario_classification": { "macro_f1": 0.8866536867311439 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8472270726472407 + }, + "wrime_classification": { + "macro_f1": 0.4005292604550654 } }, "Reranking": { "esci": { "ndcg@10": 0.9291728102678644 + }, + "jacwir_reranking": { + "ndcg@10": 0.8472076343603366 + }, + "jqara": { + "ndcg@10": 0.3858424853310068 + }, + "miracl_reranking": { + "ndcg@10": 0.7761045097931168 + }, + "mldr_reranking": { + "ndcg@10": 0.9261211375496474 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.7958409152797974 + }, "jagovfaqs_22k": { "ndcg@10": 0.640150048193537 }, "jaqket": { "ndcg@10": 0.3394304922804131 }, + "mintaka_retrieval": { + 
"ndcg@10": 0.3243993062339528 + }, + "miracl_retrieval": { + "ndcg@10": 0.4844750116221409 + }, + "mldr_retrieval": { + "ndcg@10": 0.35067885909631535 + }, "mrtydi": { "ndcg@10": 0.2002984123046011 }, + "nlp_journal_abs_article": { + "ndcg@10": 0.8583248954344459 + }, "nlp_journal_abs_intro": { "ndcg@10": 0.9846617848570168 }, @@ -52,11 +88,9 @@ }, "mewsc16": { "v_measure_score": 0.4755374215259236 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6227417640807651 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.44591888262353296 } } } \ No newline at end of file diff --git a/docs/results/OpenAI/text-embedding-ada-002/summary.json b/docs/results/OpenAI/text-embedding-ada-002/summary.json index 3523d7d..851c798 100644 --- a/docs/results/OpenAI/text-embedding-ada-002/summary.json +++ b/docs/results/OpenAI/text-embedding-ada-002/summary.json @@ -6,28 +6,64 @@ "amazon_review_classification": { "macro_f1": 0.5312953134953877 }, + "japanese_sentiment_classification": { + "macro_f1": 0.8876337189807528 + }, "massive_intent_classification": { "macro_f1": 0.7457150118928685 }, "massive_scenario_classification": { "macro_f1": 0.8689044829586676 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8039306302437722 + }, + "wrime_classification": { + "macro_f1": 0.3757375090991345 } }, "Reranking": { "esci": { "ndcg@10": 0.9303611831749345 + }, + "jacwir_reranking": { + "ndcg@10": 0.8391440408595291 + }, + "jqara": { + "ndcg@10": 0.37540986441296365 + }, + "miracl_reranking": { + "ndcg@10": 0.7282642345185789 + }, + "mldr_reranking": { + "ndcg@10": 0.9082852722613336 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.7807563383397835 + }, "jagovfaqs_22k": { "ndcg@10": 0.6102270226904314 }, "jaqket": { "ndcg@10": 0.4256467956806472 }, + "mintaka_retrieval": { + "ndcg@10": 0.27093020670377677 + }, "mrtydi": { "ndcg@10": 0.1450739420851161 }, + "miracl_retrieval": { + "ndcg@10": 0.3453600176817199 + }, + "mldr_retrieval": { + 
"ndcg@10": 0.3189777971587629 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9750618854208265 + }, "nlp_journal_abs_intro": { "ndcg@10": 0.9499224324391132 }, @@ -52,11 +88,9 @@ }, "mewsc16": { "v_measure_score": 0.4691938182964486 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6239830208701805 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.49744578060685957 } } } \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-base-v2/summary.json b/docs/results/cl-nagoya/ruri-base-v2/summary.json new file mode 100644 index 0000000..c090ce8 --- /dev/null +++ b/docs/results/cl-nagoya/ruri-base-v2/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7597182825660609 + }, + "amazon_review_classification": { + "macro_f1": 0.5554544939941979 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9235657959062215 + }, + "massive_intent_classification": { + "macro_f1": 0.8092593406289539 + }, + "massive_scenario_classification": { + "macro_f1": 0.8886710878440421 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8926416828413609 + }, + "wrime_classification": { + "macro_f1": 0.461674192977988 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9317155624145913 + }, + "jacwir_reranking": { + "ndcg@10": 0.8576025511447865 + }, + "jqara": { + "ndcg@10": 0.6066458919871698 + }, + "miracl_reranking": { + "ndcg@10": 0.842561072326263 + }, + "mldr_reranking": { + "ndcg@10": 0.8846847676615118 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8101096413526069 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7590325308586044 + }, + "jaqket": { + "ndcg@10": 0.5700921243106366 + }, + "mintaka_retrieval": { + "ndcg@10": 0.4417665675636218 + }, + "miracl_retrieval": { + "ndcg@10": 0.6821942595823656 + }, + "mldr_retrieval": { + "ndcg@10": 0.3773323411085737 + }, + "mrtydi": { + "ndcg@10": 0.4088554217076187 + }, + "nlp_journal_abs_article": { + "ndcg@10": 
0.8805294567802572 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.8973083823806287 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9696059096853805 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.789314612552914 + } + }, + "STS": { + "jsick": { + "spearman": 0.8262585834114126 + }, + "jsts": { + "spearman": 0.8343314248100878 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5437561090974637 + }, + "mewsc16": { + "v_measure_score": 0.5060934807171409 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3553392136864812 + } + } +} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-base/summary.json b/docs/results/cl-nagoya/ruri-base/summary.json index a7c7b05..591ccd2 100644 --- a/docs/results/cl-nagoya/ruri-base/summary.json +++ b/docs/results/cl-nagoya/ruri-base/summary.json @@ -4,59 +4,93 @@ "macro_f1": 0.7665550732749669 }, "amazon_review_classification": { - "macro_f1": 0.5575876111411316 + "macro_f1": 0.5602315794414631 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.916854859845768 }, "massive_intent_classification": { - "macro_f1": 0.8141210121425055 + "macro_f1": 0.8122217429688374 }, "massive_scenario_classification": { - "macro_f1": 0.8848812917656395 + "macro_f1": 0.8861454528496383 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8773434580133629 + }, + "wrime_classification": { + "macro_f1": 0.4546702469392619 } }, "Reranking": { "esci": { - "ndcg@10": 0.9290942178703699 + "ndcg@10": 0.9291919623555276 + }, + "jacwir_reranking": { + "ndcg@10": 0.8723926273423869 + }, + "jqara": { + "ndcg@10": 0.5415330056104515 + }, + "miracl_reranking": { + "ndcg@10": 0.7921821114257664 + }, + "mldr_reranking": { + "ndcg@10": 0.8801076117078023 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8247892121220626 + }, "jagovfaqs_22k": { - "ndcg@10": 0.7455660589538348 + "ndcg@10": 0.7550451217031677 }, "jaqket": { - "ndcg@10": 0.5012253145754781 + "ndcg@10": 0.5023277717264268 + }, + 
"mintaka_retrieval": { + "ndcg@10": 0.45371270319906437 + }, + "miracl_retrieval": { + "ndcg@10": 0.5488453168704391 + }, + "mldr_retrieval": { + "ndcg@10": 0.35421737773497164 }, "mrtydi": { - "ndcg@10": 0.3545113073009125 + "ndcg@10": 0.3558845666232437 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.8664858820958761 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.8689204088388403 + "ndcg@10": 0.8723253192804757 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9656989703684407 + "ndcg@10": 0.952690372948545 }, "nlp_journal_title_intro": { - "ndcg@10": 0.7531306059721564 + "ndcg@10": 0.7624967518065642 } }, "STS": { "jsick": { - "spearman": 0.8231772134744029 + "spearman": 0.8232158602892652 }, "jsts": { - "spearman": 0.8342848039994751 + "spearman": 0.8343499347567392 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5427223607801758 + "v_measure_score": 0.5669485444435229 }, "mewsc16": { - "v_measure_score": 0.5404099864321413 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6237623762376238 + "v_measure_score": 0.5205022529269108 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3854934527391879 } } } \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-large-v2/summary.json b/docs/results/cl-nagoya/ruri-large-v2/summary.json new file mode 100644 index 0000000..e4a22b7 --- /dev/null +++ b/docs/results/cl-nagoya/ruri-large-v2/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7950890220234579 + }, + "amazon_review_classification": { + "macro_f1": 0.5708906806011181 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.935661827685557 + }, + "massive_intent_classification": { + "macro_f1": 0.8087242075730218 + }, + "massive_scenario_classification": { + "macro_f1": 0.8970775785938794 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8471804883814585 + }, + "wrime_classification": { + "macro_f1": 0.47233151152826275 + } + }, + 
"Reranking": { + "esci": { + "ndcg@10": 0.9321133927024134 + }, + "jacwir_reranking": { + "ndcg@10": 0.8529056816630052 + }, + "jqara": { + "ndcg@10": 0.644692559122629 + }, + "miracl_reranking": { + "ndcg@10": 0.857799148388121 + }, + "mldr_reranking": { + "ndcg@10": 0.9068464851749977 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8048616669652183 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7822527313926262 + }, + "jaqket": { + "ndcg@10": 0.6561070613824674 + }, + "mintaka_retrieval": { + "ndcg@10": 0.5040548535978852 + }, + "miracl_retrieval": { + "ndcg@10": 0.7046000072363299 + }, + "mldr_retrieval": { + "ndcg@10": 0.36969618230893564 + }, + "mrtydi": { + "ndcg@10": 0.4636780745156557 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9085158509835447 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9114732359476821 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.977434890774318 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.8232131912662143 + } + }, + "STS": { + "jsick": { + "spearman": 0.8212250726981067 + }, + "jsts": { + "spearman": 0.8424300570470996 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5562089376369613 + }, + "mewsc16": { + "v_measure_score": 0.509675337301281 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4605817648504685 + } + } +} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-large/summary.json b/docs/results/cl-nagoya/ruri-large/summary.json index e86c46b..2e2cead 100644 --- a/docs/results/cl-nagoya/ruri-large/summary.json +++ b/docs/results/cl-nagoya/ruri-large/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.8080806321853091 + "macro_f1": 0.7950391460082398 }, "amazon_review_classification": { - "macro_f1": 0.5680171450057119 + "macro_f1": 0.5685184036314727 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9356380708493385 }, "massive_intent_classification": { - "macro_f1": 
0.8255898596881264 + "macro_f1": 0.8209962603450597 }, "massive_scenario_classification": { - "macro_f1": 0.8956410349938264 + "macro_f1": 0.9002551808707712 + }, + "sib200_japanese_classification": { + "macro_f1": 0.852564312646895 + }, + "wrime_classification": { + "macro_f1": 0.46447181564392015 } }, "Reranking": { "esci": { - "ndcg@10": 0.9298524733536755 + "ndcg@10": 0.9298778327436324 + }, + "jacwir_reranking": { + "ndcg@10": 0.8661076138203823 + }, + "jqara": { + "ndcg@10": 0.5958950681984889 + }, + "miracl_reranking": { + "ndcg@10": 0.8022791978749706 + }, + "mldr_reranking": { + "ndcg@10": 0.8690504682983363 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8169123630823522 + }, "jagovfaqs_22k": { - "ndcg@10": 0.7667506664925435 + "ndcg@10": 0.7763829985024149 }, "jaqket": { - "ndcg@10": 0.6173871224245404 + "ndcg@10": 0.617343261611166 + }, + "mintaka_retrieval": { + "ndcg@10": 0.5106450721691843 + }, + "miracl_retrieval": { + "ndcg@10": 0.5547009159538185 + }, + "mldr_retrieval": { + "ndcg@10": 0.3476835812045506 }, "mrtydi": { - "ndcg@10": 0.3803302462897418 + "ndcg@10": 0.38120908812619875 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.8652992529882778 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.8712459719069233 + "ndcg@10": 0.8891161860918603 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9657898747088243 + "ndcg@10": 0.9617411892426375 }, "nlp_journal_title_intro": { - "ndcg@10": 0.779665053945222 + "ndcg@10": 0.7922108957487803 } }, "STS": { "jsick": { - "spearman": 0.8199959693684533 + "spearman": 0.8199569498182433 }, "jsts": { - "spearman": 0.8426164139167538 + "spearman": 0.8426241685487486 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5139491572866559 + "v_measure_score": 0.5443732953428371 }, "mewsc16": { - "v_measure_score": 0.5225025331595674 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6228813559322034 + "v_measure_score": 0.5058998835740889 + }, + "sib200_japanese_clustering": { + 
"v_measure_score": 0.44757212682292163 } } } \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-small-v2/summary.json b/docs/results/cl-nagoya/ruri-small-v2/summary.json new file mode 100644 index 0000000..eec64ee --- /dev/null +++ b/docs/results/cl-nagoya/ruri-small-v2/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7767065011282246 + }, + "amazon_review_classification": { + "macro_f1": 0.5559888936165459 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8863640825159859 + }, + "massive_intent_classification": { + "macro_f1": 0.8199647165894474 + }, + "massive_scenario_classification": { + "macro_f1": 0.8816435555944846 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8156946375922746 + }, + "wrime_classification": { + "macro_f1": 0.452255956789983 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9320364061675573 + }, + "jacwir_reranking": { + "ndcg@10": 0.8818198634914105 + }, + "jqara": { + "ndcg@10": 0.5670420631375501 + }, + "miracl_reranking": { + "ndcg@10": 0.8332825788093644 + }, + "mldr_reranking": { + "ndcg@10": 0.9009377977029078 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8303842720270221 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7401670430071696 + }, + "jaqket": { + "ndcg@10": 0.6225429070303006 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3530718504041533 + }, + "miracl_retrieval": { + "ndcg@10": 0.6689773236918534 + }, + "mldr_retrieval": { + "ndcg@10": 0.32577528652704146 + }, + "mrtydi": { + "ndcg@10": 0.42400768916861914 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9064650891678154 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9041671364705328 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9729556994161748 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.7821156819492701 + } + }, + "STS": { + "jsick": { + "spearman": 0.8387675357095226 + }, + "jsts": { + "spearman": 0.8193470885317312 + } + }, + 
"Clustering": { + "livedoor_news": { + "v_measure_score": 0.5260577746749562 + }, + "mewsc16": { + "v_measure_score": 0.4947076915300828 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.47820319421479446 + } + } +} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-small/summary.json b/docs/results/cl-nagoya/ruri-small/summary.json index cb591ea..079db3e 100644 --- a/docs/results/cl-nagoya/ruri-small/summary.json +++ b/docs/results/cl-nagoya/ruri-small/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7991935990685706 + "macro_f1": 0.8055421233612723 }, "amazon_review_classification": { - "macro_f1": 0.556129066893332 + "macro_f1": 0.5541385299441624 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8885932202820669 }, "massive_intent_classification": { - "macro_f1": 0.8148895285345188 + "macro_f1": 0.8108237159349728 }, "massive_scenario_classification": { - "macro_f1": 0.8787774569382543 + "macro_f1": 0.8800077744996155 + }, + "sib200_japanese_classification": { + "macro_f1": 0.839667353042202 + }, + "wrime_classification": { + "macro_f1": 0.4595261443020403 } }, "Reranking": { "esci": { - "ndcg@10": 0.9300177985352138 + "ndcg@10": 0.9301438020851305 + }, + "jacwir_reranking": { + "ndcg@10": 0.8766726074179287 + }, + "jqara": { + "ndcg@10": 0.5325863556709908 + }, + "miracl_reranking": { + "ndcg@10": 0.7783787989685144 + }, + "mldr_reranking": { + "ndcg@10": 0.8813650067339368 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.825837748200516 + }, "jagovfaqs_22k": { - "ndcg@10": 0.736494039429321 + "ndcg@10": 0.740126693753929 }, "jaqket": { - "ndcg@10": 0.484437639428696 + "ndcg@10": 0.4844203596195783 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3723496207549938 + }, + "miracl_retrieval": { + "ndcg@10": 0.5222032466588368 + }, + "mldr_retrieval": { + "ndcg@10": 0.2898890422890513 }, "mrtydi": { - "ndcg@10": 0.3342716158897666 + "ndcg@10": 
0.3351374258570715 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.8689213841203763 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.8768878489670099 + "ndcg@10": 0.8723259697162892 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9716879343439146 + "ndcg@10": 0.9619567235021281 }, "nlp_journal_title_intro": { - "ndcg@10": 0.7608660955794895 + "ndcg@10": 0.7608782792491423 } }, "STS": { "jsick": { - "spearman": 0.8343927017558587 + "spearman": 0.8344934497771457 }, "jsts": { - "spearman": 0.8213297790184827 + "spearman": 0.8213145808052514 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5096442244018489 + "v_measure_score": 0.5289736036070719 }, "mewsc16": { - "v_measure_score": 0.5141045788711239 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6211267605633802 + "v_measure_score": 0.4936801242208388 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.46507426407220503 } } } \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-130m/summary.json b/docs/results/cl-nagoya/ruri-v3-130m/summary.json new file mode 100644 index 0000000..5700f32 --- /dev/null +++ b/docs/results/cl-nagoya/ruri-v3-130m/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7674793827265536 + }, + "amazon_review_classification": { + "macro_f1": 0.5955994619477079 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9500285886600925 + }, + "massive_intent_classification": { + "macro_f1": 0.807938642045445 + }, + "massive_scenario_classification": { + "macro_f1": 0.8790346026671575 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8287806075978352 + }, + "wrime_classification": { + "macro_f1": 0.46634901067800855 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9336981049156847 + }, + "jacwir_reranking": { + "ndcg@10": 0.8864670177419038 + }, + "jqara": { + "ndcg@10": 0.663018840039673 + }, + "miracl_reranking": { + "ndcg@10": 0.865876689917921 + }, + 
"mldr_reranking": { + "ndcg@10": 0.9362058245511219 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8421113535976967 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7532393338902414 + }, + "jaqket": { + "ndcg@10": 0.730979460582779 + }, + "mintaka_retrieval": { + "ndcg@10": 0.5177034569356731 + }, + "miracl_retrieval": { + "ndcg@10": 0.7100959869376436 + }, + "mldr_retrieval": { + "ndcg@10": 0.45158335316076936 + }, + "mrtydi": { + "ndcg@10": 0.4780012151028164 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.995144547086835 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9887952520028016 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9795152116360624 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9628103840588119 + } + }, + "STS": { + "jsick": { + "spearman": 0.7885956280300046 + }, + "jsts": { + "spearman": 0.8323603869543141 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5436288048604071 + }, + "mewsc16": { + "v_measure_score": 0.4883532965483729 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.5019988844015973 + } + } +} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-30m/summary.json b/docs/results/cl-nagoya/ruri-v3-30m/summary.json new file mode 100644 index 0000000..c4e768a --- /dev/null +++ b/docs/results/cl-nagoya/ruri-v3-30m/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7559571782387728 + }, + "amazon_review_classification": { + "macro_f1": 0.5570789457429248 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9262839486939813 + }, + "massive_intent_classification": { + "macro_f1": 0.783074979041957 + }, + "massive_scenario_classification": { + "macro_f1": 0.8672396605716526 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8140481078951145 + }, + "wrime_classification": { + "macro_f1": 0.4311261750368354 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9305651903486406 + }, + 
"jacwir_reranking": { + "ndcg@10": 0.8761294751423317 + }, + "jqara": { + "ndcg@10": 0.5747490185208084 + }, + "miracl_reranking": { + "ndcg@10": 0.8352458113588647 + }, + "mldr_reranking": { + "ndcg@10": 0.9297421530365237 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.827028266156452 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7020872105862214 + }, + "jaqket": { + "ndcg@10": 0.6244733500896729 + }, + "mintaka_retrieval": { + "ndcg@10": 0.4304756847175998 + }, + "miracl_retrieval": { + "ndcg@10": 0.6498916988979277 + }, + "mldr_retrieval": { + "ndcg@10": 0.4577076048703079 + }, + "mrtydi": { + "ndcg@10": 0.41775750844113785 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9876046427100846 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9916030162169887 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9699245797579602 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9534027111106339 + } + }, + "STS": { + "jsick": { + "spearman": 0.8161946935797372 + }, + "jsts": { + "spearman": 0.819463211043541 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5369067977199252 + }, + "mewsc16": { + "v_measure_score": 0.47961175798341066 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4804316290090649 + } + } +} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-310m/summary.json b/docs/results/cl-nagoya/ruri-v3-310m/summary.json new file mode 100644 index 0000000..c27fed8 --- /dev/null +++ b/docs/results/cl-nagoya/ruri-v3-310m/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.8009270010529765 + }, + "amazon_review_classification": { + "macro_f1": 0.6071898527482484 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9530657500380437 + }, + "massive_intent_classification": { + "macro_f1": 0.8176293812793415 + }, + "massive_scenario_classification": { + "macro_f1": 0.890051922198645 + }, + "sib200_japanese_classification": { + 
"macro_f1": 0.8812655271153628 + }, + "wrime_classification": { + "macro_f1": 0.4852854023445756 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9342725351989479 + }, + "jacwir_reranking": { + "ndcg@10": 0.8845859005757672 + }, + "jqara": { + "ndcg@10": 0.6893206802955604 + }, + "miracl_reranking": { + "ndcg@10": 0.8500853284469898 + }, + "mldr_reranking": { + "ndcg@10": 0.9335769070370818 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8406411130636801 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7648595155366429 + }, + "jaqket": { + "ndcg@10": 0.7186721885111346 + }, + "mintaka_retrieval": { + "ndcg@10": 0.5225348075920366 + }, + "miracl_retrieval": { + "ndcg@10": 0.677145342243983 + }, + "mldr_retrieval": { + "ndcg@10": 0.43425275955863796 + }, + "mrtydi": { + "ndcg@10": 0.47064490316120666 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9958682142366949 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9935172926595653 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9790717306095701 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9658294271714906 + } + }, + "STS": { + "jsick": { + "spearman": 0.7886332339318622 + }, + "jsts": { + "spearman": 0.8430847366018317 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5855988614657296 + }, + "mewsc16": { + "v_measure_score": 0.4860478393120035 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4440626045366051 + } + } +} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-70m/summary.json b/docs/results/cl-nagoya/ruri-v3-70m/summary.json new file mode 100644 index 0000000..3a2c52d --- /dev/null +++ b/docs/results/cl-nagoya/ruri-v3-70m/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.8180877928218353 + }, + "amazon_review_classification": { + "macro_f1": 0.5798379850008339 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9339140455312027 + }, + 
"massive_intent_classification": { + "macro_f1": 0.7891754112354649 + }, + "massive_scenario_classification": { + "macro_f1": 0.8782518076402043 + }, + "sib200_japanese_classification": { + "macro_f1": 0.7686616284901401 + }, + "wrime_classification": { + "macro_f1": 0.4437562280187194 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9320237969329785 + }, + "jacwir_reranking": { + "ndcg@10": 0.8748197118530385 + }, + "jqara": { + "ndcg@10": 0.6309432249818713 + }, + "miracl_reranking": { + "ndcg@10": 0.8503057292439823 + }, + "mldr_reranking": { + "ndcg@10": 0.9225778620264797 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8275893500639571 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7327144021448485 + }, + "jaqket": { + "ndcg@10": 0.6768047159335538 + }, + "mintaka_retrieval": { + "ndcg@10": 0.4626106409683068 + }, + "miracl_retrieval": { + "ndcg@10": 0.6797764462851262 + }, + "mldr_retrieval": { + "ndcg@10": 0.43554376517918675 + }, + "mrtydi": { + "ndcg@10": 0.4499999994407917 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.984966699117648 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9868218521221748 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9706955197203543 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9573354583951488 + } + }, + "STS": { + "jsick": { + "spearman": 0.7909930894957667 + }, + "jsts": { + "spearman": 0.828242284804404 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5492094636693866 + }, + "mewsc16": { + "v_measure_score": 0.47739615416643866 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4719940146272088 + } + } +} \ No newline at end of file diff --git a/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json index 42cc5ff..91d272c 100644 --- a/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json +++ b/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json @@ -1,62 +1,96 @@ { "Classification": { 
"amazon_counterfactual_classification": { - "macro_f1": 0.7234436301724776 + "macro_f1": 0.7192545517004465 }, "amazon_review_classification": { - "macro_f1": 0.5441445333270086 + "macro_f1": 0.5454422812215437 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9100588500656168 }, "massive_intent_classification": { - "macro_f1": 0.7951973953020242 + "macro_f1": 0.8011172170046241 }, "massive_scenario_classification": { - "macro_f1": 0.8760200177186923 + "macro_f1": 0.8762609424720998 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8191722798191963 + }, + "wrime_classification": { + "macro_f1": 0.4188203301151871 } }, "Reranking": { "esci": { - "ndcg@10": 0.9183455876236017 + "ndcg@10": 0.9184207070049463 + }, + "jacwir_reranking": { + "ndcg@10": 0.6426611140199804 + }, + "jqara": { + "ndcg@10": 0.3748362133870952 + }, + "miracl_reranking": { + "ndcg@10": 0.7087840971938433 + }, + "mldr_reranking": { + "ndcg@10": 0.8734013475096433 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.5331630522529377 + }, "jagovfaqs_22k": { - "ndcg@10": 0.5161990612242935 + "ndcg@10": 0.5202480516932524 }, "jaqket": { - "ndcg@10": 0.5024513438428565 + "ndcg@10": 0.5013089667314551 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3288294149496304 + }, + "miracl_retrieval": { + "ndcg@10": 0.20681341934572967 + }, + "mldr_retrieval": { + "ndcg@10": 0.24700329716018354 }, "mrtydi": { - "ndcg@10": 0.13976323269046823 + "ndcg@10": 0.141360680613414 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.6909104560170936 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.6807886421530585 + "ndcg@10": 0.6619434888289687 }, "nlp_journal_title_abs": { - "ndcg@10": 0.6570889175649209 + "ndcg@10": 0.6484407439307039 }, "nlp_journal_title_intro": { - "ndcg@10": 0.48219159577174137 + "ndcg@10": 0.4696725603511326 } }, "STS": { "jsick": { - "spearman": 0.8282816229512862 + "spearman": 0.8283659349049672 }, "jsts": { - "spearman": 0.8127259236647225 + "spearman": 0.8126484380435667 } 
}, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5266774168531417 + "v_measure_score": 0.5511252826598367 }, "mewsc16": { - "v_measure_score": 0.5091016872016825 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6256665481692143 + "v_measure_score": 0.5339141639252604 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.49207894013578146 } } } \ No newline at end of file diff --git a/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json index a2d8924..c2b5a3e 100644 --- a/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json +++ b/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7321444865928852 + "macro_f1": 0.7260568612881779 }, "amazon_review_classification": { - "macro_f1": 0.5475800661400465 + "macro_f1": 0.5455832826466495 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8942024454984163 }, "massive_intent_classification": { - "macro_f1": 0.7922802742146243 + "macro_f1": 0.792273118014186 }, "massive_scenario_classification": { - "macro_f1": 0.8772172454209797 + "macro_f1": 0.8770657195206764 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8042709569831964 + }, + "wrime_classification": { + "macro_f1": 0.4525777476393026 } }, "Reranking": { "esci": { - "ndcg@10": 0.9148471751378899 + "ndcg@10": 0.9149640515619839 + }, + "jacwir_reranking": { + "ndcg@10": 0.5614550878114778 + }, + "jqara": { + "ndcg@10": 0.38302855218604437 + }, + "miracl_reranking": { + "ndcg@10": 0.7126433285790728 + }, + "mldr_reranking": { + "ndcg@10": 0.8659821811381412 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.4370774500135088 + }, "jagovfaqs_22k": { - "ndcg@10": 0.4683673504170269 + "ndcg@10": 0.47421467281855384 }, "jaqket": { - "ndcg@10": 0.39878189118804513 + "ndcg@10": 0.4004385277719307 + }, + "mintaka_retrieval": { + "ndcg@10": 
0.376774984849213 + }, + "miracl_retrieval": { + "ndcg@10": 0.18125969161337505 + }, + "mldr_retrieval": { + "ndcg@10": 0.23480755788261093 }, "mrtydi": { - "ndcg@10": 0.11834919561027905 + "ndcg@10": 0.1188048690188868 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.6407825080386719 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.634254459552888 + "ndcg@10": 0.6295135121177772 }, "nlp_journal_title_abs": { - "ndcg@10": 0.37927566884615427 + "ndcg@10": 0.36949537039923136 }, "nlp_journal_title_intro": { - "ndcg@10": 0.25787534957423713 + "ndcg@10": 0.2490316613470849 } }, "STS": { "jsick": { - "spearman": 0.837959537101532 + "spearman": 0.8377753687267541 }, "jsts": { - "spearman": 0.825691902117111 + "spearman": 0.8256006176068381 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5074967876488787 + "v_measure_score": 0.5337915256082275 }, "mewsc16": { - "v_measure_score": 0.503782014677764 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6250885896527285 + "v_measure_score": 0.5125821768154618 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.45736658859438273 } } } \ No newline at end of file diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json index 3863c9e..2cc2181 100644 --- a/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json +++ b/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7330185800774036 + "macro_f1": 0.7364790582283407 }, "amazon_review_classification": { - "macro_f1": 0.5392887528271114 + "macro_f1": 0.5413541626836352 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8986588956343088 }, "massive_intent_classification": { - "macro_f1": 0.7907120296283751 + "macro_f1": 0.7767897385750657 }, "massive_scenario_classification": { - "macro_f1": 0.8597097942715117 + "macro_f1": 0.8610390686035142 + }, + 
"sib200_japanese_classification": { + "macro_f1": 0.8413013579577491 + }, + "wrime_classification": { + "macro_f1": 0.41309966752995253 } }, "Reranking": { "esci": { - "ndcg@10": 0.9115668272308735 + "ndcg@10": 0.9117818311636607 + }, + "jacwir_reranking": { + "ndcg@10": 0.5154239181007129 + }, + "jqara": { + "ndcg@10": 0.3218696921394324 + }, + "miracl_reranking": { + "ndcg@10": 0.6995597032253587 + }, + "mldr_reranking": { + "ndcg@10": 0.8612256071032377 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.35106925427500363 + }, "jagovfaqs_22k": { - "ndcg@10": 0.46003459081522513 + "ndcg@10": 0.4673719618749888 }, "jaqket": { - "ndcg@10": 0.3945725593125862 + "ndcg@10": 0.3951670829019162 + }, + "mintaka_retrieval": { + "ndcg@10": 0.299231152726057 + }, + "miracl_retrieval": { + "ndcg@10": 0.10934136213023636 + }, + "mldr_retrieval": { + "ndcg@10": 0.15981611825721914 }, "mrtydi": { - "ndcg@10": 0.055507775092798486 + "ndcg@10": 0.055133639963568334 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.582165240647806 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.6025847751308843 + "ndcg@10": 0.5841104498413489 }, "nlp_journal_title_abs": { - "ndcg@10": 0.5562839869857912 + "ndcg@10": 0.55577879846708 }, "nlp_journal_title_intro": { - "ndcg@10": 0.3449181162324482 + "ndcg@10": 0.3284050897756761 } }, "STS": { "jsick": { - "spearman": 0.7849379492955117 + "spearman": 0.7852600594448598 }, "jsts": { - "spearman": 0.7894946592483818 + "spearman": 0.7894496424482047 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5223347838445698 + "v_measure_score": 0.5065452260003059 }, "mewsc16": { - "v_measure_score": 0.37310458219601117 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.624424778761062 + "v_measure_score": 0.39578933501406055 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3362930091678794 } } } \ No newline at end of file diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json 
b/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json index d37618a..09525c9 100644 --- a/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json +++ b/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.767905114979583 + "macro_f1": 0.7640316468319925 }, "amazon_review_classification": { - "macro_f1": 0.5537089641846143 + "macro_f1": 0.5504736753163985 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9057099704855596 }, "massive_intent_classification": { - "macro_f1": 0.7912698845073401 + "macro_f1": 0.792495956569193 }, "massive_scenario_classification": { - "macro_f1": 0.8736185210672394 + "macro_f1": 0.8749858164207054 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8288719236604842 + }, + "wrime_classification": { + "macro_f1": 0.44326523397693174 } }, "Reranking": { "esci": { - "ndcg@10": 0.9095494729022622 + "ndcg@10": 0.9094836571513687 + }, + "jacwir_reranking": { + "ndcg@10": 0.5417192948613557 + }, + "jqara": { + "ndcg@10": 0.3877939946491903 + }, + "miracl_reranking": { + "ndcg@10": 0.7001887861606321 + }, + "mldr_reranking": { + "ndcg@10": 0.8303617273610736 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.37613574135010835 + }, "jagovfaqs_22k": { - "ndcg@10": 0.4509073581555124 + "ndcg@10": 0.46564010373437337 }, "jaqket": { - "ndcg@10": 0.34595043675331943 + "ndcg@10": 0.3452888488420233 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3058130510308383 + }, + "miracl_retrieval": { + "ndcg@10": 0.10326154138228141 + }, + "mldr_retrieval": { + "ndcg@10": 0.12550430031143336 }, "mrtydi": { - "ndcg@10": 0.05750859876901772 + "ndcg@10": 0.057502989435967655 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.504469050615059 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.550742021417855 + "ndcg@10": 0.5069650402920987 }, "nlp_journal_title_abs": { - "ndcg@10": 0.6307172007359215 + "ndcg@10": 0.6043158227609278 }, 
"nlp_journal_title_intro": { - "ndcg@10": 0.39612451822677164 + "ndcg@10": 0.34323430832579677 } }, "STS": { "jsick": { - "spearman": 0.8014979086154339 + "spearman": 0.8013849170804103 }, "jsts": { - "spearman": 0.8097685749017456 + "spearman": 0.809789575264219 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5090447587797094 + "v_measure_score": 0.5147732775967515 }, "mewsc16": { - "v_measure_score": 0.4591920015613856 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6248671625929861 + "v_measure_score": 0.44443267597570074 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.34646662604886447 } } } \ No newline at end of file diff --git a/docs/results/colorfulscoop/sbert-base-ja/summary.json b/docs/results/colorfulscoop/sbert-base-ja/summary.json index 2a08044..91ef6aa 100644 --- a/docs/results/colorfulscoop/sbert-base-ja/summary.json +++ b/docs/results/colorfulscoop/sbert-base-ja/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7221023294352484 + "macro_f1": 0.7080315613053877 }, "amazon_review_classification": { - "macro_f1": 0.47952384496155054 + "macro_f1": 0.4779713813897666 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8350239953633378 }, "massive_intent_classification": { - "macro_f1": 0.725195343788811 + "macro_f1": 0.7288673932703351 }, "massive_scenario_classification": { - "macro_f1": 0.836177960542408 + "macro_f1": 0.8370655127879382 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8262660922438109 + }, + "wrime_classification": { + "macro_f1": 0.35057897749310646 } }, "Reranking": { "esci": { - "ndcg@10": 0.8997301146575819 + "ndcg@10": 0.8996866702578056 + }, + "jacwir_reranking": { + "ndcg@10": 0.37147215136686634 + }, + "jqara": { + "ndcg@10": 0.2220517076242275 + }, + "miracl_reranking": { + "ndcg@10": 0.6502702968219343 + }, + "mldr_reranking": { + "ndcg@10": 0.8255483571039144 } }, "Retrieval": { + 
"jacwir_retrieval": { + "ndcg@10": 0.192984468642645 + }, "jagovfaqs_22k": { - "ndcg@10": 0.21501915127957166 + "ndcg@10": 0.21704292684612675 }, "jaqket": { - "ndcg@10": 0.13161989528541293 + "ndcg@10": 0.13139887002144995 + }, + "mintaka_retrieval": { + "ndcg@10": 0.19067862146114167 + }, + "miracl_retrieval": { + "ndcg@10": 0.018598782450328283 + }, + "mldr_retrieval": { + "ndcg@10": 0.06972936265190934 }, "mrtydi": { - "ndcg@10": 0.00436010196904899 + "ndcg@10": 0.004126228941345733 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.29023294982669573 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.2878020264605714 + "ndcg@10": 0.2580237968832312 }, "nlp_journal_title_abs": { - "ndcg@10": 0.22397059858982324 + "ndcg@10": 0.21071404885072903 }, "nlp_journal_title_intro": { - "ndcg@10": 0.12815871897103842 + "ndcg@10": 0.11573741610386916 } }, "STS": { "jsick": { - "spearman": 0.6659298300713198 + "spearman": 0.6656074999372202 }, "jsts": { - "spearman": 0.7423952309826243 + "spearman": 0.7425444938991701 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.4298579019834722 + "v_measure_score": 0.4059869097583984 }, "mewsc16": { - "v_measure_score": 0.46641671645082333 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6231013776050865 + "v_measure_score": 0.4617625340860209 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.3035702180528845 } } } \ No newline at end of file diff --git a/docs/results/google/embeddinggemma-300m/summary.json b/docs/results/google/embeddinggemma-300m/summary.json new file mode 100644 index 0000000..1cbe1cd --- /dev/null +++ b/docs/results/google/embeddinggemma-300m/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7473788045121156 + }, + "amazon_review_classification": { + "macro_f1": 0.5803989931720487 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9598578035045773 + }, + "massive_intent_classification": { + 
"macro_f1": 0.8007123314267398 + }, + "massive_scenario_classification": { + "macro_f1": 0.9058457580997293 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8691524520966505 + }, + "wrime_classification": { + "macro_f1": 0.46617181157351545 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9325852428034396 + }, + "jacwir_reranking": { + "ndcg@10": 0.8672290139012463 + }, + "jqara": { + "ndcg@10": 0.5208735587352208 + }, + "miracl_reranking": { + "ndcg@10": 0.8237547981136122 + }, + "mldr_reranking": { + "ndcg@10": 0.9019285986799139 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8107178459954021 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.6942509653422283 + }, + "jaqket": { + "ndcg@10": 0.6326539731698172 + }, + "mintaka_retrieval": { + "ndcg@10": 0.38634126517980316 + }, + "miracl_retrieval": { + "ndcg@10": 0.3527982534428366 + }, + "mldr_retrieval": { + "ndcg@10": 0.34664273718176375 + }, + "mrtydi": { + "ndcg@10": 0.13863867175417482 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9934404877801122 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9902425863025213 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9611708983967426 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9435055100669566 + } + }, + "STS": { + "jsick": { + "spearman": 0.8167115014804869 + }, + "jsts": { + "spearman": 0.8381005453815682 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.553278169293011 + }, + "mewsc16": { + "v_measure_score": 0.5055377268682895 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4254674919395097 + } + } +} \ No newline at end of file diff --git a/docs/results/hotchpotch/static-embedding-japanese/summary.json b/docs/results/hotchpotch/static-embedding-japanese/summary.json new file mode 100644 index 0000000..dea2123 --- /dev/null +++ b/docs/results/hotchpotch/static-embedding-japanese/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 
0.6806231003039513 + }, + "amazon_review_classification": { + "macro_f1": 0.46807443888459704 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.7982203591912549 + }, + "massive_intent_classification": { + "macro_f1": 0.7479207001300227 + }, + "massive_scenario_classification": { + "macro_f1": 0.8218342894775092 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8333478541030553 + }, + "wrime_classification": { + "macro_f1": 0.32116037890073806 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.918697023137389 + }, + "jacwir_reranking": { + "ndcg@10": 0.8096474845962077 + }, + "jqara": { + "ndcg@10": 0.470607034824141 + }, + "miracl_reranking": { + "ndcg@10": 0.7201497903350694 + }, + "mldr_reranking": { + "ndcg@10": 0.9355298111228094 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.7227068099625594 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.5555106276533467 + }, + "jaqket": { + "ndcg@10": 0.6403798293637829 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3893399585539267 + }, + "miracl_retrieval": { + "ndcg@10": 0.3261108514005591 + }, + "mldr_retrieval": { + "ndcg@10": 0.4251322740050699 + }, + "mrtydi": { + "ndcg@10": 0.1118466505474389 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.7618517724714088 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9573914637080742 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9036776565067465 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.862455457223212 + } + }, + "STS": { + "jsick": { + "spearman": 0.8251124620732032 + }, + "jsts": { + "spearman": 0.7781260135980573 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5143752588371998 + }, + "mewsc16": { + "v_measure_score": 0.34814733829489664 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.21465115117004985 + } + } +} \ No newline at end of file diff --git a/docs/results/intfloat/multilingual-e5-base/summary.json b/docs/results/intfloat/multilingual-e5-base/summary.json index 96f9640..4d84be2 100644 
--- a/docs/results/intfloat/multilingual-e5-base/summary.json +++ b/docs/results/intfloat/multilingual-e5-base/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.6367079139150691 + "macro_f1": 0.6428957534047911 }, "amazon_review_classification": { - "macro_f1": 0.5424265794470897 + "macro_f1": 0.5417258327796466 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9231910434886872 }, "massive_intent_classification": { - "macro_f1": 0.7277503514873049 + "macro_f1": 0.7318717264077053 }, "massive_scenario_classification": { - "macro_f1": 0.8652828949015864 + "macro_f1": 0.8677940980663801 + }, + "sib200_japanese_classification": { + "macro_f1": 0.785022714268383 + }, + "wrime_classification": { + "macro_f1": 0.3865061394465788 } }, "Reranking": { "esci": { - "ndcg@10": 0.9285060467194839 + "ndcg@10": 0.9290148108090969 + }, + "jacwir_reranking": { + "ndcg@10": 0.8865491934939191 + }, + "jqara": { + "ndcg@10": 0.4761308479065645 + }, + "miracl_reranking": { + "ndcg@10": 0.8196779545649944 + }, + "mldr_reranking": { + "ndcg@10": 0.8614612823139557 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8431602298737804 + }, "jagovfaqs_22k": { - "ndcg@10": 0.6534478396845428 + "ndcg@10": 0.687214041967885 }, "jaqket": { - "ndcg@10": 0.5067444792013236 + "ndcg@10": 0.5169392915456349 + }, + "mintaka_retrieval": { + "ndcg@10": 0.34676383987252357 + }, + "miracl_retrieval": { + "ndcg@10": 0.6449511893902589 + }, + "mldr_retrieval": { + "ndcg@10": 0.2573147838464383 }, "mrtydi": { - "ndcg@10": 0.3837652120001251 + "ndcg@10": 0.42298287793585587 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.8355946539433561 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.8709767034225332 + "ndcg@10": 0.8447862631398672 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9473129303429082 + "ndcg@10": 0.9461907998491789 }, "nlp_journal_title_intro": { - "ndcg@10": 0.7304538728893641 + "ndcg@10": 0.7469571396756213 } }, "STS": 
{ "jsick": { - "spearman": 0.8128058660848744 + "spearman": 0.8125544166626103 }, "jsts": { - "spearman": 0.7839196475937381 + "spearman": 0.7965480195299134 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5502694126615243 + "v_measure_score": 0.5379041349111564 }, "mewsc16": { - "v_measure_score": 0.41494514000218946 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6226482073127441 + "v_measure_score": 0.4943772106331262 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4713134178805946 } } } \ No newline at end of file diff --git a/docs/results/intfloat/multilingual-e5-large/summary.json b/docs/results/intfloat/multilingual-e5-large/summary.json index a28c470..40752a5 100644 --- a/docs/results/intfloat/multilingual-e5-large/summary.json +++ b/docs/results/intfloat/multilingual-e5-large/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.706580687830688 + "macro_f1": 0.6969861236021963 }, "amazon_review_classification": { - "macro_f1": 0.5653992303516462 + "macro_f1": 0.5763612743026115 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9554866923455646 }, "massive_intent_classification": { - "macro_f1": 0.7577710251429624 + "macro_f1": 0.7401244088033258 }, "massive_scenario_classification": { - "macro_f1": 0.8859090262583831 + "macro_f1": 0.887053685338159 + }, + "sib200_japanese_classification": { + "macro_f1": 0.7811476853348774 + }, + "wrime_classification": { + "macro_f1": 0.42377599926222737 } }, "Reranking": { "esci": { - "ndcg@10": 0.9296254722183955 + "ndcg@10": 0.9330712866652149 + }, + "jacwir_reranking": { + "ndcg@10": 0.9036816685131848 + }, + "jqara": { + "ndcg@10": 0.561374764136422 + }, + "miracl_reranking": { + "ndcg@10": 0.8631195198401651 + }, + "mldr_reranking": { + "ndcg@10": 0.8891328806594833 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8641271530674604 + }, "jagovfaqs_22k": { - "ndcg@10": 0.7030214336558751 + 
"ndcg@10": 0.7297746711291291 }, "jaqket": { - "ndcg@10": 0.5878065301444064 + "ndcg@10": 0.5967326588135612 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3958992445664435 + }, + "miracl_retrieval": { + "ndcg@10": 0.7095604570396511 + }, + "mldr_retrieval": { + "ndcg@10": 0.2984972238105224 }, "mrtydi": { - "ndcg@10": 0.4363167873386172 + "ndcg@10": 0.4781603349494696 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.8326468852967057 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.8600225120389309 + "ndcg@10": 0.8571088737195884 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9469712765040588 + "ndcg@10": 0.952870249874937 }, "nlp_journal_title_intro": { - "ndcg@10": 0.7248023877969718 + "ndcg@10": 0.7257268520360993 } }, "STS": { "jsick": { - "spearman": 0.7840335060728089 + "spearman": 0.7985423882395024 }, "jsts": { - "spearman": 0.8098724997856234 + "spearman": 0.8186303902222064 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5713023706914878 + "v_measure_score": 0.5157643001398088 }, "mewsc16": { - "v_measure_score": 0.4534484706354193 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.621496984746364 + "v_measure_score": 0.46806674695304834 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.5334765362912619 } } } \ No newline at end of file diff --git a/docs/results/intfloat/multilingual-e5-small/summary.json b/docs/results/intfloat/multilingual-e5-small/summary.json index 99a4423..5a3add1 100644 --- a/docs/results/intfloat/multilingual-e5-small/summary.json +++ b/docs/results/intfloat/multilingual-e5-small/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.6214130966524566 + "macro_f1": 0.5866005078388893 }, "amazon_review_classification": { - "macro_f1": 0.5127428912860463 + "macro_f1": 0.5120598395740691 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8773239262941632 }, "massive_intent_classification": { - "macro_f1": 0.7085230519111091 + 
"macro_f1": 0.7134377059258787 }, "massive_scenario_classification": { - "macro_f1": 0.8622036829599259 + "macro_f1": 0.8676947906742417 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8177503141758454 + }, + "wrime_classification": { + "macro_f1": 0.36913347435432137 } }, "Reranking": { "esci": { - "ndcg@10": 0.9303349187158247 + "ndcg@10": 0.9298402731760124 + }, + "jacwir_reranking": { + "ndcg@10": 0.8998812594907971 + }, + "jqara": { + "ndcg@10": 0.49280220404951935 + }, + "miracl_reranking": { + "ndcg@10": 0.8178461260193638 + }, + "mldr_reranking": { + "ndcg@10": 0.864145360860429 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8558160940470637 + }, "jagovfaqs_22k": { - "ndcg@10": 0.6411252958220891 + "ndcg@10": 0.6568760244912849 }, "jaqket": { - "ndcg@10": 0.49966509556428645 + "ndcg@10": 0.5157123960708363 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3153737960263929 + }, + "miracl_retrieval": { + "ndcg@10": 0.6323300168472976 + }, + "mldr_retrieval": { + "ndcg@10": 0.2590832302769219 }, "mrtydi": { - "ndcg@10": 0.36054822913647616 + "ndcg@10": 0.4236692119753354 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.8396508926780583 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.8520749151982298 + "ndcg@10": 0.8409842458346825 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9526123412781002 + "ndcg@10": 0.9447219194706624 }, "nlp_journal_title_intro": { - "ndcg@10": 0.729906931983999 + "ndcg@10": 0.7455737280382885 } }, "STS": { "jsick": { - "spearman": 0.8150271836013705 + "spearman": 0.8199946308873799 }, "jsts": { - "spearman": 0.786450077409501 + "spearman": 0.7892106647109823 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5470075389200084 + "v_measure_score": 0.5194355229712517 }, "mewsc16": { - "v_measure_score": 0.391226933590049 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6219382321618744 + "v_measure_score": 0.5233814767010047 + }, + "sib200_japanese_clustering": { + "v_measure_score": 
0.43592128019411325 } } } \ No newline at end of file diff --git a/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json b/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json index 6b7309a..38e78b4 100644 --- a/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json +++ b/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7994675369288904 + "macro_f1": 0.7972419438068292 }, "amazon_review_classification": { - "macro_f1": 0.5748206591211895 + "macro_f1": 0.5802127224160758 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9199098092136551 }, "massive_intent_classification": { - "macro_f1": 0.8025949222725076 + "macro_f1": 0.8015558847211773 }, "massive_scenario_classification": { - "macro_f1": 0.8875250742566655 + "macro_f1": 0.8878291337617034 + }, + "sib200_japanese_classification": { + "macro_f1": 0.7731122315942124 + }, + "wrime_classification": { + "macro_f1": 0.4573111522822367 } }, "Reranking": { "esci": { - "ndcg@10": 0.9156331205981866 + "ndcg@10": 0.9151322326635167 + }, + "jacwir_reranking": { + "ndcg@10": 0.6745048816141938 + }, + "jqara": { + "ndcg@10": 0.36039102371287524 + }, + "miracl_reranking": { + "ndcg@10": 0.6867643099800397 + }, + "mldr_reranking": { + "ndcg@10": 0.8538476294446257 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.5964999187333498 + }, "jagovfaqs_22k": { - "ndcg@10": 0.519938655947725 + "ndcg@10": 0.5407367959715127 }, "jaqket": { - "ndcg@10": 0.4206746951743811 + "ndcg@10": 0.4021523812335328 + }, + "mintaka_retrieval": { + "ndcg@10": 0.2482827887837841 + }, + "miracl_retrieval": { + "ndcg@10": 0.17190013577864438 + }, + "mldr_retrieval": { + "ndcg@10": 0.19084474235068657 }, "mrtydi": { - "ndcg@10": 0.10116108109776817 + "ndcg@10": 0.10090455185771262 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.44067635335327865 }, "nlp_journal_abs_intro": 
{ - "ndcg@10": 0.4930421996747514 + "ndcg@10": 0.44837143094362086 }, "nlp_journal_title_abs": { - "ndcg@10": 0.719369187830078 + "ndcg@10": 0.7368252250653567 }, "nlp_journal_title_intro": { - "ndcg@10": 0.3258568875005778 + "ndcg@10": 0.3115238718909808 } }, "STS": { "jsick": { - "spearman": 0.7211422898060521 + "spearman": 0.7203759702575281 }, "jsts": { - "spearman": 0.8109305772255819 + "spearman": 0.8107670759374308 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.4677177349822789 + "v_measure_score": 0.5170361974340975 }, "mewsc16": { - "v_measure_score": 0.5389209739242912 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6237623762376237 + "v_measure_score": 0.5152481901891431 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.43034104597999767 } } } \ No newline at end of file diff --git a/docs/results/pfnet/plamo-embedding-1b/summary.json b/docs/results/pfnet/plamo-embedding-1b/summary.json new file mode 100644 index 0000000..bbd1ebe --- /dev/null +++ b/docs/results/pfnet/plamo-embedding-1b/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7758538459902731 + }, + "amazon_review_classification": { + "macro_f1": 0.5947995518406083 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9172503242542154 + }, + "massive_intent_classification": { + "macro_f1": 0.8278794713377423 + }, + "massive_scenario_classification": { + "macro_f1": 0.8994521566290758 + }, + "sib200_japanese_classification": { + "macro_f1": 0.9031045220702235 + }, + "wrime_classification": { + "macro_f1": 0.4920234056704329 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9358806147164782 + }, + "jacwir_reranking": { + "ndcg@10": 0.9174123687849153 + }, + "jqara": { + "ndcg@10": 0.6614745715723234 + }, + "miracl_reranking": { + "ndcg@10": 0.8191089804461983 + }, + "mldr_reranking": { + "ndcg@10": 0.9187107530127357 + } + }, + "Retrieval": { + "jacwir_retrieval": { + 
"ndcg@10": 0.8891350347274469 + }, + "jagovfaqs_22k": { + "ndcg@10": 0.7902563114751548 + }, + "jaqket": { + "ndcg@10": 0.543879907336617 + }, + "mintaka_retrieval": { + "ndcg@10": 0.5455917771478032 + }, + "miracl_retrieval": { + "ndcg@10": 0.5991430810654191 + }, + "mldr_retrieval": { + "ndcg@10": 0.3668286739593277 + }, + "mrtydi": { + "ndcg@10": 0.4186565845821445 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9765055597743824 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.990219021795052 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9862781050998647 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9510769472900551 + } + }, + "STS": { + "jsick": { + "spearman": 0.81830804755845 + }, + "jsts": { + "spearman": 0.8446183418196836 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.6173644704637056 + }, + "mewsc16": { + "v_measure_score": 0.4802637594283387 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4773483587781526 + } + } +} \ No newline at end of file diff --git a/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json b/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json index 7318aab..6d1041e 100644 --- a/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json +++ b/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7492232749031491 + "macro_f1": 0.7528271196943096 }, "amazon_review_classification": { - "macro_f1": 0.5530707609927811 + "macro_f1": 0.5518771080100612 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.892368025976312 }, "massive_intent_classification": { - "macro_f1": 0.7979144461303402 + "macro_f1": 0.7872725195473699 }, "massive_scenario_classification": { - "macro_f1": 0.8683641924034757 + "macro_f1": 0.8713846348082936 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8583089323083904 + }, + "wrime_classification": { + "macro_f1": 0.4323129039345514 } }, "Reranking": { 
"esci": { - "ndcg@10": 0.9301469431250418 + "ndcg@10": 0.9301525338489429 + }, + "jacwir_reranking": { + "ndcg@10": 0.8827390816541736 + }, + "jqara": { + "ndcg@10": 0.6070225247152883 + }, + "miracl_reranking": { + "ndcg@10": 0.8243623644224994 + }, + "mldr_reranking": { + "ndcg@10": 0.887121388271364 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8385011452405416 + }, "jagovfaqs_22k": { - "ndcg@10": 0.6979374757372254 + "ndcg@10": 0.6984652569482365 }, "jaqket": { - "ndcg@10": 0.6729417850207029 + "ndcg@10": 0.6751948574643762 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3957491894384977 + }, + "miracl_retrieval": { + "ndcg@10": 0.652881832622734 + }, + "mldr_retrieval": { + "ndcg@10": 0.3374776122444277 }, "mrtydi": { - "ndcg@10": 0.41858579533990486 + "ndcg@10": 0.4167021902708705 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.899055473429718 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.9029337913460675 + "ndcg@10": 0.9008045583912581 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9511153967130517 + "ndcg@10": 0.9566816164352073 }, "nlp_journal_title_intro": { - "ndcg@10": 0.7580448576047344 + "ndcg@10": 0.757906107708436 } }, "STS": { "jsick": { - "spearman": 0.849637366944316 + "spearman": 0.8494858386977019 }, "jsts": { - "spearman": 0.8095684318108997 + "spearman": 0.8095670694135243 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5151536908540161 + "v_measure_score": 0.5446091559116468 }, "mewsc16": { - "v_measure_score": 0.45782610528001805 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.623716814159292 + "v_measure_score": 0.4611859858929692 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.43979504978761347 } } -} +} \ No newline at end of file diff --git a/docs/results/pkshatech/GLuCoSE-base-ja/summary.json b/docs/results/pkshatech/GLuCoSE-base-ja/summary.json index 9048691..5a50ab4 100644 --- a/docs/results/pkshatech/GLuCoSE-base-ja/summary.json +++ 
b/docs/results/pkshatech/GLuCoSE-base-ja/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.8243606275521169 + "macro_f1": 0.8203088346974938 }, "amazon_review_classification": { - "macro_f1": 0.580654308041878 + "macro_f1": 0.5793470941382456 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9289309593569228 }, "massive_intent_classification": { - "macro_f1": 0.7885427536904928 + "macro_f1": 0.7852003872158392 }, "massive_scenario_classification": { - "macro_f1": 0.8794225134482166 + "macro_f1": 0.8771105186592234 + }, + "sib200_japanese_classification": { + "macro_f1": 0.7723533533184818 + }, + "wrime_classification": { + "macro_f1": 0.48820317778534994 } }, "Reranking": { "esci": { - "ndcg@10": 0.9190289767663239 + "ndcg@10": 0.9182072351783757 + }, + "jacwir_reranking": { + "ndcg@10": 0.7453523153562407 + }, + "jqara": { + "ndcg@10": 0.30235678517238046 + }, + "miracl_reranking": { + "ndcg@10": 0.7782487998017047 + }, + "mldr_reranking": { + "ndcg@10": 0.8742431547482784 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.6929937892822252 + }, "jagovfaqs_22k": { - "ndcg@10": 0.6387979415478197 + "ndcg@10": 0.6414300605061649 }, "jaqket": { - "ndcg@10": 0.3981609655991592 + "ndcg@10": 0.39775627519142726 + }, + "mintaka_retrieval": { + "ndcg@10": 0.2981097485323552 + }, + "miracl_retrieval": { + "ndcg@10": 0.4826861479972318 + }, + "mldr_retrieval": { + "ndcg@10": 0.2507030467719784 }, "mrtydi": { - "ndcg@10": 0.30281316435910444 + "ndcg@10": 0.3013997193651328 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.7677861541704494 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.7825765249971093 + "ndcg@10": 0.7720777474520221 }, "nlp_journal_title_abs": { - "ndcg@10": 0.8206371528870603 + "ndcg@10": 0.8139955508348415 }, "nlp_journal_title_intro": { - "ndcg@10": 0.5982476164344701 + "ndcg@10": 0.5843440022515908 } }, "STS": { "jsick": { - "spearman": 0.7496711324072552 + "spearman": 
0.7489963692364312 }, "jsts": { - "spearman": 0.824592262812859 + "spearman": 0.8246470658338377 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.49890886040948096 + "v_measure_score": 0.5040813114960272 }, "mewsc16": { - "v_measure_score": 0.49676862904881375 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.663883089770355 + "v_measure_score": 0.4952409837584659 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.41426282292221306 } } } \ No newline at end of file diff --git a/docs/results/pkshatech/RoSEtta-base-ja/summary.json b/docs/results/pkshatech/RoSEtta-base-ja/summary.json index d82af4b..7951ed1 100644 --- a/docs/results/pkshatech/RoSEtta-base-ja/summary.json +++ b/docs/results/pkshatech/RoSEtta-base-ja/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7005147244958231 + "macro_f1": 0.7021400751808275 }, "amazon_review_classification": { - "macro_f1": 0.5263680453119501 + "macro_f1": 0.5261693704750353 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8728387064627037 }, "massive_intent_classification": { - "macro_f1": 0.7983787583297884 + "macro_f1": 0.7958661089844552 }, "massive_scenario_classification": { - "macro_f1": 0.8709593192703351 + "macro_f1": 0.869642477269303 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8400507949086808 + }, + "wrime_classification": { + "macro_f1": 0.41243251223612126 } }, "Reranking": { "esci": { - "ndcg@10": 0.9268625513429571 + "ndcg@10": 0.9267709447988313 + }, + "jacwir_reranking": { + "ndcg@10": 0.8682926176464301 + }, + "jqara": { + "ndcg@10": 0.5792158527364997 + }, + "miracl_reranking": { + "ndcg@10": 0.8038275156892214 + }, + "mldr_reranking": { + "ndcg@10": 0.8844542290758788 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8201713015308671 + }, "jagovfaqs_22k": { - "ndcg@10": 0.6595934642903105 + "ndcg@10": 0.6627940635852495 }, "jaqket": { - "ndcg@10": 0.6533452086105761 + 
"ndcg@10": 0.642772517951208 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3404237377925581 + }, + "miracl_retrieval": { + "ndcg@10": 0.6016261958696313 + }, + "mldr_retrieval": { + "ndcg@10": 0.3236631225997826 }, "mrtydi": { - "ndcg@10": 0.36731170141136216 + "ndcg@10": 0.36773428568023436 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9604317247356383 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.9553567926226499 + "ndcg@10": 0.9541194598644321 }, "nlp_journal_title_abs": { - "ndcg@10": 0.940828991756893 + "ndcg@10": 0.931681815900694 }, "nlp_journal_title_intro": { - "ndcg@10": 0.8163161967769845 + "ndcg@10": 0.821937205258955 } }, "STS": { "jsick": { - "spearman": 0.8383455453168481 + "spearman": 0.8383423614590403 }, "jsts": { - "spearman": 0.7895388048564987 + "spearman": 0.7894639448529204 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5861760622672214 + "v_measure_score": 0.4888541691163841 }, "mewsc16": { - "v_measure_score": 0.4784844036038961 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6173974540311173 + "v_measure_score": 0.4515710456360326 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4060764834036522 } } -} +} \ No newline at end of file diff --git a/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json b/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json index cc9f179..5bbd9f7 100644 --- a/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json +++ b/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.6748573563374541 + "macro_f1": 0.6827876647194675 }, "amazon_review_classification": { - "macro_f1": 0.5084883283463678 + "macro_f1": 0.5175208911836656 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8821403624230039 }, "massive_intent_classification": { - "macro_f1": 0.7967050091211104 + "macro_f1": 0.7964832948145142 }, 
"massive_scenario_classification": { - "macro_f1": 0.871999260591497 + "macro_f1": 0.8722583552883876 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8118131918956941 + }, + "wrime_classification": { + "macro_f1": 0.38393198133793865 } }, "Reranking": { "esci": { - "ndcg@10": 0.914930352019688 + "ndcg@10": 0.9127205853729194 + }, + "jacwir_reranking": { + "ndcg@10": 0.5745412347869042 + }, + "jqara": { + "ndcg@10": 0.31740297589991745 + }, + "miracl_reranking": { + "ndcg@10": 0.7212459481239325 + }, + "mldr_reranking": { + "ndcg@10": 0.8749859006713937 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.45027356866159485 + }, "jagovfaqs_22k": { - "ndcg@10": 0.41496851385134836 + "ndcg@10": 0.4100248722670852 }, "jaqket": { - "ndcg@10": 0.46003031782136106 + "ndcg@10": 0.37009937036200197 + }, + "mintaka_retrieval": { + "ndcg@10": 0.3129516236109114 + }, + "miracl_retrieval": { + "ndcg@10": 0.16066205698392905 + }, + "mldr_retrieval": { + "ndcg@10": 0.20077263817507693 }, "mrtydi": { - "ndcg@10": 0.1019130492122431 + "ndcg@10": 0.10152904724472846 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.3813451499418741 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.4014036990267884 + "ndcg@10": 0.3760245554186644 }, "nlp_journal_title_abs": { - "ndcg@10": 0.5962532652358485 + "ndcg@10": 0.5918422105100428 }, "nlp_journal_title_intro": { - "ndcg@10": 0.2452584471710635 + "ndcg@10": 0.25260061985270044 } }, "STS": { "jsick": { - "spearman": 0.7307715649457595 + "spearman": 0.7310527928257868 }, "jsts": { - "spearman": 0.8052279921326252 + "spearman": 0.8050903530724467 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.4476707933600858 + "v_measure_score": 0.491058629988371 }, "mewsc16": { - "v_measure_score": 0.5029508725037098 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6239830208701805 + "v_measure_score": 0.4702243143778868 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.5220924001787737 } } } \ No 
newline at end of file diff --git a/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json b/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json index 30385ec..d1a1183 100644 --- a/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json +++ b/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7910202863961814 + "macro_f1": 0.7966249319542733 }, "amazon_review_classification": { - "macro_f1": 0.614759364446128 + "macro_f1": 0.6202158443035662 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9503418215782169 }, "massive_intent_classification": { - "macro_f1": 0.8225880728874561 + "macro_f1": 0.8121127783146885 }, "massive_scenario_classification": { - "macro_f1": 0.9065030576701741 + "macro_f1": 0.9015618520645106 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8262549610016919 + }, + "wrime_classification": { + "macro_f1": 0.496952794347916 } }, "Reranking": { "esci": { - "ndcg@10": 0.9374394712541568 + "ndcg@10": 0.9359864365331227 + }, + "jacwir_reranking": { + "ndcg@10": 0.8684667204236405 + }, + "jqara": { + "ndcg@10": 0.6592446626934351 + }, + "miracl_reranking": { + "ndcg@10": 0.8516895656188278 + }, + "mldr_reranking": { + "ndcg@10": 0.9024168764200886 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8242898079860301 + }, "jagovfaqs_22k": { - "ndcg@10": 0.7168374490004555 + "ndcg@10": 0.7176236149918197 }, "jaqket": { - "ndcg@10": 0.7279485535689915 + "ndcg@10": 0.729199960117355 + }, + "mintaka_retrieval": { + "ndcg@10": 0.6260117718497401 + }, + "miracl_retrieval": { + "ndcg@10": 0.6323109932464099 + }, + "mldr_retrieval": { + "ndcg@10": 0.3458953565848906 }, "mrtydi": { - "ndcg@10": 0.41952210141116814 + "ndcg@10": 0.4075091710258615 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9919931534803926 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.9394095717236127 + "ndcg@10": 
0.9916030162169888 }, "nlp_journal_title_abs": { - "ndcg@10": 0.9695624263086593 + "ndcg@10": 0.968506421217649 }, "nlp_journal_title_intro": { - "ndcg@10": 0.8832876426024624 + "ndcg@10": 0.9629377323425067 } }, "STS": { "jsick": { - "spearman": 0.8022484725822061 + "spearman": 0.7979403746663343 }, "jsts": { - "spearman": 0.851980317221987 + "spearman": 0.8362521198880197 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.5641831341687762 + "v_measure_score": 0.5603187837880047 }, "mewsc16": { - "v_measure_score": 0.5129216698739159 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.62 + "v_measure_score": 0.5068875864473731 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.4418928761777483 } } } \ No newline at end of file diff --git a/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json b/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json new file mode 100644 index 0000000..86137b4 --- /dev/null +++ b/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json @@ -0,0 +1,96 @@ +{ + "Classification": { + "amazon_counterfactual_classification": { + "macro_f1": 0.7981260149778604 + }, + "amazon_review_classification": { + "macro_f1": 0.613904230518876 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.9350720201784032 + }, + "massive_intent_classification": { + "macro_f1": 0.8368870408710274 + }, + "massive_scenario_classification": { + "macro_f1": 0.9023393778180459 + }, + "sib200_japanese_classification": { + "macro_f1": 0.814822834466633 + }, + "wrime_classification": { + "macro_f1": 0.49874416955622525 + } + }, + "Reranking": { + "esci": { + "ndcg@10": 0.9357698212029779 + }, + "jacwir_reranking": { + "ndcg@10": 0.8879290064759172 + }, + "jqara": { + "ndcg@10": 0.7055458565694387 + }, + "miracl_reranking": { + "ndcg@10": 0.8593120098725527 + }, + "mldr_reranking": { + "ndcg@10": 0.9252857993806471 + } + }, + "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.8553812052293157 
+ }, + "jagovfaqs_22k": { + "ndcg@10": 0.748733390366879 + }, + "jaqket": { + "ndcg@10": 0.7351759183476264 + }, + "mintaka_retrieval": { + "ndcg@10": 0.6610711832074698 + }, + "miracl_retrieval": { + "ndcg@10": 0.6825626228833273 + }, + "mldr_retrieval": { + "ndcg@10": 0.403522262945172 + }, + "mrtydi": { + "ndcg@10": 0.4956554219902846 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.9684244331815967 + }, + "nlp_journal_abs_intro": { + "ndcg@10": 0.9627838420424424 + }, + "nlp_journal_title_abs": { + "ndcg@10": 0.9810825575187433 + }, + "nlp_journal_title_intro": { + "ndcg@10": 0.9178887982974248 + } + }, + "STS": { + "jsick": { + "spearman": 0.8257994437715604 + }, + "jsts": { + "spearman": 0.8586626198858301 + } + }, + "Clustering": { + "livedoor_news": { + "v_measure_score": 0.5741299477926689 + }, + "mewsc16": { + "v_measure_score": 0.5167004748357505 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.48585227521060775 + } + } +} \ No newline at end of file diff --git a/docs/results/sentence-transformers/LaBSE/summary.json b/docs/results/sentence-transformers/LaBSE/summary.json index de8fd21..d4575ba 100644 --- a/docs/results/sentence-transformers/LaBSE/summary.json +++ b/docs/results/sentence-transformers/LaBSE/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7361214773958769 + "macro_f1": 0.7473900578785092 }, "amazon_review_classification": { - "macro_f1": 0.516957890685124 + "macro_f1": 0.5163381922398036 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8952055768957177 }, "massive_intent_classification": { - "macro_f1": 0.7698802987251081 + "macro_f1": 0.7708783013419095 }, "massive_scenario_classification": { - "macro_f1": 0.8835366493433755 + "macro_f1": 0.883882574111003 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8147469939175009 + }, + "wrime_classification": { + "macro_f1": 0.4010561963802254 } }, "Reranking": { "esci": { - "ndcg@10": 
0.9162507647227857 + "ndcg@10": 0.9147393987384248 + }, + "jacwir_reranking": { + "ndcg@10": 0.6785244283016075 + }, + "jqara": { + "ndcg@10": 0.24624584903493016 + }, + "miracl_reranking": { + "ndcg@10": 0.692780512325045 + }, + "mldr_reranking": { + "ndcg@10": 0.818396899799895 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.49122610922285737 + }, "jagovfaqs_22k": { - "ndcg@10": 0.4310160105414995 + "ndcg@10": 0.4243154817699682 }, "jaqket": { - "ndcg@10": 0.34245849139132745 + "ndcg@10": 0.24919695742546066 + }, + "mintaka_retrieval": { + "ndcg@10": 0.20021150938693902 + }, + "miracl_retrieval": { + "ndcg@10": 0.09357313571231995 + }, + "mldr_retrieval": { + "ndcg@10": 0.07525879379433965 }, "mrtydi": { - "ndcg@10": 0.04238747941951049 + "ndcg@10": 0.04221321214455149 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.48063138821949475 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.48918127058907085 + "ndcg@10": 0.48202233374429526 }, "nlp_journal_title_abs": { - "ndcg@10": 0.7513086500303519 + "ndcg@10": 0.7559363652226313 }, "nlp_journal_title_intro": { - "ndcg@10": 0.35089108319096984 + "ndcg@10": 0.3553481928114969 } }, "STS": { "jsick": { - "spearman": 0.7698905918950973 + "spearman": 0.770087314840748 }, "jsts": { - "spearman": 0.7612337568248777 + "spearman": 0.7611615118281959 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.4829337123233023 + "v_measure_score": 0.4908336523752348 }, "mewsc16": { - "v_measure_score": 0.41471299546625956 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.623321554770318 + "v_measure_score": 0.41781835844551085 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.2859403214333406 } } } \ No newline at end of file diff --git a/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json b/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json index 12f71a2..4a59ed9 100644 --- 
a/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json +++ b/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json @@ -1,62 +1,96 @@ { "Classification": { "amazon_counterfactual_classification": { - "macro_f1": 0.7565022696601644 + "macro_f1": 0.7514299930187799 }, "amazon_review_classification": { - "macro_f1": 0.5131771609073525 + "macro_f1": 0.516712003417941 + }, + "japanese_sentiment_classification": { + "macro_f1": 0.8714537157100772 }, "massive_intent_classification": { - "macro_f1": 0.7427818411370812 + "macro_f1": 0.7433839585058197 }, "massive_scenario_classification": { - "macro_f1": 0.8609512679368835 + "macro_f1": 0.8606582397219589 + }, + "sib200_japanese_classification": { + "macro_f1": 0.8372998969612304 + }, + "wrime_classification": { + "macro_f1": 0.4167776597670575 } }, "Reranking": { "esci": { - "ndcg@10": 0.901984958764163 + "ndcg@10": 0.8971639400421929 + }, + "jacwir_reranking": { + "ndcg@10": 0.3920595575511347 + }, + "jqara": { + "ndcg@10": 0.18511169246774806 + }, + "miracl_reranking": { + "ndcg@10": 0.6535500060613615 + }, + "mldr_reranking": { + "ndcg@10": 0.768787823495723 } }, "Retrieval": { + "jacwir_retrieval": { + "ndcg@10": 0.21075313614845367 + }, "jagovfaqs_22k": { - "ndcg@10": 0.2511106863952595 + "ndcg@10": 0.2248606553485316 }, "jaqket": { - "ndcg@10": 0.21606007987072834 + "ndcg@10": 0.06494577519372931 + }, + "mintaka_retrieval": { + "ndcg@10": 0.22312923127278733 + }, + "miracl_retrieval": { + "ndcg@10": 0.022833015048992402 + }, + "mldr_retrieval": { + "ndcg@10": 0.06529330431356167 }, "mrtydi": { - "ndcg@10": 0.027590779174942116 + "ndcg@10": 0.027849411947159904 + }, + "nlp_journal_abs_article": { + "ndcg@10": 0.24914118502751986 }, "nlp_journal_abs_intro": { - "ndcg@10": 0.2848558252647936 + "ndcg@10": 0.2554860092306942 }, "nlp_journal_title_abs": { - "ndcg@10": 0.3646520309406354 + "ndcg@10": 0.35835508156998896 }, "nlp_journal_title_intro": { - "ndcg@10": 0.11545016260271045 + 
"ndcg@10": 0.12133118349638791 } }, "STS": { "jsick": { - "spearman": 0.7236409557069434 + "spearman": 0.7238085290735078 }, "jsts": { - "spearman": 0.7843597058304203 + "spearman": 0.784483411606707 } }, "Clustering": { "livedoor_news": { - "v_measure_score": 0.24487129939212224 + "v_measure_score": 0.26615937330682315 }, "mewsc16": { - "v_measure_score": 0.304278393205056 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.6219686162624821 + "v_measure_score": 0.32048277963560623 + }, + "sib200_japanese_clustering": { + "v_measure_score": 0.2434250739162938 } } } \ No newline at end of file diff --git a/leaderboard.md b/leaderboard.md index dd64309..1b83092 100644 --- a/leaderboard.md +++ b/leaderboard.md @@ -5,233 +5,266 @@ This leaderboard shows the results stored under `docs/results`. The scores are a The summary shows the average scores within each task. The average score is the average of scores by dataset. -| Model | Avg. | Retrieval | STS | Classification | Reranking | Clustering | PairClassification | -|:----------------------------------------------|:----------|:------------|:----------|:-----------------|:------------|:-------------|:---------------------| -| sbintuitions/sarashina-embedding-v1-1b | **75.50** | **77.61** | 82.71 | **78.37** | **93.74** | 53.86 | 62.00 | -| OpenAI/text-embedding-3-large | 74.05 | 74.48 | 82.52 | 77.58 | 93.58 | 53.32 | 62.35 | -| jinaai/jina-embeddings-v3 | 73.44 | 75.22 | 80.05 | 76.39 | 92.71 | 51.46 | 62.37 | -| cl-nagoya/ruri-large | 73.31 | 73.02 | 83.13 | 77.43 | 92.99 | 51.82 | 62.29 | -| pkshatech/GLuCoSE-base-ja-v2 | 72.23 | 73.36 | 82.96 | 74.21 | 93.01 | 48.65 | 62.37 | -| pkshatech/RoSEtta-base-ja | 72.04 | 73.21 | 81.39 | 72.41 | 92.69 | 53.23 | 61.74 | -| cl-nagoya/ruri-base | 71.91 | 69.82 | 82.87 | 75.58 | 92.91 | **54.16** | 62.38 | -| cl-nagoya/ruri-small | 71.53 | 69.41 | 82.79 | 76.22 | 93.00 | 51.19 | 62.11 | -| intfloat/multilingual-e5-large | 70.90 | 70.98 | 79.70 | 72.89 | 92.96 | 
51.24 | 62.15 | -| OpenAI/text-embedding-3-small | 69.18 | 66.39 | 79.46 | 73.06 | 92.92 | 51.06 | 62.27 | -| intfloat/multilingual-e5-base | 68.61 | 68.21 | 79.84 | 69.30 | 92.85 | 48.26 | 62.26 | -| intfloat/multilingual-e5-small | 67.71 | 67.27 | 80.07 | 67.62 | 93.03 | 46.91 | 62.19 | -| pkshatech/GLuCoSE-base-ja | 67.29 | 59.02 | 78.71 | 76.82 | 91.90 | 49.78 | **66.39** | -| OpenAI/text-embedding-ada-002 | 67.21 | 64.38 | 79.02 | 69.75 | 93.04 | 48.30 | 62.40 | -| cl-nagoya/sup-simcse-ja-base | 63.36 | 49.64 | 82.05 | 73.47 | 91.83 | 51.79 | 62.57 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 61.55 | 47.38 | 78.99 | 73.13 | 91.30 | 48.25 | 62.27 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 60.83 | 46.36 | 77.49 | 73.30 | 91.16 | 46.68 | 62.38 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 60.77 | 43.00 | 76.60 | 76.61 | 91.56 | 50.33 | 62.38 | -| cl-nagoya/unsup-simcse-ja-large | 59.58 | 40.53 | 80.56 | 74.66 | 90.95 | 48.41 | 62.49 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 59.03 | 42.59 | 77.05 | 72.90 | 91.01 | 44.95 | 62.33 | -| cl-nagoya/sup-simcse-ja-large | 58.88 | 37.62 | **83.18** | 73.73 | 91.48 | 50.56 | 62.51 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 58.77 | 40.82 | 78.28 | 73.47 | 90.95 | 45.81 | 62.35 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 58.66 | 41.79 | 75.50 | 73.77 | 90.95 | 44.22 | 62.38 | -| cl-nagoya/unsup-simcse-ja-base | 58.39 | 40.23 | 78.72 | 73.07 | 91.16 | 44.77 | 62.44 | -| sentence-transformers/LaBSE | 58.01 | 40.12 | 76.56 | 72.66 | 91.63 | 44.88 | 62.33 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 57.97 | 41.32 | 74.66 | 72.76 | 90.66 | 43.11 | 62.37 | -| pkshatech/simcse-ja-bert-base-clcmlp | 56.86 | 37.00 | 76.80 | 71.30 | 91.49 | 47.53 | 62.40 | -| sentence-transformers/stsb-xlm-r-multilingual | 48.21 | 21.00 | 75.40 | 71.84 | 90.20 | 27.46 | 62.20 | -| colorfulscoop/sbert-base-ja | 47.38 | 16.52 | 70.42 | 69.07 | 89.97 | 44.81 | 62.31 | +| Model | Avg. 
| Retrieval | STS | Classification | Reranking | Clustering | +|:----------------------------------------------|:---------:|:-----------:|:---------:|:----------------:|:-----------:|:------------:| +| sbintuitions/sarashina-embedding-v2-1b | **76.38** | **76.48** | **84.22** | 77.14 | **86.28** | 52.56 | +| cl-nagoya/ruri-v3-310m | 75.85 | 76.03 | 81.59 | **77.65** | 85.84 | 50.52 | +| cl-nagoya/ruri-v3-130m | 75.52 | 76.45 | 81.05 | 75.65 | 85.71 | 51.13 | +| sbintuitions/sarashina-embedding-v1-1b | 74.87 | 74.53 | 81.71 | 77.20 | 84.36 | 50.30 | +| pfnet/plamo-embedding-1b | 74.85 | 73.25 | 83.15 | 77.29 | 85.05 | 52.50 | +| cl-nagoya/ruri-v3-70m | 73.95 | 74.23 | 80.96 | 74.45 | 84.21 | 49.95 | +| OpenAI/text-embedding-3-large | 73.86 | 71.95 | 82.52 | 77.27 | 83.06 | 51.82 | +| cl-nagoya/ruri-large-v2 | 73.63 | 71.87 | 83.18 | 76.10 | 83.89 | 50.88 | +| cl-nagoya/ruri-v3-30m | 72.95 | 72.84 | 81.78 | 73.35 | 82.93 | 49.90 | +| BAAI/bge-m3 | 72.46 | 72.15 | 79.74 | 74.10 | 84.10 | 45.56 | +| cl-nagoya/ruri-large | 71.69 | 68.30 | 83.13 | 76.25 | 81.26 | 49.93 | +| cl-nagoya/ruri-base-v2 | 71.66 | 68.96 | 83.03 | 75.59 | 82.46 | 46.84 | +| cl-nagoya/ruri-small-v2 | 71.40 | 68.46 | 82.91 | 74.12 | 82.30 | 49.97 | +| pkshatech/GLuCoSE-base-ja-v2 | 71.11 | 68.45 | 82.95 | 73.52 | 82.63 | 48.19 | +| intfloat/multilingual-e5-large | 70.67 | 67.65 | 80.86 | 72.30 | 83.01 | 50.58 | +| google/embeddinggemma-300m | 70.59 | 65.91 | 82.74 | 76.14 | 80.93 | 49.48 | +| cl-nagoya/ruri-base | 70.25 | 65.90 | 82.88 | 75.34 | 80.31 | 49.10 | +| pkshatech/RoSEtta-base-ja | 69.58 | 67.52 | 81.39 | 71.70 | 81.25 | 44.88 | +| cl-nagoya/ruri-small | 69.34 | 63.95 | 82.79 | 74.83 | 79.98 | 49.59 | +| intfloat/multilingual-e5-base | 68.06 | 64.48 | 80.46 | 69.70 | 79.46 | 50.12 | +| intfloat/multilingual-e5-small | 67.38 | 63.91 | 80.46 | 67.77 | 80.09 | 49.29 | +| OpenAI/text-embedding-3-small | 67.10 | 61.79 | 79.46 | 72.43 | 77.29 | 48.91 | +| OpenAI/text-embedding-ada-002 | 65.13 | 
59.58 | 79.02 | 69.39 | 75.63 | 48.78 | +| hotchpotch/static-embedding-japanese | 63.80 | 60.51 | 80.16 | 66.73 | 77.09 | 35.91 | +| pkshatech/GLuCoSE-base-ja | 63.79 | 54.58 | 78.68 | 75.02 | 72.37 | 47.12 | +| cl-nagoya/sup-simcse-ja-base | 59.91 | 45.00 | 82.05 | 72.72 | 70.36 | **52.57** | +| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 57.60 | 42.41 | 79.00 | 71.83 | 71.88 | 42.02 | +| oshizo/sbert-jsnli-luke-japanese-base-lite | 56.75 | 38.08 | 76.56 | 74.53 | 69.81 | 48.75 | +| cl-nagoya/sup-simcse-ja-large | 56.46 | 37.38 | 83.17 | 72.74 | 68.76 | 50.12 | +| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 55.78 | 39.85 | 77.96 | 71.46 | 69.92 | 39.27 | +| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 55.35 | 36.23 | 78.29 | 72.59 | 70.59 | 44.54 | +| MU-Kindai/Japanese-MixCSE-BERT-base | 54.65 | 36.24 | 77.75 | 71.81 | 68.58 | 43.45 | +| cl-nagoya/unsup-simcse-ja-large | 54.23 | 33.98 | 80.56 | 73.71 | 67.39 | 43.52 | +| cl-nagoya/unsup-simcse-ja-base | 53.86 | 35.34 | 78.74 | 72.41 | 66.20 | 41.29 | +| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 53.82 | 35.22 | 74.96 | 71.48 | 68.15 | 42.86 | +| MU-Kindai/Japanese-DiffCSE-BERT-base | 53.59 | 34.93 | 76.70 | 72.06 | 67.73 | 39.93 | +| pkshatech/simcse-ja-bert-base-clcmlp | 53.48 | 32.80 | 76.81 | 70.67 | 68.02 | 49.45 | +| sentence-transformers/LaBSE | 52.70 | 33.18 | 76.56 | 71.85 | 67.01 | 39.82 | +| sentence-transformers/stsb-xlm-r-multilingual | 43.06 | 16.58 | 75.41 | 71.40 | 57.93 | 27.67 | +| colorfulscoop/sbert-base-ja | 42.90 | 15.45 | 70.41 | 68.05 | 59.38 | 39.04 | ## Retrieval -| Model | Avg. | jagovfaqs_22k
(ndcg@10) | jaqket
(ndcg@10) | mrtydi
(ndcg@10) | nlp_journal_abs_intro
(ndcg@10) | nlp_journal_title_abs
(ndcg@10) | nlp_journal_title_intro
(ndcg@10) | -|:----------------------------------------------|:----------|:-----------------------------|:----------------------|:----------------------|:-------------------------------------|:-------------------------------------|:---------------------------------------| -| sbintuitions/sarashina-embedding-v1-1b | **77.61** | 71.68 | **72.79** | 41.95 | 93.94 | 96.96 | 88.33 | -| jinaai/jina-embeddings-v3 | 75.22 | 71.50 | 46.48 | **45.45** | 98.43 | 95.62 | 93.85 | -| OpenAI/text-embedding-3-large | 74.48 | 72.41 | 48.21 | 34.88 | **99.33** | 96.55 | **95.47** | -| pkshatech/GLuCoSE-base-ja-v2 | 73.36 | 69.79 | 67.29 | 41.86 | 90.29 | 95.11 | 75.80 | -| pkshatech/RoSEtta-base-ja | 73.21 | 65.96 | 65.33 | 36.73 | 95.54 | 94.08 | 81.63 | -| cl-nagoya/ruri-large | 73.02 | **76.68** | 61.74 | 38.03 | 87.12 | 96.58 | 77.97 | -| intfloat/multilingual-e5-large | 70.98 | 70.30 | 58.78 | 43.63 | 86.00 | 94.70 | 72.48 | -| cl-nagoya/ruri-base | 69.82 | 74.56 | 50.12 | 35.45 | 86.89 | 96.57 | 75.31 | -| cl-nagoya/ruri-small | 69.41 | 73.65 | 48.44 | 33.43 | 87.69 | **97.17** | 76.09 | -| intfloat/multilingual-e5-base | 68.21 | 65.34 | 50.67 | 38.38 | 87.10 | 94.73 | 73.05 | -| intfloat/multilingual-e5-small | 67.27 | 64.11 | 49.97 | 36.05 | 85.21 | 95.26 | 72.99 | -| OpenAI/text-embedding-3-small | 66.39 | 64.02 | 33.94 | 20.03 | 98.47 | 91.70 | 90.17 | -| OpenAI/text-embedding-ada-002 | 64.38 | 61.02 | 42.56 | 14.51 | 94.99 | 91.23 | 81.98 | -| pkshatech/GLuCoSE-base-ja | 59.02 | 63.88 | 39.82 | 30.28 | 78.26 | 82.06 | 59.82 | -| cl-nagoya/sup-simcse-ja-base | 49.64 | 51.62 | 50.25 | 13.98 | 68.08 | 65.71 | 48.22 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 47.38 | 50.14 | 45.84 | 13.00 | 55.09 | 74.97 | 45.24 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 46.36 | 47.39 | 39.57 | 11.44 | 64.16 | 70.23 | 45.37 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 43.00 | 51.99 | 42.07 | 10.12 | 49.30 | 71.94 | 32.59 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 42.59 | 
42.37 | 37.72 | 7.88 | 63.70 | 64.13 | 39.73 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 41.79 | 42.31 | 36.20 | 7.81 | 60.77 | 64.34 | 39.32 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 41.32 | 44.11 | 39.61 | 8.15 | 62.76 | 58.39 | 34.89 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 40.82 | 47.04 | 44.10 | 11.43 | 43.43 | 62.41 | 36.52 | -| cl-nagoya/unsup-simcse-ja-large | 40.53 | 45.09 | 34.60 | 5.75 | 55.07 | 63.07 | 39.61 | -| cl-nagoya/unsup-simcse-ja-base | 40.23 | 46.00 | 39.46 | 5.55 | 60.26 | 55.63 | 34.49 | -| sentence-transformers/LaBSE | 40.12 | 43.10 | 34.25 | 4.24 | 48.92 | 75.13 | 35.09 | -| cl-nagoya/sup-simcse-ja-large | 37.62 | 46.84 | 39.88 | 11.83 | 63.43 | 37.93 | 25.79 | -| pkshatech/simcse-ja-bert-base-clcmlp | 37.00 | 41.50 | 46.00 | 10.19 | 40.14 | 59.63 | 24.53 | -| sentence-transformers/stsb-xlm-r-multilingual | 21.00 | 25.11 | 21.61 | 2.76 | 28.49 | 36.47 | 11.55 | -| colorfulscoop/sbert-base-ja | 16.52 | 21.50 | 13.16 | 0.44 | 28.78 | 22.40 | 12.82 | +| Model | Avg. | jacwir_retrieval
(ndcg@10) | jagovfaqs_22k
(ndcg@10) | jaqket
(ndcg@10) | mintaka_retrieval
(ndcg@10) | miracl_retrieval
(ndcg@10) | mldr_retrieval
(ndcg@10) | mrtydi
(ndcg@10) | nlp_abs_article
(ndcg@10) | nlp_abs_intro
(ndcg@10) | nlp_title_abs
(ndcg@10) | nlp_title_intro
(ndcg@10) | +|:----------------------------------------------|:---------:|:-------------------------------:|:----------------------------:|:---------------------:|:--------------------------------:|:-------------------------------:|:-----------------------------:|:---------------------:|:------------------------------:|:----------------------------:|:----------------------------:|:------------------------------:| +| sbintuitions/sarashina-embedding-v2-1b | **76.48** | 85.54 | 74.87 | **73.52** | **66.11** | 68.26 | 40.35 | **49.57** | 96.84 | 96.28 | 98.11 | 91.79 | +| cl-nagoya/ruri-v3-130m | 76.45 | 84.21 | 75.32 | 73.10 | 51.77 | 71.01 | 45.16 | 47.80 | 99.51 | 98.88 | 97.95 | 96.28 | +| cl-nagoya/ruri-v3-310m | 76.03 | 84.06 | 76.49 | 71.87 | 52.25 | 67.71 | 43.43 | 47.06 | **99.59** | **99.35** | 97.91 | **96.58** | +| sbintuitions/sarashina-embedding-v1-1b | 74.53 | 82.43 | 71.76 | 72.92 | 62.60 | 63.23 | 34.59 | 40.75 | 99.20 | 99.16 | 96.85 | 96.29 | +| cl-nagoya/ruri-v3-70m | 74.23 | 82.76 | 73.27 | 67.68 | 46.26 | 67.98 | 43.55 | 45.00 | 98.50 | 98.68 | 97.07 | 95.73 | +| pfnet/plamo-embedding-1b | 73.25 | **88.91** | **79.03** | 54.39 | 54.56 | 59.91 | 36.68 | 41.87 | 97.65 | 99.02 | **98.63** | 95.11 | +| cl-nagoya/ruri-v3-30m | 72.84 | 82.70 | 70.21 | 62.45 | 43.05 | 64.99 | 45.77 | 41.78 | 98.76 | 99.16 | 96.99 | 95.34 | +| BAAI/bge-m3 | 72.15 | 85.13 | 69.07 | 56.59 | 32.18 | **73.48** | **51.26** | 45.18 | 95.22 | 97.53 | 96.02 | 91.98 | +| OpenAI/text-embedding-3-large | 71.95 | 82.90 | 72.41 | 48.21 | 63.52 | 60.57 | 45.26 | 34.88 | 92.37 | 99.33 | 96.55 | 95.47 | +| cl-nagoya/ruri-large-v2 | 71.87 | 80.49 | 78.23 | 65.61 | 50.41 | 70.46 | 36.97 | 46.37 | 90.85 | 91.15 | 97.74 | 82.32 | +| cl-nagoya/ruri-base-v2 | 68.96 | 81.01 | 75.90 | 57.01 | 44.18 | 68.22 | 37.73 | 40.89 | 88.05 | 89.73 | 96.96 | 78.93 | +| cl-nagoya/ruri-small-v2 | 68.46 | 83.04 | 74.02 | 62.25 | 35.31 | 66.90 | 32.58 | 42.40 | 90.65 | 90.42 | 97.30 | 78.21 | +| 
pkshatech/GLuCoSE-base-ja-v2 | 68.45 | 83.85 | 69.85 | 67.52 | 39.57 | 65.29 | 33.75 | 41.67 | 89.91 | 90.08 | 95.67 | 75.79 | +| cl-nagoya/ruri-large | 68.30 | 81.69 | 77.64 | 61.73 | 51.06 | 55.47 | 34.77 | 38.12 | 86.53 | 88.91 | 96.17 | 79.22 | +| intfloat/multilingual-e5-large | 67.65 | 86.41 | 72.98 | 59.67 | 39.59 | 70.96 | 29.85 | 47.82 | 83.26 | 85.71 | 95.29 | 72.57 | +| pkshatech/RoSEtta-base-ja | 67.52 | 82.02 | 66.28 | 64.28 | 34.04 | 60.16 | 32.37 | 36.77 | 96.04 | 95.41 | 93.17 | 82.19 | +| google/embeddinggemma-300m | 65.91 | 81.07 | 69.43 | 63.27 | 38.63 | 35.28 | 34.66 | 13.86 | 99.34 | 99.02 | 96.12 | 94.35 | +| cl-nagoya/ruri-base | 65.90 | 82.48 | 75.50 | 50.23 | 45.37 | 54.88 | 35.42 | 35.59 | 86.65 | 87.23 | 95.27 | 76.25 | +| intfloat/multilingual-e5-base | 64.48 | 84.32 | 68.72 | 51.69 | 34.68 | 64.50 | 25.73 | 42.30 | 83.56 | 84.48 | 94.62 | 74.70 | +| cl-nagoya/ruri-small | 63.95 | 82.58 | 74.01 | 48.44 | 37.23 | 52.22 | 28.99 | 33.51 | 86.89 | 87.23 | 96.20 | 76.09 | +| intfloat/multilingual-e5-small | 63.91 | 85.58 | 65.69 | 51.57 | 31.54 | 63.23 | 25.91 | 42.37 | 83.97 | 84.10 | 94.47 | 74.56 | +| OpenAI/text-embedding-3-small | 61.79 | 79.58 | 64.02 | 33.94 | 32.44 | 48.45 | 35.07 | 20.03 | 85.83 | 98.47 | 91.70 | 90.17 | +| hotchpotch/static-embedding-japanese | 60.51 | 72.27 | 55.55 | 64.04 | 38.93 | 32.61 | 42.51 | 11.18 | 76.19 | 95.74 | 90.37 | 86.25 | +| OpenAI/text-embedding-ada-002 | 59.58 | 78.08 | 61.02 | 42.56 | 27.09 | 34.54 | 31.90 | 14.51 | 97.51 | 94.99 | 91.23 | 81.98 | +| pkshatech/GLuCoSE-base-ja | 54.58 | 69.30 | 64.14 | 39.78 | 29.81 | 48.27 | 25.07 | 30.14 | 76.78 | 77.21 | 81.40 | 58.43 | +| cl-nagoya/sup-simcse-ja-base | 45.00 | 53.32 | 52.02 | 50.13 | 32.88 | 20.68 | 24.70 | 14.14 | 69.09 | 66.19 | 64.84 | 46.97 | +| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 42.41 | 53.16 | 51.20 | 45.81 | 30.42 | 26.08 | 23.65 | 13.06 | 54.65 | 52.13 | 74.13 | 42.21 | +| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 39.85 | 
48.95 | 48.41 | 38.73 | 25.72 | 21.60 | 18.11 | 11.02 | 58.91 | 60.05 | 69.15 | 37.72 | +| oshizo/sbert-jsnli-luke-japanese-base-lite | 38.08 | 59.65 | 54.07 | 40.22 | 24.83 | 17.19 | 19.08 | 10.09 | 44.07 | 44.84 | 73.68 | 31.15 | +| cl-nagoya/sup-simcse-ja-large | 37.38 | 43.71 | 47.42 | 40.04 | 37.68 | 18.13 | 23.48 | 11.88 | 64.08 | 62.95 | 36.95 | 24.90 | +| MU-Kindai/Japanese-MixCSE-BERT-base | 36.24 | 42.43 | 43.60 | 37.35 | 25.18 | 14.76 | 16.86 | 7.77 | 56.89 | 59.11 | 61.81 | 32.88 | +| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 36.23 | 46.28 | 48.25 | 44.17 | 28.89 | 19.52 | 18.66 | 11.44 | 43.97 | 40.33 | 60.49 | 36.51 | +| cl-nagoya/unsup-simcse-ja-base | 35.34 | 35.11 | 46.74 | 39.52 | 29.92 | 10.93 | 15.98 | 5.51 | 58.22 | 58.41 | 55.58 | 32.84 | +| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 35.22 | 39.92 | 44.60 | 38.45 | 22.39 | 13.94 | 13.91 | 7.30 | 58.35 | 58.63 | 57.43 | 32.47 | +| MU-Kindai/Japanese-DiffCSE-BERT-base | 34.93 | 40.86 | 43.88 | 35.56 | 19.98 | 16.52 | 12.06 | 7.11 | 54.30 | 55.86 | 62.96 | 35.17 | +| cl-nagoya/unsup-simcse-ja-large | 33.98 | 37.61 | 46.56 | 34.53 | 30.58 | 10.33 | 12.55 | 5.75 | 50.45 | 50.70 | 60.43 | 34.32 | +| sentence-transformers/LaBSE | 33.18 | 49.12 | 42.43 | 24.92 | 20.02 | 9.36 | 7.53 | 4.22 | 48.06 | 48.20 | 75.59 | 35.53 | +| pkshatech/simcse-ja-bert-base-clcmlp | 32.80 | 45.03 | 41.00 | 37.01 | 31.30 | 16.07 | 20.08 | 10.15 | 38.13 | 37.60 | 59.18 | 25.26 | +| sentence-transformers/stsb-xlm-r-multilingual | 16.58 | 21.08 | 22.49 | 6.49 | 22.31 | 2.28 | 6.53 | 2.78 | 24.91 | 25.55 | 35.84 | 12.13 | +| colorfulscoop/sbert-base-ja | 15.45 | 19.30 | 21.70 | 13.14 | 19.07 | 1.86 | 6.97 | 0.41 | 29.02 | 25.80 | 21.07 | 11.57 | ## STS -| Model | Avg. | jsick
(spearman) | jsts
(spearman) | -|:----------------------------------------------|:----------|:----------------------|:---------------------| -| cl-nagoya/sup-simcse-ja-large | **83.18** | 83.80 | 82.57 | -| cl-nagoya/ruri-large | 83.13 | 82.00 | 84.26 | -| pkshatech/GLuCoSE-base-ja-v2 | 82.96 | **84.96** | 80.96 | -| cl-nagoya/ruri-base | 82.87 | 82.32 | 83.43 | -| cl-nagoya/ruri-small | 82.79 | 83.44 | 82.13 | -| sbintuitions/sarashina-embedding-v1-1b | 82.71 | 80.22 | **85.20** | -| OpenAI/text-embedding-3-large | 82.52 | 81.27 | 83.77 | -| cl-nagoya/sup-simcse-ja-base | 82.05 | 82.83 | 81.27 | -| pkshatech/RoSEtta-base-ja | 81.39 | 83.83 | 78.95 | -| cl-nagoya/unsup-simcse-ja-large | 80.56 | 80.15 | 80.98 | -| intfloat/multilingual-e5-small | 80.07 | 81.50 | 78.65 | -| jinaai/jina-embeddings-v3 | 80.05 | 78.16 | 81.93 | -| intfloat/multilingual-e5-base | 79.84 | 81.28 | 78.39 | -| intfloat/multilingual-e5-large | 79.70 | 78.40 | 80.99 | -| OpenAI/text-embedding-3-small | 79.46 | 80.83 | 78.08 | -| OpenAI/text-embedding-ada-002 | 79.02 | 79.09 | 78.94 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 78.99 | 79.84 | 78.14 | -| cl-nagoya/unsup-simcse-ja-base | 78.72 | 78.49 | 78.95 | -| pkshatech/GLuCoSE-base-ja | 78.71 | 74.97 | 82.46 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 78.28 | 78.75 | 77.81 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 77.49 | 78.18 | 76.81 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 77.05 | 77.57 | 76.53 | -| pkshatech/simcse-ja-bert-base-clcmlp | 76.80 | 73.08 | 80.52 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.60 | 72.11 | 81.09 | -| sentence-transformers/LaBSE | 76.56 | 76.99 | 76.12 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 75.50 | 75.42 | 75.58 | -| sentence-transformers/stsb-xlm-r-multilingual | 75.40 | 72.36 | 78.44 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 74.66 | 74.64 | 74.68 | -| colorfulscoop/sbert-base-ja | 70.42 | 66.59 | 74.24 | +| Model | Avg. | jsick
(spearman) | jsts
(spearman) | +|:----------------------------------------------|:---------:|:---------------------:|:--------------------:| +| sbintuitions/sarashina-embedding-v2-1b | **84.22** | 82.58 | **85.87** | +| cl-nagoya/ruri-large-v2 | 83.18 | 82.12 | 84.24 | +| cl-nagoya/sup-simcse-ja-large | 83.17 | 83.78 | 82.56 | +| pfnet/plamo-embedding-1b | 83.15 | 81.83 | 84.46 | +| cl-nagoya/ruri-large | 83.13 | 82.00 | 84.26 | +| cl-nagoya/ruri-base-v2 | 83.03 | 82.63 | 83.43 | +| pkshatech/GLuCoSE-base-ja-v2 | 82.95 | **84.95** | 80.96 | +| cl-nagoya/ruri-small-v2 | 82.91 | 83.88 | 81.93 | +| cl-nagoya/ruri-base | 82.88 | 82.32 | 83.43 | +| cl-nagoya/ruri-small | 82.79 | 83.45 | 82.13 | +| google/embeddinggemma-300m | 82.74 | 81.67 | 83.81 | +| OpenAI/text-embedding-3-large | 82.52 | 81.27 | 83.77 | +| cl-nagoya/sup-simcse-ja-base | 82.05 | 82.84 | 81.26 | +| cl-nagoya/ruri-v3-30m | 81.78 | 81.62 | 81.95 | +| sbintuitions/sarashina-embedding-v1-1b | 81.71 | 79.79 | 83.63 | +| cl-nagoya/ruri-v3-310m | 81.59 | 78.86 | 84.31 | +| pkshatech/RoSEtta-base-ja | 81.39 | 83.83 | 78.95 | +| cl-nagoya/ruri-v3-130m | 81.05 | 78.86 | 83.24 | +| cl-nagoya/ruri-v3-70m | 80.96 | 79.10 | 82.82 | +| intfloat/multilingual-e5-large | 80.86 | 79.85 | 81.86 | +| cl-nagoya/unsup-simcse-ja-large | 80.56 | 80.14 | 80.98 | +| intfloat/multilingual-e5-small | 80.46 | 82.00 | 78.92 | +| intfloat/multilingual-e5-base | 80.46 | 81.26 | 79.65 | +| hotchpotch/static-embedding-japanese | 80.16 | 82.51 | 77.81 | +| BAAI/bge-m3 | 79.74 | 79.27 | 80.21 | +| OpenAI/text-embedding-3-small | 79.46 | 80.83 | 78.08 | +| OpenAI/text-embedding-ada-002 | 79.02 | 79.09 | 78.94 | +| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 79.00 | 79.86 | 78.14 | +| cl-nagoya/unsup-simcse-ja-base | 78.74 | 78.53 | 78.94 | +| pkshatech/GLuCoSE-base-ja | 78.68 | 74.90 | 82.46 | +| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 78.29 | 78.76 | 77.82 | +| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 77.96 | 79.14 | 76.77 | +| 
MU-Kindai/Japanese-MixCSE-BERT-base | 77.75 | 78.93 | 76.57 | +| pkshatech/simcse-ja-bert-base-clcmlp | 76.81 | 73.11 | 80.51 | +| MU-Kindai/Japanese-DiffCSE-BERT-base | 76.70 | 77.76 | 75.63 | +| sentence-transformers/LaBSE | 76.56 | 77.01 | 76.12 | +| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.56 | 72.04 | 81.08 | +| sentence-transformers/stsb-xlm-r-multilingual | 75.41 | 72.38 | 78.45 | +| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 74.96 | 75.25 | 74.66 | +| colorfulscoop/sbert-base-ja | 70.41 | 66.56 | 74.25 | ## Classification -| Model | Avg. | amazon_counterfactual
(macro_f1) | amazon_review
(macro_f1) | massive_intent
(macro_f1) | massive_scenario
(macro_f1) | -|:----------------------------------------------|:----------|:--------------------------------------|:------------------------------|:-------------------------------|:---------------------------------| -| sbintuitions/sarashina-embedding-v1-1b | **78.37** | 79.10 | **61.48** | 82.26 | 90.65 | -| OpenAI/text-embedding-3-large | 77.58 | 77.90 | 60.44 | 80.91 | **91.08** | -| cl-nagoya/ruri-large | 77.43 | 80.81 | 56.80 | **82.56** | 89.56 | -| pkshatech/GLuCoSE-base-ja | 76.82 | **82.44** | 58.07 | 78.85 | 87.94 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.61 | 79.95 | 57.48 | 80.26 | 88.75 | -| jinaai/jina-embeddings-v3 | 76.39 | 78.83 | 59.33 | 77.65 | 89.74 | -| cl-nagoya/ruri-small | 76.22 | 79.92 | 55.61 | 81.49 | 87.88 | -| cl-nagoya/ruri-base | 75.58 | 76.66 | 55.76 | 81.41 | 88.49 | -| cl-nagoya/unsup-simcse-ja-large | 74.66 | 76.79 | 55.37 | 79.13 | 87.36 | -| pkshatech/GLuCoSE-base-ja-v2 | 74.21 | 74.92 | 55.31 | 79.79 | 86.84 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 73.77 | 78.10 | 51.56 | 78.79 | 86.63 | -| cl-nagoya/sup-simcse-ja-large | 73.73 | 73.21 | 54.76 | 79.23 | 87.72 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 73.47 | 77.25 | 53.42 | 76.83 | 86.39 | -| cl-nagoya/sup-simcse-ja-base | 73.47 | 72.34 | 54.41 | 79.52 | 87.60 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 73.30 | 76.20 | 51.52 | 78.95 | 86.54 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 73.13 | 76.36 | 52.75 | 76.88 | 86.51 | -| cl-nagoya/unsup-simcse-ja-base | 73.07 | 73.30 | 53.93 | 79.07 | 85.97 | -| OpenAI/text-embedding-3-small | 73.06 | 70.01 | 55.92 | 77.66 | 88.67 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 72.90 | 77.62 | 50.86 | 77.19 | 85.93 | -| intfloat/multilingual-e5-large | 72.89 | 70.66 | 56.54 | 75.78 | 88.59 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 72.76 | 76.20 | 52.06 | 77.89 | 84.90 | -| sentence-transformers/LaBSE | 72.66 | 73.61 | 51.70 | 76.99 | 88.35 | -| pkshatech/RoSEtta-base-ja | 72.41 | 70.05 | 52.64 | 79.84 | 87.10 
| -| sentence-transformers/stsb-xlm-r-multilingual | 71.84 | 75.65 | 51.32 | 74.28 | 86.10 | -| pkshatech/simcse-ja-bert-base-clcmlp | 71.30 | 67.49 | 50.85 | 79.67 | 87.20 | -| OpenAI/text-embedding-ada-002 | 69.75 | 64.42 | 53.13 | 74.57 | 86.89 | -| intfloat/multilingual-e5-base | 69.30 | 63.67 | 54.24 | 72.78 | 86.53 | -| colorfulscoop/sbert-base-ja | 69.07 | 72.21 | 47.95 | 72.52 | 83.62 | -| intfloat/multilingual-e5-small | 67.62 | 62.14 | 51.27 | 70.85 | 86.22 | +| Model | Avg. | amazon_counterfactual
(macro_f1) | amazon_review
(macro_f1) | jpn_sentiment
(macro_f1) | massive_intent
(macro_f1) | massive_scenario
(macro_f1) | sib200_jpn_cls
(macro_f1) | wrime_classification
(macro_f1) | +|:----------------------------------------------|:---------:|:-------------------------------------:|:-----------------------------:|:-----------------------------:|:------------------------------:|:--------------------------------:|:------------------------------:|:------------------------------------:| +| cl-nagoya/ruri-v3-310m | **77.65** | 80.09 | 60.72 | 95.31 | 81.76 | 89.01 | 88.13 | 48.53 | +| pfnet/plamo-embedding-1b | 77.29 | 77.59 | 59.48 | 91.73 | 82.79 | 89.95 | **90.31** | 49.20 | +| OpenAI/text-embedding-3-large | 77.27 | 77.90 | 60.44 | **96.89** | 80.91 | **91.08** | 87.85 | 45.84 | +| sbintuitions/sarashina-embedding-v1-1b | 77.20 | 79.66 | **62.02** | 95.03 | 81.21 | 90.16 | 82.63 | 49.70 | +| sbintuitions/sarashina-embedding-v2-1b | 77.14 | 79.81 | 61.39 | 93.51 | **83.69** | 90.23 | 81.48 | **49.87** | +| cl-nagoya/ruri-large | 76.25 | 79.50 | 56.85 | 93.56 | 82.10 | 90.03 | 85.26 | 46.45 | +| google/embeddinggemma-300m | 76.14 | 74.74 | 58.04 | 95.99 | 80.07 | 90.58 | 86.92 | 46.62 | +| cl-nagoya/ruri-large-v2 | 76.10 | 79.51 | 57.09 | 93.57 | 80.87 | 89.71 | 84.72 | 47.23 | +| cl-nagoya/ruri-v3-130m | 75.65 | 76.75 | 59.56 | 95.00 | 80.79 | 87.90 | 82.88 | 46.63 | +| cl-nagoya/ruri-base-v2 | 75.59 | 75.97 | 55.55 | 92.36 | 80.93 | 88.87 | 89.26 | 46.17 | +| cl-nagoya/ruri-base | 75.34 | 76.66 | 56.02 | 91.69 | 81.22 | 88.61 | 87.73 | 45.47 | +| pkshatech/GLuCoSE-base-ja | 75.02 | **82.03** | 57.93 | 92.89 | 78.52 | 87.71 | 77.24 | 48.82 | +| cl-nagoya/ruri-small | 74.83 | 80.55 | 55.41 | 88.86 | 81.08 | 88.00 | 83.97 | 45.95 | +| oshizo/sbert-jsnli-luke-japanese-base-lite | 74.53 | 79.72 | 58.02 | 91.99 | 80.16 | 88.78 | 77.31 | 45.73 | +| cl-nagoya/ruri-v3-70m | 74.45 | 81.81 | 57.98 | 93.39 | 78.92 | 87.83 | 76.87 | 44.38 | +| cl-nagoya/ruri-small-v2 | 74.12 | 77.67 | 55.60 | 88.64 | 82.00 | 88.16 | 81.57 | 45.23 | +| BAAI/bge-m3 | 74.10 | 71.86 | 56.65 | 94.41 | 78.68 | 89.70 | 84.25 | 43.17 | +| 
cl-nagoya/unsup-simcse-ja-large | 73.71 | 76.40 | 55.05 | 90.57 | 79.25 | 87.50 | 82.89 | 44.33 | +| pkshatech/GLuCoSE-base-ja-v2 | 73.52 | 75.28 | 55.19 | 89.24 | 78.73 | 87.14 | 85.83 | 43.23 | +| cl-nagoya/ruri-v3-30m | 73.35 | 75.60 | 55.71 | 92.63 | 78.31 | 86.72 | 81.40 | 43.11 | +| cl-nagoya/sup-simcse-ja-large | 72.74 | 72.61 | 54.56 | 89.42 | 79.23 | 87.71 | 80.43 | 45.26 | +| cl-nagoya/sup-simcse-ja-base | 72.72 | 71.93 | 54.54 | 91.01 | 80.11 | 87.63 | 81.92 | 41.88 | +| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 72.59 | 77.67 | 53.48 | 89.28 | 76.79 | 86.24 | 83.77 | 40.89 | +| OpenAI/text-embedding-3-small | 72.43 | 70.01 | 55.92 | 89.97 | 77.66 | 88.67 | 84.72 | 40.05 | +| cl-nagoya/unsup-simcse-ja-base | 72.41 | 73.65 | 54.14 | 89.87 | 77.68 | 86.10 | 84.13 | 41.31 | +| intfloat/multilingual-e5-large | 72.30 | 69.70 | 57.64 | 95.55 | 74.01 | 88.71 | 78.11 | 42.38 | +| MU-Kindai/Japanese-DiffCSE-BERT-base | 72.06 | 77.70 | 51.46 | 88.45 | 78.72 | 86.40 | 83.50 | 38.15 | +| sentence-transformers/LaBSE | 71.85 | 74.74 | 51.63 | 89.52 | 77.09 | 88.39 | 81.47 | 40.11 | +| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 71.83 | 76.55 | 52.73 | 88.22 | 77.22 | 86.25 | 81.45 | 40.38 | +| MU-Kindai/Japanese-MixCSE-BERT-base | 71.81 | 77.79 | 51.11 | 87.82 | 77.97 | 86.34 | 85.06 | 36.56 | +| pkshatech/RoSEtta-base-ja | 71.70 | 70.21 | 52.62 | 87.28 | 79.59 | 86.96 | 84.01 | 41.24 | +| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 71.48 | 74.30 | 51.97 | 89.69 | 77.83 | 84.60 | 83.82 | 38.15 | +| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 71.46 | 76.40 | 51.65 | 87.86 | 78.15 | 86.44 | 81.80 | 37.93 | +| sentence-transformers/stsb-xlm-r-multilingual | 71.40 | 75.14 | 51.67 | 87.15 | 74.34 | 86.07 | 83.73 | 41.68 | +| pkshatech/simcse-ja-bert-base-clcmlp | 70.67 | 68.28 | 51.75 | 88.21 | 79.65 | 87.23 | 81.18 | 38.39 | +| intfloat/multilingual-e5-base | 69.70 | 64.29 | 54.17 | 92.32 | 73.19 | 86.78 | 78.50 | 38.65 | +| OpenAI/text-embedding-ada-002 | 69.39 | 64.42 | 
53.13 | 88.76 | 74.57 | 86.89 | 80.39 | 37.57 | +| colorfulscoop/sbert-base-ja | 68.05 | 70.80 | 47.80 | 83.50 | 72.89 | 83.71 | 82.63 | 35.06 | +| intfloat/multilingual-e5-small | 67.77 | 58.66 | 51.21 | 87.73 | 71.34 | 86.77 | 81.78 | 36.91 | +| hotchpotch/static-embedding-japanese | 66.73 | 68.06 | 46.81 | 79.82 | 74.79 | 82.18 | 83.33 | 32.12 | ## Reranking -| Model | Avg. | esci
(ndcg@10) | -|:----------------------------------------------|:----------|:--------------------| -| sbintuitions/sarashina-embedding-v1-1b | **93.74** | **93.74** | -| OpenAI/text-embedding-3-large | 93.58 | 93.58 | -| OpenAI/text-embedding-ada-002 | 93.04 | 93.04 | -| intfloat/multilingual-e5-small | 93.03 | 93.03 | -| pkshatech/GLuCoSE-base-ja-v2 | 93.01 | 93.01 | -| cl-nagoya/ruri-small | 93.00 | 93.00 | -| cl-nagoya/ruri-large | 92.99 | 92.99 | -| intfloat/multilingual-e5-large | 92.96 | 92.96 | -| OpenAI/text-embedding-3-small | 92.92 | 92.92 | -| cl-nagoya/ruri-base | 92.91 | 92.91 | -| intfloat/multilingual-e5-base | 92.85 | 92.85 | -| jinaai/jina-embeddings-v3 | 92.71 | 92.71 | -| pkshatech/RoSEtta-base-ja | 92.69 | 92.69 | -| pkshatech/GLuCoSE-base-ja | 91.90 | 91.90 | -| cl-nagoya/sup-simcse-ja-base | 91.83 | 91.83 | -| sentence-transformers/LaBSE | 91.63 | 91.63 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 91.56 | 91.56 | -| pkshatech/simcse-ja-bert-base-clcmlp | 91.49 | 91.49 | -| cl-nagoya/sup-simcse-ja-large | 91.48 | 91.48 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 91.30 | 91.30 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 91.16 | 91.16 | -| cl-nagoya/unsup-simcse-ja-base | 91.16 | 91.16 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 91.01 | 91.01 | -| cl-nagoya/unsup-simcse-ja-large | 90.95 | 90.95 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 90.95 | 90.95 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 90.95 | 90.95 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 90.66 | 90.66 | -| sentence-transformers/stsb-xlm-r-multilingual | 90.20 | 90.20 | -| colorfulscoop/sbert-base-ja | 89.97 | 89.97 | +| Model | Avg. | esci
(ndcg@10) | jacwir_reranking
(ndcg@10) | jqara
(ndcg@10) | miracl_reranking
(ndcg@10) | mldr_reranking
(ndcg@10) | +|:----------------------------------------------|:---------:|:-------------------:|:-------------------------------:|:--------------------:|:-------------------------------:|:-----------------------------:| +| sbintuitions/sarashina-embedding-v2-1b | **86.28** | 93.58 | 88.79 | **70.55** | 85.93 | 92.53 | +| cl-nagoya/ruri-v3-310m | 85.84 | 93.43 | 88.46 | 68.93 | 85.01 | 93.36 | +| cl-nagoya/ruri-v3-130m | 85.71 | 93.37 | 88.65 | 66.30 | **86.59** | 93.62 | +| pfnet/plamo-embedding-1b | 85.05 | 93.59 | **91.74** | 66.15 | 81.91 | 91.87 | +| sbintuitions/sarashina-embedding-v1-1b | 84.36 | **93.60** | 86.85 | 65.92 | 85.17 | 90.24 | +| cl-nagoya/ruri-v3-70m | 84.21 | 93.20 | 87.48 | 63.09 | 85.03 | 92.26 | +| BAAI/bge-m3 | 84.10 | 93.27 | 89.55 | 53.92 | 85.96 | **97.78** | +| cl-nagoya/ruri-large-v2 | 83.89 | 93.21 | 85.29 | 64.47 | 85.78 | 90.68 | +| OpenAI/text-embedding-3-large | 83.06 | 93.58 | 86.78 | 56.89 | 83.80 | 94.24 | +| intfloat/multilingual-e5-large | 83.01 | 93.31 | 90.37 | 56.14 | 86.31 | 88.91 | +| cl-nagoya/ruri-v3-30m | 82.93 | 93.06 | 87.61 | 57.47 | 83.52 | 92.97 | +| pkshatech/GLuCoSE-base-ja-v2 | 82.63 | 93.02 | 88.27 | 60.70 | 82.44 | 88.71 | +| cl-nagoya/ruri-base-v2 | 82.46 | 93.17 | 85.76 | 60.66 | 84.26 | 88.47 | +| cl-nagoya/ruri-small-v2 | 82.30 | 93.20 | 88.18 | 56.70 | 83.33 | 90.09 | +| cl-nagoya/ruri-large | 81.26 | 92.99 | 86.61 | 59.59 | 80.23 | 86.91 | +| pkshatech/RoSEtta-base-ja | 81.25 | 92.68 | 86.83 | 57.92 | 80.38 | 88.45 | +| google/embeddinggemma-300m | 80.93 | 93.26 | 86.72 | 52.09 | 82.38 | 90.19 | +| cl-nagoya/ruri-base | 80.31 | 92.92 | 87.24 | 54.15 | 79.22 | 88.01 | +| intfloat/multilingual-e5-small | 80.09 | 92.98 | 89.99 | 49.28 | 81.78 | 86.41 | +| cl-nagoya/ruri-small | 79.98 | 93.01 | 87.67 | 53.26 | 77.84 | 88.14 | +| intfloat/multilingual-e5-base | 79.46 | 92.90 | 88.65 | 47.61 | 81.97 | 86.15 | +| OpenAI/text-embedding-3-small | 77.29 | 92.92 | 84.72 | 38.58 | 77.61 | 92.61 | +| 
hotchpotch/static-embedding-japanese | 77.09 | 91.87 | 80.96 | 47.06 | 72.01 | 93.55 | +| OpenAI/text-embedding-ada-002 | 75.63 | 93.04 | 83.91 | 37.54 | 72.83 | 90.83 | +| pkshatech/GLuCoSE-base-ja | 72.37 | 91.82 | 74.54 | 30.24 | 77.82 | 87.42 | +| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 71.88 | 91.30 | 65.14 | 44.96 | 71.21 | 86.79 | +| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 70.59 | 90.93 | 61.45 | 42.47 | 70.65 | 87.42 | +| cl-nagoya/sup-simcse-ja-base | 70.36 | 91.84 | 64.27 | 37.48 | 70.88 | 87.34 | +| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 69.92 | 91.17 | 65.41 | 38.39 | 70.19 | 84.42 | +| oshizo/sbert-jsnli-luke-japanese-base-lite | 69.81 | 91.51 | 67.45 | 36.04 | 68.68 | 85.38 | +| cl-nagoya/sup-simcse-ja-large | 68.76 | 91.50 | 56.15 | 38.30 | 71.26 | 86.60 | +| MU-Kindai/Japanese-MixCSE-BERT-base | 68.58 | 90.92 | 60.51 | 36.84 | 69.31 | 85.31 | +| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 68.15 | 90.67 | 58.16 | 36.66 | 69.09 | 86.15 | +| pkshatech/simcse-ja-bert-base-clcmlp | 68.02 | 91.27 | 57.45 | 31.74 | 72.12 | 87.50 | +| MU-Kindai/Japanese-DiffCSE-BERT-base | 67.73 | 90.95 | 59.81 | 37.20 | 67.90 | 82.81 | +| cl-nagoya/unsup-simcse-ja-large | 67.39 | 90.95 | 54.17 | 38.78 | 70.02 | 83.04 | +| sentence-transformers/LaBSE | 67.01 | 91.47 | 67.85 | 24.62 | 69.28 | 81.84 | +| cl-nagoya/unsup-simcse-ja-base | 66.20 | 91.18 | 51.54 | 32.19 | 69.96 | 86.12 | +| colorfulscoop/sbert-base-ja | 59.38 | 89.97 | 37.15 | 22.21 | 65.03 | 82.55 | +| sentence-transformers/stsb-xlm-r-multilingual | 57.93 | 89.72 | 39.21 | 18.51 | 65.36 | 76.88 | ## Clustering -| Model | Avg. | livedoor_news
(v_measure_score) | mewsc16
(v_measure_score) | -|:----------------------------------------------|:----------|:-------------------------------------|:-------------------------------| -| cl-nagoya/ruri-base | **54.16** | 54.27 | **54.04** | -| sbintuitions/sarashina-embedding-v1-1b | 53.86 | 56.42 | 51.29 | -| OpenAI/text-embedding-3-large | 53.32 | 57.09 | 49.55 | -| pkshatech/RoSEtta-base-ja | 53.23 | **58.62** | 47.85 | -| cl-nagoya/ruri-large | 51.82 | 51.39 | 52.25 | -| cl-nagoya/sup-simcse-ja-base | 51.79 | 52.67 | 50.91 | -| jinaai/jina-embeddings-v3 | 51.46 | 54.72 | 48.19 | -| intfloat/multilingual-e5-large | 51.24 | 57.13 | 45.34 | -| cl-nagoya/ruri-small | 51.19 | 50.96 | 51.41 | -| OpenAI/text-embedding-3-small | 51.06 | 54.57 | 47.55 | -| cl-nagoya/sup-simcse-ja-large | 50.56 | 50.75 | 50.38 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 50.33 | 46.77 | 53.89 | -| pkshatech/GLuCoSE-base-ja | 49.78 | 49.89 | 49.68 | -| pkshatech/GLuCoSE-base-ja-v2 | 48.65 | 51.52 | 45.78 | -| cl-nagoya/unsup-simcse-ja-large | 48.41 | 50.90 | 45.92 | -| OpenAI/text-embedding-ada-002 | 48.30 | 49.67 | 46.92 | -| intfloat/multilingual-e5-base | 48.26 | 55.03 | 41.49 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 48.25 | 53.20 | 43.31 | -| pkshatech/simcse-ja-bert-base-clcmlp | 47.53 | 44.77 | 50.30 | -| intfloat/multilingual-e5-small | 46.91 | 54.70 | 39.12 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 46.68 | 53.02 | 40.35 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 45.81 | 48.45 | 43.17 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 44.95 | 52.62 | 37.28 | -| sentence-transformers/LaBSE | 44.88 | 48.29 | 41.47 | -| colorfulscoop/sbert-base-ja | 44.81 | 42.99 | 46.64 | -| cl-nagoya/unsup-simcse-ja-base | 44.77 | 52.23 | 37.31 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 44.22 | 49.67 | 38.77 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 43.11 | 41.04 | 45.18 | -| sentence-transformers/stsb-xlm-r-multilingual | 27.46 | 24.49 | 30.43 | - -## PairClassification -| Model | Avg. | paws_x_ja
(binary_f1) | -|:----------------------------------------------|:----------|:---------------------------| -| pkshatech/GLuCoSE-base-ja | **66.39** | **66.39** | -| cl-nagoya/sup-simcse-ja-base | 62.57 | 62.57 | -| cl-nagoya/sup-simcse-ja-large | 62.51 | 62.51 | -| cl-nagoya/unsup-simcse-ja-large | 62.49 | 62.49 | -| cl-nagoya/unsup-simcse-ja-base | 62.44 | 62.44 | -| pkshatech/simcse-ja-bert-base-clcmlp | 62.40 | 62.40 | -| OpenAI/text-embedding-ada-002 | 62.40 | 62.40 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 62.38 | 62.38 | -| cl-nagoya/ruri-base | 62.38 | 62.38 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 62.38 | 62.38 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 62.38 | 62.38 | -| jinaai/jina-embeddings-v3 | 62.37 | 62.37 | -| pkshatech/GLuCoSE-base-ja-v2 | 62.37 | 62.37 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 62.37 | 62.37 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 62.35 | 62.35 | -| OpenAI/text-embedding-3-large | 62.35 | 62.35 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 62.33 | 62.33 | -| sentence-transformers/LaBSE | 62.33 | 62.33 | -| colorfulscoop/sbert-base-ja | 62.31 | 62.31 | -| cl-nagoya/ruri-large | 62.29 | 62.29 | -| OpenAI/text-embedding-3-small | 62.27 | 62.27 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 62.27 | 62.27 | -| intfloat/multilingual-e5-base | 62.26 | 62.26 | -| sentence-transformers/stsb-xlm-r-multilingual | 62.20 | 62.20 | -| intfloat/multilingual-e5-small | 62.19 | 62.19 | -| intfloat/multilingual-e5-large | 62.15 | 62.15 | -| cl-nagoya/ruri-small | 62.11 | 62.11 | -| sbintuitions/sarashina-embedding-v1-1b | 62.00 | 62.00 | -| pkshatech/RoSEtta-base-ja | 61.74 | 61.74 | +| Model | Avg. | livedoor_news
(v_measure_score) | mewsc16
(v_measure_score) | sib200_jpn_clust
(v_measure_score) | +|:----------------------------------------------|:---------:|:------------------------------------:|:------------------------------:|:---------------------------------------:| +| cl-nagoya/sup-simcse-ja-base | **52.57** | 55.11 | **53.39** | 49.21 | +| sbintuitions/sarashina-embedding-v2-1b | 52.56 | 57.41 | 51.67 | 48.59 | +| pfnet/plamo-embedding-1b | 52.50 | **61.74** | 48.03 | 47.73 | +| OpenAI/text-embedding-3-large | 51.82 | 57.09 | 49.55 | 48.83 | +| cl-nagoya/ruri-v3-130m | 51.13 | 54.36 | 48.84 | 50.20 | +| cl-nagoya/ruri-large-v2 | 50.88 | 55.62 | 50.97 | 46.06 | +| intfloat/multilingual-e5-large | 50.58 | 51.58 | 46.81 | **53.35** | +| cl-nagoya/ruri-v3-310m | 50.52 | 58.56 | 48.60 | 44.41 | +| sbintuitions/sarashina-embedding-v1-1b | 50.30 | 56.03 | 50.69 | 44.19 | +| cl-nagoya/sup-simcse-ja-large | 50.12 | 53.38 | 51.26 | 45.74 | +| intfloat/multilingual-e5-base | 50.12 | 53.79 | 49.44 | 47.13 | +| cl-nagoya/ruri-small-v2 | 49.97 | 52.61 | 49.47 | 47.82 | +| cl-nagoya/ruri-v3-70m | 49.95 | 54.92 | 47.74 | 47.20 | +| cl-nagoya/ruri-large | 49.93 | 54.44 | 50.59 | 44.76 | +| cl-nagoya/ruri-v3-30m | 49.90 | 53.69 | 47.96 | 48.04 | +| cl-nagoya/ruri-small | 49.59 | 52.90 | 49.37 | 46.51 | +| google/embeddinggemma-300m | 49.48 | 55.33 | 50.55 | 42.55 | +| pkshatech/simcse-ja-bert-base-clcmlp | 49.45 | 49.11 | 47.02 | 52.21 | +| intfloat/multilingual-e5-small | 49.29 | 51.94 | 52.34 | 43.59 | +| cl-nagoya/ruri-base | 49.10 | 56.69 | 52.05 | 38.55 | +| OpenAI/text-embedding-3-small | 48.91 | 54.57 | 47.55 | 44.59 | +| OpenAI/text-embedding-ada-002 | 48.78 | 49.67 | 46.92 | 49.74 | +| oshizo/sbert-jsnli-luke-japanese-base-lite | 48.75 | 51.70 | 51.52 | 43.03 | +| pkshatech/GLuCoSE-base-ja-v2 | 48.19 | 54.46 | 46.12 | 43.98 | +| pkshatech/GLuCoSE-base-ja | 47.12 | 50.41 | 49.52 | 41.43 | +| cl-nagoya/ruri-base-v2 | 46.84 | 54.38 | 50.61 | 35.53 | +| BAAI/bge-m3 | 45.56 | 54.76 | 42.00 | 39.91 | +| pkshatech/RoSEtta-base-ja | 44.88 | 48.89 | 
45.16 | 40.61 | +| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 44.54 | 51.30 | 46.27 | 36.04 | +| cl-nagoya/unsup-simcse-ja-large | 43.52 | 51.48 | 44.44 | 34.65 | +| MU-Kindai/Japanese-MixCSE-BERT-base | 43.45 | 48.56 | 43.20 | 38.60 | +| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 42.86 | 45.84 | 44.08 | 38.67 | +| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 42.02 | 51.59 | 42.68 | 31.78 | +| cl-nagoya/unsup-simcse-ja-base | 41.29 | 50.65 | 39.58 | 33.63 | +| MU-Kindai/Japanese-DiffCSE-BERT-base | 39.93 | 46.01 | 39.22 | 34.56 | +| sentence-transformers/LaBSE | 39.82 | 49.08 | 41.78 | 28.59 | +| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 39.27 | 48.79 | 42.61 | 26.42 | +| colorfulscoop/sbert-base-ja | 39.04 | 40.60 | 46.18 | 30.36 | +| hotchpotch/static-embedding-japanese | 35.91 | 51.44 | 34.81 | 21.47 | +| sentence-transformers/stsb-xlm-r-multilingual | 27.67 | 26.62 | 32.05 | 24.34 | diff --git a/make_leaderboard.py b/make_leaderboard.py index 0e43ccf..5d472eb 100644 --- a/make_leaderboard.py +++ b/make_leaderboard.py @@ -9,14 +9,44 @@ "amazon_review_classification": "amazon_review", "massive_intent_classification": "massive_intent", "massive_scenario_classification": "massive_scenario", + "japanese_sentiment_classification": "jpn_sentiment", + "sib200_japanese_classification": "sib200_jpn_cls", + "sib200_japanese_clustering": "sib200_jpn_clust", + "nlp_journal_abs_article": "nlp_abs_article", + "nlp_journal_abs_intro": "nlp_abs_intro", + "nlp_journal_title_abs": "nlp_title_abs", + "nlp_journal_title_intro": "nlp_title_intro", } -TASK_ORDER = ["Retrieval", "STS", "Classification", "Reranking", "Clustering", "PairClassification"] +TASK_ORDER = ["Retrieval", "STS", "Classification", "Reranking", "Clustering"] SUMMARY_KEY = "Summary" """ Collects the results from the results folder. 
""" +# Load reference structure from sbintuitions/sarashina-embedding-v1-1b/summary.json +reference_file = Path("docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json") +with open(reference_file) as f: + reference_structure = json.load(f) + +# Extract the expected structure +expected_structure = {} +for task_name, task_results in reference_structure.items(): + expected_structure[task_name] = set(task_results.keys()) + + +def has_same_structure(summary: dict, expected: dict) -> bool: + """Check if summary has exactly the same structure as expected.""" + if set(summary.keys()) != set(expected.keys()): + return False + + for task_name, datasets in expected.items(): + if set(summary[task_name].keys()) != datasets: + return False + + return True + + # {task_name: {model_signature: {(dataset_name, metric_name): score}}} all_results: dict[str, dict[str, dict[str, float]]] = defaultdict(lambda: defaultdict(dict)) for summary_file in Path("docs/results").rglob("summary.json"): @@ -26,6 +56,13 @@ with open(summary_file) as f: summary = json.load(f) + # Skip models that don't have the same structure as reference + if not has_same_structure(summary, expected_structure): + org_name = summary_file.parent.parent.name + model_name = summary_file.parent.name + print(f"Skipping {org_name}/{model_name}: different structure") + continue + org_name = summary_file.parent.parent.name model_name = summary_file.parent.name model_signature = f"{org_name}/{model_name}" @@ -56,17 +93,24 @@ def format_score(score: float) -> str: # format to markdown table dataset_keys = list(task_results[next(iter(task_results))].keys()) if task_name == SUMMARY_KEY: - dataset_keys = TASK_ORDER + # Only include existing tasks in the summary + dataset_keys = [task for task in TASK_ORDER if task in all_results] header = ["Model", AVG_COLUMN_NAME, *dataset_keys] table_list: list[list[str | float]] = [] for model_signature, dataset_scores in task_results.items(): + # Skip models that don't have all 
required datasets + if not all(k in dataset_scores for k in dataset_keys): + continue + model_scores = [dataset_scores[k] for k in dataset_keys] if task_name == SUMMARY_KEY: scores_by_dataset = [] for _task_name, _task_results in all_results.items(): - if _task_name != SUMMARY_KEY: + if _task_name != SUMMARY_KEY and model_signature in _task_results: scores_by_dataset.extend(list(_task_results[model_signature].values())) + if not scores_by_dataset: # Skip if no scores available + continue average_score = sum(scores_by_dataset) / len(scores_by_dataset) else: average_score = sum(model_scores) / len(model_scores) @@ -88,7 +132,9 @@ def format_score(score: float) -> str: # add header table_list.insert(0, ["Model", AVG_COLUMN_NAME, *dataset_keys]) - markdown_table = tabulate(table_list, headers="firstrow", tablefmt="pipe") + # Set alignment: left for model names, center for all numeric columns + col_alignment = ["left"] + ["center"] * (len(dataset_keys) + 1) + markdown_table = tabulate(table_list, headers="firstrow", tablefmt="pipe", colalign=col_alignment) markdown_tables[task_name] = markdown_table """ @@ -100,6 +146,8 @@ def format_score(score: float) -> str: "This leaderboard shows the results stored under `docs/results`. The scores are all multiplied by 100.\n\n" ) for task_name in [SUMMARY_KEY, *TASK_ORDER]: + if task_name not in markdown_tables: + continue markdown_table = markdown_tables[task_name] f.write(f"## {task_name}\n") diff --git a/poetry.lock b/poetry.lock index 40fbe9f..5766c9f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. 
[[package]] name = "accelerate" @@ -6,6 +6,7 @@ version = "0.31.0" description = "Accelerate" optional = false python-versions = ">=3.8.0" +groups = ["main"] files = [ {file = "accelerate-0.31.0-py3-none-any.whl", hash = "sha256:0fc608dc49584f64d04711a39711d73cb0ad4ef3d21cddee7ef2216e29471144"}, {file = "accelerate-0.31.0.tar.gz", hash = "sha256:b5199865b26106ccf9205acacbe8e4b3b428ad585e7c472d6a46f6fb75b6c176"}, @@ -37,6 +38,7 @@ version = "3.9.5" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"}, {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"}, @@ -125,7 +127,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns", "brotlicffi"] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] [[package]] name = "aiosignal" @@ -133,6 +135,7 @@ version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, @@ -147,6 +150,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = 
"annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -158,6 +162,7 @@ version = "4.4.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, @@ -171,7 +176,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] trio = ["trio (>=0.23)"] [[package]] @@ -180,6 +185,8 @@ version = "4.0.3" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "python_version < \"3.11\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -191,6 +198,7 @@ version = "23.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "attrs-23.2.0-py3-none-any.whl", hash = 
"sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, @@ -201,8 +209,8 @@ cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] dev = ["attrs[tests]", "pre-commit"] docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] -tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.6) ; platform_python_implementation == \"CPython\" and python_version >= \"3.8\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.8\""] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] [[package]] name = "black" @@ -210,6 +218,7 @@ version = "23.12.1" description = "The uncompromising code formatter." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"}, {file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"}, @@ -246,7 +255,7 @@ typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -256,6 +265,7 @@ version = "2024.6.2" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, @@ -267,6 +277,7 @@ version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7.0" +groups = ["main"] files = [ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, @@ -366,6 +377,7 @@ version = "8.1.7" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, @@ -380,10 +392,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "platform_system == \"Windows\""} [[package]] name = "datasets" @@ -391,6 +405,7 @@ version = "2.19.2" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.8.0" +groups = ["main"] files = [ {file = "datasets-2.19.2-py3-none-any.whl", hash = "sha256:e07ff15d75b1af75c87dd96323ba2a361128d495136652f37fd62f918d17bb4e"}, {file = "datasets-2.19.2.tar.gz", hash = "sha256:eccb82fb3bb5ee26ccc6d7a15b7f1f834e2cc4e59b7cff7733a003552bad51ef"}, @@ -417,7 +432,7 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", 
"transformers (==4.30.1)"] -dev = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +dev = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] @@ -425,7 +440,7 @@ quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", 
"faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +tests = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=9.4.0)"] @@ -435,6 +450,7 @@ version = "0.3.8" description = "serialize all of Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, @@ -450,6 +466,7 @@ version = "1.9.0" description = "Distro - an OS platform information API" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -461,6 +478,7 @@ version = "0.1.3" description = "Like `typing._eval_type`, but lets 
older Python versions use newer typing features." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "eval_type_backport-0.1.3-py3-none-any.whl", hash = "sha256:519d2a993b3da286df9f90e17f503f66435106ad870cf26620c5720e2158ddf2"}, {file = "eval_type_backport-0.1.3.tar.gz", hash = "sha256:d83ee225331dfa009493cec1f3608a71550b515ee4749abe78da14e3c5e314f5"}, @@ -475,6 +493,8 @@ version = "1.2.1" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, @@ -489,6 +509,7 @@ version = "3.14.0" description = "A platform independent file lock." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "filelock-3.14.0-py3-none-any.whl", hash = "sha256:43339835842f110ca7ae60f1e1c160714c5a6afd15a2873419ab185334975c0f"}, {file = "filelock-3.14.0.tar.gz", hash = "sha256:6ea72da3be9b8c82afd3edcf99f2fffbb5076335a5ae4d03248bb5b6c3eae78a"}, @@ -497,7 +518,7 @@ files = [ [package.extras] docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] -typing = ["typing-extensions (>=4.8)"] +typing = ["typing-extensions (>=4.8) ; python_version < \"3.11\""] [[package]] name = "flake8" @@ -505,6 +526,7 @@ version = "7.0.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" +groups = ["dev"] files = [ {file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"}, 
{file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"}, @@ -521,6 +543,7 @@ version = "1.4.1" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, @@ -607,6 +630,7 @@ version = "2024.3.1" description = "File-system specification" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"}, {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"}, @@ -641,50 +665,52 @@ tqdm = ["tqdm"] [[package]] name = "fugashi" -version = "1.3.2" -description = "A Cython MeCab wrapper for fast, pythonic Japanese tokenization." +version = "1.5.2" +description = "Cython MeCab wrapper for fast, pythonic Japanese tokenization." 
optional = false -python-versions = ">=3.7" -files = [ - {file = "fugashi-1.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:583e7a14e6ddf8a03b500bec30d708f72e98035ab43e2c92940dd9c36ee63de9"}, - {file = "fugashi-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6c67023cdc1b059b05751c1785c794c24d8862f37a16cdb805e33c7d7ae0c19d"}, - {file = "fugashi-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b2e21be33ed72621d9f4a601a33c00b38052df947f297d792b221a33337f094"}, - {file = "fugashi-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:af7abac3037c7421b075782897766b8f453f28ef3bbadd3e7d69c9df409a48a8"}, - {file = "fugashi-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b915d936e3eb30d50fde86889f8ab56968e5cb4d0ceeb497ac1bb6c58531f87"}, - {file = "fugashi-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:8dc57d07809fbecdfc277d50028d5b8d23fb4c0ed12e6d6f7f565709c18848a4"}, - {file = "fugashi-1.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:50243df8758f5fb90bd2801e557168e613df61fa4d488acfe364070e8a4a234c"}, - {file = "fugashi-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9c9fb77c42e6b421e5c20f74179ed479255545b40a28f9983f264a8b19a30374"}, - {file = "fugashi-1.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fdef6be3489279c670459a55b2dfa876c0856b3fc96b3590aa801f37af6b827"}, - {file = "fugashi-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b2ebe0d6722e05000a959df303e06937939009f4eef0b8692018eb019496013"}, - {file = "fugashi-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85d8e3a9e9d92f555525b2719153e7d3e4ec71d0bae0b076b5495634039b8490"}, - {file = "fugashi-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:9774bb52930fa17ebab17f8bcf2b5d20b6ef529b425ea65affb29a3307c003f8"}, - {file = "fugashi-1.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:fcfb3908059f4dd15d7fda64edd3c027b4da668bf1731f147aa888f5db01bd6c"}, - {file = "fugashi-1.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b44261f2051c43a9e31816d85bb89e5563c3e4c03ff7830d1ebf5942888cf9"}, - {file = "fugashi-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a2d8aecb2a239de33bcb70806b7688001e72f68bde68961c6f2899155f15f87"}, - {file = "fugashi-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e24864e92ad3acf3c0b8f645e33d543fe569544bb6ee9728cb281325aa76d06"}, - {file = "fugashi-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea942e45214a99844146ce0e0f1ac43bff6e2ccbf6d1cbfde4f2bed9ca0951b1"}, - {file = "fugashi-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:da61498017e5cbee65c6eff88a13e17b45a5e3b0428733e99168344b8ff95da5"}, - {file = "fugashi-1.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:18cd37369c1df25e56ef55ea31b3daaa14cfaae805d0ad51ae1274f749f3748d"}, - {file = "fugashi-1.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63a6c360c1d5e8c4ffa55f1459550146a204401c5fb8cc01d4ba593586ed328a"}, - {file = "fugashi-1.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9ed34c799e945f013345a02cf27a5bc97e383b76c3127afe09008cf92b5858d"}, - {file = "fugashi-1.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:213c543e00c80fd601926b03fe489ebd6140d6022a78e2398dcbae7032a9166f"}, - {file = "fugashi-1.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:107a37140c51396776810294aa47d6b92f767f834f1b9e50ca35046a63f31dfd"}, - {file = "fugashi-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:13001a977d0a87e174defaa7a7d5c512da0fd021beabe80ce8eb94694a9563a8"}, - {file = "fugashi-1.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1ad77258f97547d906cc822192c6c2c99a54290b0ca8c127368e11e0a0365245"}, - {file = "fugashi-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:2d3345d2c61dd9d056442e271887a189cc2831a5365c3b8bdcccd4395b54f4fe"}, - {file = "fugashi-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1cb923fbdfccc5d750accd32c9b929603852d6626b162834370b4b3245bb8c2"}, - {file = "fugashi-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:6072a18d1f8428eb19a199ee3d8f1b01c310d15baec96aa7a9fa533e1ce60673"}, - {file = "fugashi-1.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7e392f2c57068bb892c45c1b69067c3dde94b633c81c725a613ee7defe09de47"}, - {file = "fugashi-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:85de463fc30390c06d985f52fcfd422acf7ada6b13f723721ca964854b9ae435"}, - {file = "fugashi-1.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5a640da3824aba966209fc425b2b19c38d22a3da637f83b4a7df83cb94376b87"}, - {file = "fugashi-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11e95f57b78152be3a0a1a1e77d7887cfc25c30412d5f5825711b75ea6d415be"}, - {file = "fugashi-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:409b83f136a3c2da805cd999bd7e1792e7c71fa8e0637f77bdec2b6fd070a3bb"}, - {file = "fugashi-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:51eed11fee767597cfe735bd01326eb06deb2283112e29e9e5bdc954750e7a24"}, - {file = "fugashi-1.3.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bc99b6e8f003c7a0e53e0f486caa1547f0ca8f86777610ea92af6e2f40ca212a"}, - {file = "fugashi-1.3.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4c7e97655d1d3f3f5d5c5da6ac7f31f187177a39f1557f9d3f683772a2e30815"}, - {file = "fugashi-1.3.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:54865ba40c35b3180d9c7cf629a1f3e430bca626dcd6ee6288bc5245c044edea"}, - {file = "fugashi-1.3.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:21d2dac5b085632f1f9a24edf5d7ccaeb3272be672e4aa37a0b219fc7a3b0655"}, - {file = "fugashi-1.3.2.tar.gz", hash = "sha256:964980b5d227ee41af7570542aaab56b1298c44416271cba5d8ff9a58ab40748"}, 
+python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "fugashi-1.5.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:952533caa1704720989ee7f4262902219f938eac87a003d72b8a98b2a24b0299"}, + {file = "fugashi-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5b65bdf535d6a58cbea2938dd2de7daf001c38f8821f28006b695d3ac892f521"}, + {file = "fugashi-1.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:380e5ebe058e4243e5662b252b008782f20818c5d2d30d0e482a8911e2e68674"}, + {file = "fugashi-1.5.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:809db19725a623b5f3f47c7c11909143bb14781569caa3211e6c813608a9a213"}, + {file = "fugashi-1.5.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6464f747c38a1043c9a2da81975db8f2c9724ef59389754d8dae7328ed60a698"}, + {file = "fugashi-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:894b69898b83c6d96f73134466df68682cba10d867c1ca55a93585a7d2213133"}, + {file = "fugashi-1.5.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:072f0ba00ea38705ff43916c8438ce9560bf7ae5e67d415b80f4996f0b82b04e"}, + {file = "fugashi-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e16ada7b953bf5a18fc9c81b2537c58f1c9929b993c6629bf972f96762b221a2"}, + {file = "fugashi-1.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f855953ac6c98cf239d407d341e3298a54119c8de88217037f012096e41ebe7b"}, + {file = "fugashi-1.5.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:516d61660c7b2262047e531b0a99275ce63fd2256f30282fc5066160435478a6"}, + {file = "fugashi-1.5.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ff899e1767024ba8bc53d8a2cf90bca19a6a54b14ddf05a75d04169f7acb262c"}, + {file = "fugashi-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c5e04cb808f5cd46fc682469702f1e34f6199a264514e5c21b1e17ea4f8313f"}, + {file = "fugashi-1.5.2-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:4ed199a931c1d9f7d55c606d90a06323d1a60164ec222ea70af74c0c9d236faa"}, + {file = "fugashi-1.5.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3d2bb28cc6c6eec1c50729bb2dda44007a45599f0471b14c8fda57b0dde36d50"}, + {file = "fugashi-1.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c1f64345a7a13b229fb755b567cbc993adb43b5b617ad4089521e5dd4d27b91"}, + {file = "fugashi-1.5.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ffe760c93e21896cc74066bc5e7dbee6e41a26199807c850b486e2e29b8a3131"}, + {file = "fugashi-1.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:83bc7bf08f81a3c3992bf10b8c681720898a826c6c3dffa80e1296e005f4bfb8"}, + {file = "fugashi-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:936d710166c5b05064ec2ce0eb347fff7a0cf102c33989012fad205346943402"}, + {file = "fugashi-1.5.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5cd0a399aad72d00a3b6b2d8c45e43a8c1e3aefd86ba153c826426b8e133e533"}, + {file = "fugashi-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52c79cddbdcf4bbd0490212d2b2d78b6011d4cf733ff4ef9455274da9a8d54f0"}, + {file = "fugashi-1.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2ee7b102fef6ec554bdeba51a969ce894a519cc71bade5d05a27935de4426745"}, + {file = "fugashi-1.5.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32e01a394011270078efb6c71ef188c327255544d953692cd82f7f726d59ecc4"}, + {file = "fugashi-1.5.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0e79d3f09d847d07eddf8e62ad9840b11331102bc31ecd66455c62581af11638"}, + {file = "fugashi-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:cc5e5ece1f6ba1ce00f2a0a9465d2b91fe01e904888aa0c7089a20e471646c47"}, + {file = "fugashi-1.5.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:0535dcc5a844fb196c215020a5791e5ac0b6c26ee4879cb0e63545c5e6f33642"}, + {file = "fugashi-1.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = 
"sha256:0805863a5268e112bc3c01e9d77e58a7c5ea079d893a18e0d381f3874f690949"}, + {file = "fugashi-1.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:75a8f6219e26e54c95a969af6c5c67f6ea65e333aecc4e85ccc360488e4ba056"}, + {file = "fugashi-1.5.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:79cf4b79809e7e9016dc179e35789bb6a0b9df44e03993835c23d5cb31994de2"}, + {file = "fugashi-1.5.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71c0027aa11747adcb3753d31663290c53fea8007371f0b080c53c192918ceb9"}, + {file = "fugashi-1.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:a3c69086650a66bfffb5dd4952d42a9274cea9b110df7b4837c74da1fe4f98f3"}, + {file = "fugashi-1.5.2-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:41e3f388913a87826045722ab59611b27a4654a51e2037c69d6189e04f33f6f5"}, + {file = "fugashi-1.5.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bb6e06928bd428a8a139660866f01dadd55546b6395a34dffe5602d8c1329205"}, + {file = "fugashi-1.5.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e516bde355c2ba53b5b2ce37760cf67f6f186c79efa049f9ab3767bc843f341b"}, + {file = "fugashi-1.5.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7e0c20abc9df511c54c90ceab118208d051a196ef5f68c63ab1c710fc1a35c6a"}, + {file = "fugashi-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5eda8187624053a610ec09f7b6391d0411e9148c34b5fddad522b342edbcb201"}, + {file = "fugashi-1.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a52b9a023522969f3d9e32172c1a49b0d10bfc187433f33d3ceb1e730cc65417"}, + {file = "fugashi-1.5.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:deca2ff8310d482b802721814b61eeecc8596af396e346b70389ae3f912790c7"}, + {file = "fugashi-1.5.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4b586b8dcdbff7bb95d36ff8c9ac7b041ed95ce4d8e734c383b3c4817e94f992"}, + {file = "fugashi-1.5.2-cp39-cp39-win_amd64.whl", hash = 
"sha256:954b426e7886c1c4113bcf56c1faebf11bcead7768aa764c1b0d0104073c2653"}, + {file = "fugashi-1.5.2.tar.gz", hash = "sha256:a7959eab95bb37a6a934fc2314d3ff888664d11b88d0e1c596260a5785d5880e"}, ] [package.extras] @@ -697,17 +723,55 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "hf-xet" +version = "1.2.0" +description = "Fast transfer of large files with the Hugging Face Hub." +optional = false +python-versions = ">=3.8" +groups = ["main"] +markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" +files = [ + {file = "hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649"}, + {file = "hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813"}, + {file = "hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc"}, + {file = "hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5"}, + {file = "hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f"}, + {file = "hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832"}, + {file = "hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = 
"sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382"}, + {file = "hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e"}, + {file = "hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8"}, + {file = "hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0"}, + {file = "hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090"}, + {file = "hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a"}, + {file = "hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f"}, + {file = "hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc"}, + {file = "hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848"}, + {file = "hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4"}, + {file = "hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd"}, + {file = "hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c"}, + {file = "hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737"}, + {file = "hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865"}, 
+ {file = "hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69"}, + {file = "hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f"}, +] + +[package.extras] +tests = ["pytest"] + [[package]] name = "httpcore" version = "1.0.5" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, @@ -729,6 +793,7 @@ version = "0.27.0" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, @@ -742,25 +807,27 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] [[package]] name = "huggingface-hub" -version = "0.23.3" +version = "0.36.0" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["main"] files = [ - {file = "huggingface_hub-0.23.3-py3-none-any.whl", hash = "sha256:22222c41223f1b7c209ae5511d2d82907325a0e3cdbce5f66949d43c598ff3bc"}, - {file = "huggingface_hub-0.23.3.tar.gz", hash = "sha256:1a1118a0b3dea3bab6c325d71be16f5ffe441d32f3ac7c348d6875911b694b5b"}, + {file = 
"huggingface_hub-0.36.0-py3-none-any.whl", hash = "sha256:7bcc9ad17d5b3f07b57c78e79d527102d08313caa278a641993acddcb894548d"}, + {file = "huggingface_hub-0.36.0.tar.gz", hash = "sha256:47b3f0e2539c39bf5cde015d63b72ec49baff67b6931c3d97f3f84532e2b8d25"}, ] [package.dependencies] filelock = "*" fsspec = ">=2023.5.0" +hf-xet = {version = ">=1.1.3,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""} packaging = ">=20.9" pyyaml = ">=5.1" requests = "*" @@ -768,17 +835,20 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", 
"types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] -inference = ["aiohttp", "minijinja (>=1.0)"] -quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"] +hf-xet = ["hf-xet (>=1.1.2,<2.0.0)"] +inference = ["aiohttp"] +mcp = ["aiohttp", "mcp (>=1.8.0)", "typer"] +oauth = ["authlib (>=1.3.2)", "fastapi", "httpx", "itsdangerous"] +quality = ["libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "ruff (>=0.9.0)", "ty"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] -torch = ["safetensors", "torch"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", 
"pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] [[package]] @@ -787,6 +857,7 @@ version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, @@ -798,31 +869,19 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "intel-openmp" -version = "2021.4.0" -description = "Intel OpenMP* Runtime Library" -optional = false -python-versions = "*" -files = [ - {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:3b921236a38384e2016f0f3d65af6732cf2c12918087128a9163225451e776f2"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:e2240ab8d01472fed04f3544a878cda5da16c26232b7ea1b59132dbfb48b186e"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:6e863d8fd3d7e8ef389d52cf97a50fe2afe1a19247e8c0d168ce021546f96fc9"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:eef4c8bcc8acefd7f5cd3b9384dbf73d59e2c99fc56545712ded913f43c4a94f"}, -] - [[package]] 
name = "ipadic" version = "1.0.0" description = "IPAdic packaged for Python" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "ipadic-1.0.0.tar.gz", hash = "sha256:f5923d31eca6131acaaf18ed28d8998665b1347b640d3a6476f64650e9a71c07"}, ] @@ -833,6 +892,7 @@ version = "5.13.2" description = "A Python utility / library to sort Python imports." optional = false python-versions = ">=3.8.0" +groups = ["dev"] files = [ {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, @@ -847,6 +907,7 @@ version = "3.1.4" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, @@ -864,6 +925,7 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -875,6 +937,7 @@ version = "4.29.0" description = "Implement minimal boilerplate CLIs derived from type hints and parse from command line, config files and environment variables." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jsonargparse-4.29.0-py3-none-any.whl", hash = "sha256:e093d9509996b031d156fe8d4a087e2d91adbfc654b9e2c783878d45ad0dfefe"}, {file = "jsonargparse-4.29.0.tar.gz", hash = "sha256:03d407122c856095c48b07c58107002c9d3eaeb2795d8040efad831db5817494"}, @@ -887,12 +950,12 @@ PyYAML = ">=3.13" [package.extras] all = ["jsonargparse[argcomplete]", "jsonargparse[fsspec]", "jsonargparse[jsonnet]", "jsonargparse[jsonschema]", "jsonargparse[omegaconf]", "jsonargparse[reconplogger]", "jsonargparse[ruyaml]", "jsonargparse[signatures]", "jsonargparse[typing-extensions]", "jsonargparse[urls]"] -argcomplete = ["argcomplete (>=2.0.0)", "argcomplete (>=3.3.0)"] +argcomplete = ["argcomplete (>=2.0.0) ; python_version < \"3.8\"", "argcomplete (>=3.3.0) ; python_version >= \"3.8\""] coverage = ["jsonargparse[test-no-urls]", "pytest-cov (>=4.0.0)"] dev = ["build (>=0.10.0)", "jsonargparse[coverage]", "jsonargparse[doc]", "jsonargparse[mypy]", "jsonargparse[test]", "pre-commit (>=2.19.0)", "tox (>=3.25.0)"] doc = ["Sphinx (>=1.7.9)", "autodocsumm (>=0.1.10)", "sphinx-autodoc-typehints (>=1.19.5)", "sphinx-rtd-theme (>=1.2.2)"] fsspec = ["fsspec (>=0.8.4)"] -jsonnet = ["jsonnet (>=0.13.0)", "jsonnet-binary (>=0.17.0)"] +jsonnet = ["jsonnet (>=0.13.0) ; os_name == \"posix\"", "jsonnet-binary (>=0.17.0) ; os_name != \"posix\""] jsonschema = ["jsonschema (>=3.2.0)"] maintainer = ["bump2version (>=0.5.11)", "twine (>=4.0.2)"] omegaconf = ["omegaconf (>=2.1.1)"] @@ -901,7 +964,7 @@ ruyaml = ["ruyaml (>=0.20.0)"] signatures = ["docstring-parser (>=0.15)", "jsonargparse[typing-extensions]", "typeshed-client (>=2.1.0)"] test = ["attrs (>=22.2.0)", "jsonargparse[test-no-urls]", "pydantic (>=2.3.0)", "responses (>=0.12.0)", "types-PyYAML (>=6.0.11)", "types-requests (>=2.28.9)"] test-no-urls = ["pytest (>=6.2.5)", "pytest-subtests (>=0.8.0)"] -typing-extensions = ["typing-extensions (>=3.10.0.0)"] +typing-extensions = 
["typing-extensions (>=3.10.0.0) ; python_version < \"3.10\""] urls = ["requests (>=2.18.4)"] [[package]] @@ -910,6 +973,8 @@ version = "0.20.0" description = "Python bindings for Jsonnet - The data templating language" optional = false python-versions = "*" +groups = ["main"] +markers = "os_name == \"posix\"" files = [ {file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"}, ] @@ -920,6 +985,8 @@ version = "0.17.0" description = "An UNOFFICIAL Python interface to Jsonnet, available as whl packages for Mac, Linux and Windows." optional = false python-versions = "*" +groups = ["main"] +markers = "os_name != \"posix\"" files = [ {file = "jsonnet-binary-0.17.0.tar.gz", hash = "sha256:fbadf25f28161b0ccf29e0b72ef689790d14a9b23a681ab6846bd7cb12e17f1d"}, {file = "jsonnet_binary-0.17.0-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:5db15ed838b6e4d1373d5d772a8283cf3a62282056cc5a3643c65bf257efeda4"}, @@ -961,6 +1028,7 @@ version = "0.7.2" description = "Python logging made (stupidly) simple" optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, @@ -971,7 +1039,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", 
"sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"] +dev = ["Sphinx (==7.2.5) ; python_version >= \"3.9\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.2.2) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "mypy (==v1.5.1) ; python_version >= \"3.8\"", "pre-commit (==3.4.0) ; python_version >= \"3.8\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==7.4.0) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==4.1.0) ; python_version >= \"3.8\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.0.0) ; python_version >= \"3.8\"", "sphinx-autobuild (==2021.3.14) ; python_version >= \"3.9\"", "sphinx-rtd-theme (==1.3.0) ; python_version >= \"3.9\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.11.0) ; python_version >= \"3.8\""] [[package]] name = "markupsafe" @@ -979,6 +1047,7 @@ version = "2.1.5" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, @@ -1048,35 +1117,19 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -[[package]] -name = "mkl" -version = "2021.4.0" -description = "Intel® oneAPI Math Kernel Library" -optional = false -python-versions = "*" -files = [ - {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"}, - {file = "mkl-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:636d07d90e68ccc9630c654d47ce9fdeb036bb46e2b193b3a9ac8cfea683cce5"}, - {file = "mkl-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:398dbf2b0d12acaf54117a5210e8f191827f373d362d796091d161f610c1ebfb"}, - {file = "mkl-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:439c640b269a5668134e3dcbcea4350459c4a8bc46469669b2d67e07e3d330e8"}, - {file = "mkl-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:ceef3cafce4c009dd25f65d7ad0d833a0fbadc3d8903991ec92351fe5de1e718"}, -] - -[package.dependencies] -intel-openmp = "==2021.*" -tbb = "==2021.*" - [[package]] name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = 
"sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -1085,7 +1138,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] [[package]] @@ -1094,6 +1147,7 @@ version = "6.0.5" description = "multidict implementation" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, @@ -1193,6 +1247,7 @@ version = "0.70.16" description = "better multiprocessing and multithreading in Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, @@ -1217,6 +1272,7 @@ version = "1.10.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"}, {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"}, @@ -1264,6 +1320,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -1275,6 +1332,7 @@ version = "3.3" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.10" +groups = ["main"] files = [ {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, {file = "networkx-3.3.tar.gz", hash = "sha256:0c127d8b2f4865f59ae9cb8aafcd60b5c70f3241ebd66f7defad7c4ab90126c9"}, @@ -1293,6 +1351,7 @@ version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -1334,56 +1393,149 @@ files = [ [[package]] name = "nvidia-cublas-cu12" -version = "12.1.3.1" +version = "12.6.4.1" +description = "CUBLAS native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" +files = [ + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, + {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-win_amd64.whl", hash = 
"sha256:9e4fa264f4d8a4eb0cdbd34beadc029f453b3bafae02401e999cf3d5a5af75f8"}, +] + +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" description = "CUBLAS native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0"}, + {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142"}, + {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.6.80" +description = "CUDA profiling tools runtime libs." +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, - {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132"}, + {file = 
"nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73"}, + {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-win_amd64.whl", hash = "sha256:bbe6ae76e83ce5251b56e8c8e61a964f757175682bbad058b170b136266ab00a"}, ] [[package]] name = "nvidia-cuda-cupti-cu12" -version = "12.1.105" +version = "12.8.90" description = "CUDA profiling tools runtime libs." optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed"}, + {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182"}, + {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.6.77" +description = "NVRTC native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, - {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = 
"sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, + {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:f7007dbd914c56bd80ea31bc43e8e149da38f68158f423ba845fc3292684e45a"}, ] [[package]] name = "nvidia-cuda-nvrtc-cu12" -version = "12.1.105" +version = "12.8.93" description = "NVRTC native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994"}, + {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8"}, + {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.6.77" +description = "CUDA Runtime native Libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, - {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8"}, + {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:86c58044c824bf3c173c49a2dbc7a6c8b53cb4e4dca50068be0bf64e9dab3f7f"}, ] [[package]] name = "nvidia-cuda-runtime-cu12" -version = "12.1.105" +version = "12.8.90" description = "CUDA Runtime native Libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d"}, + {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90"}, + {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.5.1.17" +description = "cuDNN runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, - {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = 
"sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, + {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-win_amd64.whl", hash = "sha256:d7af0f8a4f3b4b9dbb3122f2ef553b45694ed9c384d5a75bab197b8eefb79ab8"}, ] +[package.dependencies] +nvidia-cublas-cu12 = "*" + [[package]] name = "nvidia-cudnn-cu12" -version = "8.9.2.26" +version = "9.10.2.21" description = "cuDNN runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" files = [ - {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"}, + {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8"}, + {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8"}, + {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e"}, ] [package.dependencies] @@ -1391,35 +1543,129 @@ nvidia-cublas-cu12 = "*" [[package]] name = "nvidia-cufft-cu12" -version = "11.0.2.54" +version = "11.3.0.4" +description = "CUFFT native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" +files = [ + {file = 
"nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca"}, + {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-win_amd64.whl", hash = "sha256:6048ebddfb90d09d2707efb1fd78d4e3a77cb3ae4dc60e19aab6be0ece2ae464"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" description = "CUFFT native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a"}, + {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74"}, + {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.11.1.6" +description = "cuFile GPUDirect libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= 
\"3.12\"" +files = [ + {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, + {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +description = "cuFile GPUDirect libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc"}, + {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.7.77" +description = "CURAND native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, - {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, + {file = 
"nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7b2ed8e95595c3591d984ea3603dd66fe6ce6812b886d59049988a712ed06b6e"}, + {file = "nvidia_curand_cu12-10.3.7.77-py3-none-win_amd64.whl", hash = "sha256:6d6d935ffba0f3d439b7cd968192ff068fafd9018dbf1b85b37261b13cfc9905"}, ] [[package]] name = "nvidia-curand-cu12" -version = "10.3.2.106" +version = "10.3.9.90" description = "CURAND native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd"}, + {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9"}, + {file = "nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.1.2" +description = "CUDA solver native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, - {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dbbe4fc38ec1289c7e5230e16248365e375c3673c9c8bac5796e2e20db07f56e"}, + {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-win_amd64.whl", hash = "sha256:6813f9d8073f555444a8705f3ab0296d3e1cb37a16d694c5fc8b862a0d8706d7"}, ] +[package.dependencies] +nvidia-cublas-cu12 = "*" +nvidia-cusparse-cu12 = "*" +nvidia-nvjitlink-cu12 = "*" + [[package]] name = "nvidia-cusolver-cu12" -version = "11.4.5.107" +version = "11.7.3.90" description = "CUDA solver native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" files = [ - {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, - {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, + {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0"}, + {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450"}, + {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34"}, ] [package.dependencies] @@ -1429,49 +1675,163 @@ 
nvidia-nvjitlink-cu12 = "*" [[package]] name = "nvidia-cusparse-cu12" -version = "12.1.0.106" +version = "12.5.4.2" +description = "CUSPARSE native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" +files = [ + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f"}, + {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-win_amd64.whl", hash = "sha256:4acb8c08855a26d737398cba8fb6f8f5045d93f82612b4cfd84645a2332ccf20"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" description = "CUSPARSE native runtime libraries" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" files = [ - {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, - {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, + {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc"}, + {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b"}, + {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd"}, ] [package.dependencies] nvidia-nvjitlink-cu12 = "*" +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.6.3" +description = "NVIDIA cuSPARSELt" +optional = false +python-versions = "*" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" +files = [ + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, + {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-win_amd64.whl", hash = "sha256:3b325bcbd9b754ba43df5a311488fca11a6b5dc3d11df4d190c000cf1a0765c7"}, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +description = "NVIDIA cuSPARSELt" +optional = false +python-versions = "*" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5"}, + {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623"}, + {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075"}, +] + +[[package]] +name = 
"nvidia-nccl-cu12" +version = "2.26.2" +description = "NVIDIA Collective Communication Library (NCCL) Runtime" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" +files = [ + {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, + {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, +] + [[package]] name = "nvidia-nccl-cu12" -version = "2.20.5" +version = "2.27.5" description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a"}, + {file = "nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.6.85" +description = "Nvidia JIT LTO Library" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, - {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, + {file = 
"nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, + {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, + {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:e61120e52ed675747825cdd16febc6a0730537451d867ee58bee3853b1b13d1c"}, ] [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.5.82" +version = "12.8.93" description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88"}, + {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7"}, + {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f"}, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.20" +description = "NVSHMEM creates a global address space that provides efficient and scalable communication for NVIDIA GPU clusters." 
+optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0"}, + {file = "nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.6.77" +description = "NVIDIA Tools Extension" +optional = false +python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f9b37bc5c8cf7509665cb6ada5aaa0ce65618f2332b7d3e78e9790511f111212"}, - {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-win_amd64.whl", hash = "sha256:e782564d705ff0bf61ac3e1bf730166da66dd2fe9012f111ede5fc49b64ae697"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1"}, + {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"}, ] [[package]] name = "nvidia-nvtx-cu12" -version = 
"12.1.105" +version = "12.8.90" description = "NVIDIA Tools Extension" optional = false python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" files = [ - {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, - {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, + {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615"}, + {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f"}, + {file = "nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e"}, ] [[package]] @@ -1480,6 +1840,7 @@ version = "1.32.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" +groups = ["main"] files = [ {file = "openai-1.32.0-py3-none-any.whl", hash = "sha256:953d57669f309002044fd2f678aba9f07a43256d74b3b00cd04afb5b185568ea"}, {file = "openai-1.32.0.tar.gz", hash = "sha256:a6df15a7ab9344b1bc2bc8d83639f68b7a7e2453c0f5e50c1666547eee86f0bd"}, @@ -1503,6 +1864,7 @@ version = "24.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, @@ -1514,6 +1876,7 @@ version = "2.2.2" description = "Powerful data structures for data analysis, time 
series, and statistics" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, @@ -1587,6 +1950,7 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -1598,6 +1962,7 @@ version = "10.3.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"}, {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"}, @@ -1675,18 +2040,19 @@ docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] name = "plac" -version = "1.4.3" +version = "1.4.5" description = "The smartest command line arguments parser in the world" optional = false python-versions = "*" +groups = ["main"] files = [ - {file = "plac-1.4.3-py2.py3-none-any.whl", hash = "sha256:8a84fde8f950c9de6588a2d53c9deeac3ba1ddb456d887a33228460cf6549750"}, - {file 
= "plac-1.4.3.tar.gz", hash = "sha256:d4cb3387b2113a28aebd509433d0264a4e5d9bb7c1a86db4fbd0a8f11af74eb3"}, + {file = "plac-1.4.5-py2.py3-none-any.whl", hash = "sha256:87187786b4e446688b1cf5112e18fed8a23ab3b316c25fe91266a10bd1736b16"}, + {file = "plac-1.4.5.tar.gz", hash = "sha256:5f05bf85235c017fcd76c73c8101d4ff8e96beb3dc58b9a37de49cac7de82d14"}, ] [[package]] @@ -1695,6 +2061,7 @@ version = "4.2.2" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, @@ -1711,6 +2078,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -1722,22 +2090,22 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "protobuf" -version = "5.27.1" +version = "6.33.0" description = "" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" +groups = ["main"] files = [ - {file = "protobuf-5.27.1-cp310-abi3-win32.whl", hash = "sha256:3adc15ec0ff35c5b2d0992f9345b04a540c1e73bfee3ff1643db43cc1d734333"}, - {file = "protobuf-5.27.1-cp310-abi3-win_amd64.whl", hash = "sha256:25236b69ab4ce1bec413fd4b68a15ef8141794427e0b4dc173e9d5d9dffc3bcd"}, - {file = "protobuf-5.27.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4e38fc29d7df32e01a41cf118b5a968b1efd46b9c41ff515234e794011c78b17"}, - {file = "protobuf-5.27.1-cp38-abi3-manylinux2014_aarch64.whl", hash = 
"sha256:917ed03c3eb8a2d51c3496359f5b53b4e4b7e40edfbdd3d3f34336e0eef6825a"}, - {file = "protobuf-5.27.1-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:ee52874a9e69a30271649be88ecbe69d374232e8fd0b4e4b0aaaa87f429f1631"}, - {file = "protobuf-5.27.1-cp38-cp38-win32.whl", hash = "sha256:7a97b9c5aed86b9ca289eb5148df6c208ab5bb6906930590961e08f097258107"}, - {file = "protobuf-5.27.1-cp38-cp38-win_amd64.whl", hash = "sha256:f6abd0f69968792da7460d3c2cfa7d94fd74e1c21df321eb6345b963f9ec3d8d"}, - {file = "protobuf-5.27.1-cp39-cp39-win32.whl", hash = "sha256:dfddb7537f789002cc4eb00752c92e67885badcc7005566f2c5de9d969d3282d"}, - {file = "protobuf-5.27.1-cp39-cp39-win_amd64.whl", hash = "sha256:39309898b912ca6febb0084ea912e976482834f401be35840a008da12d189340"}, - {file = "protobuf-5.27.1-py3-none-any.whl", hash = "sha256:4ac7249a1530a2ed50e24201d6630125ced04b30619262f06224616e0030b6cf"}, - {file = "protobuf-5.27.1.tar.gz", hash = "sha256:df5e5b8e39b7d1c25b186ffdf9f44f40f810bbcc9d2b71d9d3156fee5a9adf15"}, + {file = "protobuf-6.33.0-cp310-abi3-win32.whl", hash = "sha256:d6101ded078042a8f17959eccd9236fb7a9ca20d3b0098bbcb91533a5680d035"}, + {file = "protobuf-6.33.0-cp310-abi3-win_amd64.whl", hash = "sha256:9a031d10f703f03768f2743a1c403af050b6ae1f3480e9c140f39c45f81b13ee"}, + {file = "protobuf-6.33.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:905b07a65f1a4b72412314082c7dbfae91a9e8b68a0cc1577515f8df58ecf455"}, + {file = "protobuf-6.33.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e0697ece353e6239b90ee43a9231318302ad8353c70e6e45499fa52396debf90"}, + {file = "protobuf-6.33.0-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:e0a1715e4f27355afd9570f3ea369735afc853a6c3951a6afe1f80d8569ad298"}, + {file = "protobuf-6.33.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:35be49fd3f4fefa4e6e2aacc35e8b837d6703c37a2168a55ac21e9b1bc7559ef"}, + {file = "protobuf-6.33.0-cp39-cp39-win32.whl", hash = "sha256:cd33a8e38ea3e39df66e1bbc462b076d6e5ba3a4ebbde58219d777223a7873d3"}, 
+ {file = "protobuf-6.33.0-cp39-cp39-win_amd64.whl", hash = "sha256:c963e86c3655af3a917962c9619e1a6b9670540351d7af9439d06064e3317cc9"}, + {file = "protobuf-6.33.0-py3-none-any.whl", hash = "sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995"}, + {file = "protobuf-6.33.0.tar.gz", hash = "sha256:140303d5c8d2037730c548f8c7b93b20bb1dc301be280c378b82b8894589c954"}, ] [[package]] @@ -1746,6 +2114,7 @@ version = "6.0.0" description = "Cross-platform lib for process and system monitoring in Python." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main"] files = [ {file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"}, {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"}, @@ -1767,7 +2136,7 @@ files = [ ] [package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] [[package]] name = "py" @@ -1775,6 +2144,7 @@ version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] files = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, @@ -1786,6 +2156,7 @@ version = "16.1.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = 
"sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"}, {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"}, @@ -1834,6 +2205,7 @@ version = "0.6" description = "" optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "pyarrow_hotfix-0.6-py3-none-any.whl", hash = "sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178"}, {file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"}, @@ -1845,6 +2217,7 @@ version = "2.11.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, @@ -1856,6 +2229,7 @@ version = "2.7.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic-2.7.3-py3-none-any.whl", hash = "sha256:ea91b002777bf643bb20dd717c028ec43216b24a6001a280f83877fd2655d0b4"}, {file = "pydantic-2.7.3.tar.gz", hash = "sha256:c46c76a40bb1296728d7a8b99aa73dd70a48c3510111ff290034f860c99c419e"}, @@ -1875,6 +2249,7 @@ version = "2.18.4" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic_core-2.18.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f76d0ad001edd426b92233d45c746fd08f467d56100fd8f30e9ace4b005266e4"}, {file = "pydantic_core-2.18.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:59ff3e89f4eaf14050c8022011862df275b552caef8082e37b542b066ce1ff26"}, @@ -1966,6 +2341,7 @@ version = "3.2.0" description = "passive checker of Python programs" 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, @@ -1977,6 +2353,7 @@ version = "7.1.3" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "pytest-7.1.3-py3-none-any.whl", hash = "sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"}, {file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"}, @@ -2000,6 +2377,7 @@ version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, @@ -2017,6 +2395,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -2031,6 +2410,7 @@ version = "2024.1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, {file = "pytz-2024.1.tar.gz", hash = 
"sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, @@ -2042,6 +2422,7 @@ version = "6.0.1" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, @@ -2101,6 +2482,7 @@ version = "2024.5.15" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"}, @@ -2189,6 +2571,7 @@ version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -2210,6 +2593,7 @@ version = "1.3.0" description = "Yet another Python binding for Juman++/KNP/KWJA" optional = false python-versions = ">=3.7,<4.0" +groups = ["main"] files = [ {file = "rhoknp-1.3.0-py3-none-any.whl", hash = "sha256:41ee79bbd25e8e1142d555a2e714356fd810b9bf9bb610c75b3bcb704c37ac00"}, {file = "rhoknp-1.3.0.tar.gz", hash = "sha256:ccbac0bba6662b00a573f2d0361e64978901202c44c56b50b3ce2afa5dbb23b6"}, @@ -2224,6 +2608,7 @@ version = "0.4.3" description = "" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "safetensors-0.4.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dcf5705cab159ce0130cd56057f5f3425023c407e170bca60b4868048bae64fd"}, {file = "safetensors-0.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bb4f8c5d0358a31e9a08daeebb68f5e161cdd4018855426d3f0c23bb51087055"}, @@ -2346,6 +2731,7 @@ version = "1.5.0" description = "A set of python modules for machine learning and data mining" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "scikit_learn-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12e40ac48555e6b551f0a0a5743cc94cc5a765c9513fe708e01f0aa001da2801"}, {file = "scikit_learn-1.5.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f405c4dae288f5f6553b10c4ac9ea7754d5180ec11e296464adb5d6ac68b6ef5"}, @@ -2391,6 +2777,7 @@ version = "1.13.1" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, {file = 
"scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, @@ -2429,97 +2816,141 @@ test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "po [[package]] name = "sentence-transformers" -version = "3.0.0" -description = "Multilingual text embeddings" +version = "5.1.1" +description = "Embeddings, Retrieval, and Reranking" optional = false -python-versions = ">=3.8.0" +python-versions = ">=3.9" +groups = ["main"] files = [ - {file = "sentence_transformers-3.0.0-py3-none-any.whl", hash = "sha256:9bf851b688b796e5fb06c920921efd5e5e05ee616e85cb3026fbdfe4dcf15bf3"}, - {file = "sentence_transformers-3.0.0.tar.gz", hash = "sha256:52d4101654ed107a28e9fa5110fce399084b55e7838fd8256471353ddc299033"}, + {file = "sentence_transformers-5.1.1-py3-none-any.whl", hash = "sha256:5ed544629eafe89ca668a8910ebff96cf0a9c5254ec14b05c66c086226c892fd"}, + {file = "sentence_transformers-5.1.1.tar.gz", hash = "sha256:8af3f844b2ecf9a6c2dfeafc2c02938a87f61202b54329d70dfd7dfd7d17a84e"}, ] [package.dependencies] -huggingface-hub = ">=0.15.1" -numpy = "*" +huggingface-hub = ">=0.20.0" Pillow = "*" scikit-learn = "*" scipy = "*" torch = ">=1.11.0" tqdm = "*" -transformers = ">=4.34.0,<5.0.0" +transformers = ">=4.41.0,<5.0.0" +typing_extensions = ">=4.5.0" [package.extras] -dev = ["accelerate (>=0.20.3)", "datasets", "pre-commit", "pytest", "ruff (>=0.3.0)"] +dev = ["accelerate (>=0.20.3)", "datasets", "peft", "pre-commit", "pytest", "pytest-cov"] +onnx = ["optimum[onnxruntime] (>=1.23.1)"] +onnx-gpu = ["optimum[onnxruntime-gpu] (>=1.23.1)"] +openvino = ["optimum-intel[openvino] (>=1.20.0)"] train = ["accelerate (>=0.20.3)", "datasets"] [[package]] name = "sentencepiece" -version = "0.2.0" -description = "SentencePiece python wrapper" +version = "0.2.1" +description = "Unsupervised text tokenizer and detokenizer." 
optional = false -python-versions = "*" +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "sentencepiece-0.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e10fa50bdbaa5e2445dbd387979980d391760faf0ec99a09bd7780ff37eaec44"}, + {file = "sentencepiece-0.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f27ae6deea72efdb6f361750c92f6c21fd0ad087445082770cc34015213c526"}, + {file = "sentencepiece-0.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60937c959e6f44159fdd9f56fbdd302501f96114a5ba436829496d5f32d8de3f"}, + {file = "sentencepiece-0.2.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8b1d91545578852f128650b8cce4ec20f93d39b378ff554ebe66290f2dabb92"}, + {file = "sentencepiece-0.2.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27e38eee653abc3d387862e67bc5c8b6f428cd604e688b85d29170b7e725c26c"}, + {file = "sentencepiece-0.2.1-cp310-cp310-win32.whl", hash = "sha256:251874d720ac7f28024a168501f3c7bb15d1802245f6e66de565f18bbb9b5eaa"}, + {file = "sentencepiece-0.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:e52144670738b4b477fade6c2a9b6af71a8d0094514c9853ac9f6fc1fcfabae7"}, + {file = "sentencepiece-0.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:9076430ac25dfa7147d9d05751dbc66a04bc1aaac371c07f84952979ea59f0d0"}, + {file = "sentencepiece-0.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6356d0986b8b8dc351b943150fcd81a1c6e6e4d439772e8584c64230e58ca987"}, + {file = "sentencepiece-0.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8f8ba89a3acb3dc1ae90f65ec1894b0b9596fdb98ab003ff38e058f898b39bc7"}, + {file = "sentencepiece-0.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02593eca45440ef39247cee8c47322a34bdcc1d8ae83ad28ba5a899a2cf8d79a"}, + {file = "sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a0d15781a171d188b661ae4bde1d998c303f6bd8621498c50c671bd45a4798e"}, + {file = 
"sentencepiece-0.2.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f5a3e0d9f445ed9d66c0fec47d4b23d12cfc858b407a03c194c1b26c2ac2a63"}, + {file = "sentencepiece-0.2.1-cp311-cp311-win32.whl", hash = "sha256:6d297a1748d429ba8534eebe5535448d78b8acc32d00a29b49acf28102eeb094"}, + {file = "sentencepiece-0.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:82d9ead6591015f009cb1be1cb1c015d5e6f04046dbb8c9588b931e869a29728"}, + {file = "sentencepiece-0.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:39f8651bd10974eafb9834ce30d9bcf5b73e1fc798a7f7d2528f9820ca86e119"}, + {file = "sentencepiece-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57cae326c8727de58c85977b175af132a7138d84c764635d7e71bbee7e774133"}, + {file = "sentencepiece-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:56dd39a3c4d6493db3cdca7e8cc68c6b633f0d4195495cbadfcf5af8a22d05a6"}, + {file = "sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9381351182ff9888cc80e41c632e7e274b106f450de33d67a9e8f6043da6f76"}, + {file = "sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99f955df238021bf11f0fc37cdb54fd5e5b5f7fd30ecc3d93fb48b6815437167"}, + {file = "sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdfecef430d985f1c2bcbfff3defd1d95dae876fbd0173376012d2d7d24044b"}, + {file = "sentencepiece-0.2.1-cp312-cp312-win32.whl", hash = "sha256:a483fd29a34c3e34c39ac5556b0a90942bec253d260235729e50976f5dba1068"}, + {file = "sentencepiece-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4cdc7c36234fda305e85c32949c5211faaf8dd886096c7cea289ddc12a2d02de"}, + {file = "sentencepiece-0.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:daeb5e9e9fcad012324807856113708614d534f596d5008638eb9b40112cd9e4"}, + {file = "sentencepiece-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dcd8161eee7b41aae57ded06272905dbd680a0a04b91edd0f64790c796b2f706"}, + {file = 
"sentencepiece-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c6c8f42949f419ff8c7e9960dbadcfbc982d7b5efc2f6748210d3dd53a7de062"}, + {file = "sentencepiece-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:097f3394e99456e9e4efba1737c3749d7e23563dd1588ce71a3d007f25475fff"}, + {file = "sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b670879c370d350557edabadbad1f6561a9e6968126e6debca4029e5547820"}, + {file = "sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7f0fd2f2693309e6628aeeb2e2faf6edd221134dfccac3308ca0de01f8dab47"}, + {file = "sentencepiece-0.2.1-cp313-cp313-win32.whl", hash = "sha256:92b3816aa2339355fda2c8c4e021a5de92180b00aaccaf5e2808972e77a4b22f"}, + {file = "sentencepiece-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:10ed3dab2044c47f7a2e7b4969b0c430420cdd45735d78c8f853191fa0e3148b"}, + {file = "sentencepiece-0.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac650534e2251083c5f75dde4ff28896ce7c8904133dc8fef42780f4d5588fcd"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8dd4b477a7b069648d19363aad0cab9bad2f4e83b2d179be668efa672500dc94"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c0f672da370cc490e4c59d89e12289778310a0e71d176c541e4834759e1ae07"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad8493bea8432dae8d6830365352350f3b4144415a1d09c4c8cb8d30cf3b6c3c"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b81a24733726e3678d2db63619acc5a8dccd074f7aa7a54ecd5ca33ca6d2d596"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a81799d0a68d618e89063fb423c3001a034c893069135ffe51fee439ae474d6"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-win32.whl", hash = 
"sha256:89a3ea015517c42c0341d0d962f3e6aaf2cf10d71b1932d475c44ba48d00aa2b"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:33f068c9382dc2e7c228eedfd8163b52baa86bb92f50d0488bf2b7da7032e484"}, + {file = "sentencepiece-0.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:b3616ad246f360e52c85781e47682d31abfb6554c779e42b65333d4b5f44ecc0"}, + {file = "sentencepiece-0.2.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5d0350b686c320068702116276cfb26c066dc7e65cfef173980b11bb4d606719"}, + {file = "sentencepiece-0.2.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c7f54a31cde6fa5cb030370566f68152a742f433f8d2be458463d06c208aef33"}, + {file = "sentencepiece-0.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c83b85ab2d6576607f31df77ff86f28182be4a8de6d175d2c33ca609925f5da1"}, + {file = "sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1855f57db07b51fb51ed6c9c452f570624d2b169b36f0f79ef71a6e6c618cd8b"}, + {file = "sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01e6912125cb45d3792f530a4d38f8e21bf884d6b4d4ade1b2de5cf7a8d2a52b"}, + {file = "sentencepiece-0.2.1-cp314-cp314-win32.whl", hash = "sha256:c415c9de1447e0a74ae3fdb2e52f967cb544113a3a5ce3a194df185cbc1f962f"}, + {file = "sentencepiece-0.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:881b2e44b14fc19feade3cbed314be37de639fc415375cefaa5bc81a4be137fd"}, + {file = "sentencepiece-0.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:2005242a16d2dc3ac5fe18aa7667549134d37854823df4c4db244752453b78a8"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a19adcec27c524cb7069a1c741060add95f942d1cbf7ad0d104dffa0a7d28a2b"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e37e4b4c4a11662b5db521def4e44d4d30ae69a1743241412a93ae40fdcab4bb"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:477c81505db072b3ab627e7eab972ea1025331bd3a92bacbf798df2b75ea86ec"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:010f025a544ef770bb395091d57cb94deb9652d8972e0d09f71d85d5a0816c8c"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:733e59ff1794d26db706cd41fc2d7ca5f6c64a820709cb801dc0ea31780d64ab"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-win32.whl", hash = "sha256:d3233770f78e637dc8b1fda2cd7c3b99ec77e7505041934188a4e7fe751de3b0"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5e4366c97b68218fd30ea72d70c525e6e78a6c0a88650f57ac4c43c63b234a9d"}, + {file = "sentencepiece-0.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:105e36e75cbac1292642045458e8da677b2342dcd33df503e640f0b457cb6751"}, + {file = "sentencepiece-0.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:afefe50a0cdcb4f2fd9733cb52001a2c164181ee2d82c32d38f5b1b326a8528c"}, + {file = "sentencepiece-0.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:891ade6503dd93d418c03993f7d6a8aa20260c422cefff5096b9068185e67642"}, + {file = "sentencepiece-0.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:814978ac05130dd5812b4b03215c766bc6abaef13e7bd72bc534e4d1e12e9a4c"}, + {file = "sentencepiece-0.2.1-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:017f97b274d4b0baa84b2dc743bf4517be81156f413bb24f12aacacde378e5ab"}, + {file = "sentencepiece-0.2.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c4ebcb3c6ab1496ab1c37c79ef7bb563b8726f29548c30773b7a4cb152df1a"}, + {file = "sentencepiece-0.2.1-cp39-cp39-win32.whl", hash = "sha256:caa4e560c72c151da80036aecc2159e51a7fd8ae9efebefd96860460ce6bd025"}, + {file = "sentencepiece-0.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:2af5a1fb05013332ad94343b8b5f3973e006a2dde2dfba55a819549e054e2f0f"}, + {file = "sentencepiece-0.2.1-cp39-cp39-win_arm64.whl", hash = 
"sha256:3d165fbb9bf8fba35f1946ba2617c3f9995679f07438325f07c026d53f33e746"}, + {file = "sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad"}, +] + +[package.extras] +test = ["pytest"] +testpaths = ["test"] + +[[package]] +name = "setuptools" +version = "80.9.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.12\"" files = [ - {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227"}, - {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452"}, - {file = "sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7b67e724bead13f18db6e1d10b6bbdc454af574d70efbb36f27d90387be1ca3"}, - {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fde4b08cfe237be4484c6c7c2e2c75fb862cfeab6bd5449ce4caeafd97b767a"}, - {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c378492056202d1c48a4979650981635fd97875a00eabb1f00c6a236b013b5e"}, - {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1380ce6540a368de2ef6d7e6ba14ba8f3258df650d39ba7d833b79ee68a52040"}, - {file = "sentencepiece-0.2.0-cp310-cp310-win32.whl", hash = "sha256:a1151d6a6dd4b43e552394aed0edfe9292820272f0194bd56c7c1660a0c06c3d"}, - {file = "sentencepiece-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:d490142b0521ef22bc1085f061d922a2a6666175bb6b42e588ff95c0db6819b2"}, - {file = "sentencepiece-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17982700c4f6dbb55fa3594f3d7e5dd1c8659a274af3738e33c987d2a27c9d5c"}, - {file = 
"sentencepiece-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7c867012c0e8bcd5bdad0f791609101cb5c66acb303ab3270218d6debc68a65e"}, - {file = "sentencepiece-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd6071249c74f779c5b27183295b9202f8dedb68034e716784364443879eaa6"}, - {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f90c55a65013cbb8f4d7aab0599bf925cde4adc67ae43a0d323677b5a1c6cb"}, - {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b293734059ef656dcd65be62ff771507bea8fed0a711b6733976e1ed3add4553"}, - {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e58b47f933aca74c6a60a79dcb21d5b9e47416256c795c2d58d55cec27f9551d"}, - {file = "sentencepiece-0.2.0-cp311-cp311-win32.whl", hash = "sha256:c581258cf346b327c62c4f1cebd32691826306f6a41d8c4bec43b010dee08e75"}, - {file = "sentencepiece-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:0993dbc665f4113017892f1b87c3904a44d0640eda510abcacdfb07f74286d36"}, - {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2"}, - {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c"}, - {file = "sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f"}, - {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08"}, - {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7"}, - {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109"}, - {file = "sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = "sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251"}, - {file = "sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f"}, - {file = "sentencepiece-0.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4547683f330289ec4f093027bfeb87f9ef023b2eb6f879fdc4a8187c7e0ffb90"}, - {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cd6175f7eaec7142d2bf6f6597ce7db4c9ac89acf93fcdb17410c3a8b781eeb"}, - {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:859ba1acde782609a0910a26a60e16c191a82bf39b5621107552c0cd79fad00f"}, - {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbbef6cc277f8f18f36959e305f10b1c620442d75addc79c21d7073ae581b50"}, - {file = "sentencepiece-0.2.0-cp36-cp36m-win32.whl", hash = "sha256:536b934e244829e3fe6c4f198652cd82da48adb9aa145c9f00889542726dee3d"}, - {file = "sentencepiece-0.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:0a91aaa3c769b52440df56fafda683b3aa48e3f2169cf7ee5b8c8454a7f3ae9b"}, - {file = "sentencepiece-0.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:787e480ca4c1d08c9985a7eb1eae4345c107729c99e9b5a9a00f2575fc7d4b4b"}, - {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4d158189eb2ecffea3a51edf6d25e110b3678ec47f1a40f2d541eafbd8f6250"}, - {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1e5ca43013e8935f25457a4fca47e315780172c3e821b4b13a890668911c792"}, - {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7140d9e5a74a0908493bb4a13f1f16a401297bd755ada4c707e842fbf6f0f5bf"}, - {file = 
"sentencepiece-0.2.0-cp37-cp37m-win32.whl", hash = "sha256:6cf333625234f247ab357b0bd9836638405ea9082e1543d5b8408f014979dcbf"}, - {file = "sentencepiece-0.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ff88712338b01031910e8e61e7239aff3ce8869ee31a47df63cb38aadd591bea"}, - {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20813a68d4c221b1849c62c30e1281ea81687894d894b8d4a0f4677d9311e0f5"}, - {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:926ef920ae2e8182db31d3f5d081ada57804e3e1d3a8c4ef8b117f9d9fb5a945"}, - {file = "sentencepiece-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:89f65f69636b7e9c015b79dff9c9985a9bc7d19ded6f79ef9f1ec920fdd73ecf"}, - {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f67eae0dbe6f2d7d6ba50a354623d787c99965f068b81e145d53240198021b0"}, - {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:98501e075f35dd1a1d5a20f65be26839fcb1938752ec61539af008a5aa6f510b"}, - {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3d1d2cc4882e8d6a1adf9d5927d7716f80617fc693385661caff21888972269"}, - {file = "sentencepiece-0.2.0-cp38-cp38-win32.whl", hash = "sha256:b99a308a2e5e569031ab164b74e6fab0b6f37dfb493c32f7816225f4d411a6dd"}, - {file = "sentencepiece-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:cdb701eec783d3ec86b7cd4c763adad8eaf6b46db37ee1c36e5e6c44b3fe1b5f"}, - {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1e0f9c4d0a6b0af59b613175f019916e28ade076e21242fd5be24340d8a2f64a"}, - {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:298f21cc1366eb60311aedba3169d30f885c363ddbf44214b0a587d2908141ad"}, - {file = "sentencepiece-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f1ec95aa1e5dab11f37ac7eff190493fd87770f7a8b81ebc9dd768d1a3c8704"}, - {file = 
"sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b06b70af54daa4b4904cbb90b4eb6d35c9f3252fdc86c9c32d5afd4d30118d8"}, - {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22e37bac44dd6603388cb598c64ff7a76e41ca774646f21c23aadfbf5a2228ab"}, - {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0461324897735512a32d222e3d886e24ad6a499761952b6bda2a9ee6e4313ea5"}, - {file = "sentencepiece-0.2.0-cp39-cp39-win32.whl", hash = "sha256:38aed822fb76435fa1f12185f10465a94ab9e51d5e8a9159e9a540ce926f0ffd"}, - {file = "sentencepiece-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:d8cf876516548b5a1d6ac4745d8b554f5c07891d55da557925e5c13ff0b4e6ad"}, - {file = "sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843"}, + {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, + {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, ] +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] +core = ["importlib_metadata (>=6) ; python_version < \"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = 
["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] + [[package]] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -2531,6 +2962,7 @@ version = "7.0.4" description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" optional = false python-versions = "<4.0,>=3.7" +groups = ["main"] files = [ {file = "smart_open-7.0.4-py3-none-any.whl", hash = "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47"}, {file = "smart_open-7.0.4.tar.gz", hash = "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524"}, @@ -2556,6 +2988,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = 
"sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -2563,13 +2996,14 @@ files = [ [[package]] name = "sudachidict-core" -version = "20240409" +version = "20251022" description = "Sudachi Dictionary for SudachiPy - Core Edition" optional = false python-versions = "*" +groups = ["main"] files = [ - {file = "SudachiDict-core-20240409.tar.gz", hash = "sha256:341eb2fdf1ce3a0db329213b01e0dea2f0e3db26ea1f5244c43c4a1cd739d41e"}, - {file = "SudachiDict_core-20240409-py3-none-any.whl", hash = "sha256:99b165574f9fe7a42c9caee2f4f274d22f8c99602eaba2863575bbc09020a2fb"}, + {file = "sudachidict_core-20251022-py3-none-any.whl", hash = "sha256:ca67fe366c4cf3a35f4feef019fdb98a0c17129c66e5c0ececeae30dc318c016"}, + {file = "sudachidict_core-20251022.tar.gz", hash = "sha256:2b25ffb00c7018c9d4af312f11d833fb710b680c4bfe8e3545da1e6fc18713fb"}, ] [package.dependencies] @@ -2577,48 +3011,52 @@ SudachiPy = ">=0.5,<0.7" [[package]] name = "sudachipy" -version = "0.6.8" +version = "0.6.10" description = "Python version of Sudachi, the Japanese Morphological Analyzer" optional = false python-versions = "*" -files = [ - {file = "SudachiPy-0.6.8-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:85f91a6ac347d2fbf478ae96e0e08efe7b8e47fb7cdfb770e90611be5669cabb"}, - {file = "SudachiPy-0.6.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:361ef3e3333ef4165b517668878dd80fbed6d3c443659b9dc3236132ea8f7fbb"}, - {file = "SudachiPy-0.6.8-cp310-cp310-win_amd64.whl", hash = "sha256:081c52918bdae35f564637db146389f0a48b3b5263f215859b4d1ae311a7a474"}, - {file = "SudachiPy-0.6.8-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:0a6cb506e402933023ea07035fc3e81d65880392afcdb2f09676027882b09e73"}, - {file = "SudachiPy-0.6.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d19db58be100b05362d00d0ad5cd29aff6da31807967b302f35bd43dd59e141f"}, - {file = "SudachiPy-0.6.8-cp311-cp311-win_amd64.whl", hash = 
"sha256:27833ae472220dc46f934edd9a8839b0134279c0113f7da01d67e424bfe2d0ab"}, - {file = "SudachiPy-0.6.8-cp312-cp312-macosx_10_12_universal2.whl", hash = "sha256:7f75d4627fa141bc02951c5ce17ec7055faf2e9424d10c697e923c27b7936369"}, - {file = "SudachiPy-0.6.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33afa2efa4d98ae3cbea0ab8cc09c71b0405d188074d0c4cef2b2080a51caafe"}, - {file = "SudachiPy-0.6.8-cp312-cp312-win_amd64.whl", hash = "sha256:2a2f22605093ed7994eb7edced2a21c8ac71b9ecc9877e94539414b1a60d172a"}, - {file = "SudachiPy-0.6.8-cp37-cp37m-macosx_10_12_universal2.whl", hash = "sha256:6ab54826d151dcf69dfd168e784887d2701c553cf3f455d28b171e64584a404d"}, - {file = "SudachiPy-0.6.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d9aa1890b3f43af0ff691f6de8f770ab9ea58506d9e1ee3c8bb9aae460c58d2"}, - {file = "SudachiPy-0.6.8-cp37-cp37m-win_amd64.whl", hash = "sha256:686a890a376589e78b606548f9d5427a43ce8492edc46bcd09c104d9df594f7c"}, - {file = "SudachiPy-0.6.8-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:8d19395daf8c96e4a14df18c4df634e1f7caa7790917ab089c174ffcbdcaf4c0"}, - {file = "SudachiPy-0.6.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cae943138ef2e9d0126a5a4110dca5d6e5d8f35dc3f909e3ef1aeff3aa565b"}, - {file = "SudachiPy-0.6.8-cp38-cp38-win_amd64.whl", hash = "sha256:e8de107715dcd1d566837c91c6a10572efc171d4969a505176ecb37efe65cb48"}, - {file = "SudachiPy-0.6.8-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:d52ddc5001b0125375419409adee012f8957b15ad1a4017e18f30c54ba69f9b7"}, - {file = "SudachiPy-0.6.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a7c98f75567bd8488a1597c83f8f6abb4c15c577d0b5f92fa0c31c8304dae4"}, - {file = "SudachiPy-0.6.8-cp39-cp39-win_amd64.whl", hash = "sha256:1ae6e533f98e510e751d7355ec512aff3a7dac73539abb61c731cdcc316a183f"}, - {file = "SudachiPy-0.6.8.tar.gz", hash = 
"sha256:3d1c9086ff09afacc34d02fdb2112aab7cff1d78f0d4b81f78b9ba01c36d4888"}, +groups = ["main"] +files = [ + {file = "SudachiPy-0.6.10-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:418899c5794ec8fd86341d690bdd23bb85f35890540520624a001c751bcfdff0"}, + {file = "SudachiPy-0.6.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99aeaf4a7bbf4c473929f5a9812226123dac1457fb0d549c5e95192eda3f0859"}, + {file = "SudachiPy-0.6.10-cp310-cp310-win_amd64.whl", hash = "sha256:efd9c7584ed6dadf9f7d2f4ea616d06207b0d8a805861f9762072733b611b0db"}, + {file = "SudachiPy-0.6.10-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:e947d907542c8086b7e6d18669f45599b3964eec4e954ad7dd85e4acdaa94793"}, + {file = "SudachiPy-0.6.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1c1d8c579cc3af591a6511bffba9f88662eedf5ba32868ca8e3ba3c1051d60"}, + {file = "SudachiPy-0.6.10-cp311-cp311-win_amd64.whl", hash = "sha256:8af8b3c91a9aaf0f300901967f85805d73e83297da6c56db50002dde3a4514fe"}, + {file = "SudachiPy-0.6.10-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:efb43fb3b46696ca4510b7dd4c3e490de8dbb7950d7172140dc27a4e69cd5811"}, + {file = "SudachiPy-0.6.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f8fd0ce37961401c9bdd78c126b2119a0a1669d376feb0b2427c35894ef1428"}, + {file = "SudachiPy-0.6.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a79b92b0776613481481c1ed0d2e92994b233ed5d29aa365789a1ba521de0a4"}, + {file = "SudachiPy-0.6.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:cc97b5d48f46f9989d97e105f7dd6419da2174888fcc42e55c0e4cd46597ed3b"}, + {file = "SudachiPy-0.6.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9794b73fffd8099d93e07882ba87eee5edbed0e4f1b94761db8f22c8e5da9904"}, + {file = "SudachiPy-0.6.10-cp313-cp313-win_amd64.whl", hash = "sha256:0fc5b60920a439c534688237e2651e15e4eaadc166a63182d6e24ac7ef3e4779"}, + {file = 
"SudachiPy-0.6.10-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:7455e5cbb4c2cf9294c82345c9d46b344774b4eb23eca917f305ed716d8d5168"}, + {file = "SudachiPy-0.6.10-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38d0de9e840ac8d199e714a40506792ea5237d0db0c966da16d51fbc74a508d6"}, + {file = "SudachiPy-0.6.10-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:de4fc5c155479f873f5f7cfb04989ffb41e6a187c566c59efdb7946fc87498fe"}, + {file = "SudachiPy-0.6.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5e2664dc436798d967c0fd92ae5186a175822eb38d294e2da7dad4417b8625c"}, + {file = "SudachiPy-0.6.10-cp39-cp39-win_amd64.whl", hash = "sha256:af941d5393b8389acbaf9ec5f50e7b2ef48cb0a875594d9d4347e78e86cf842a"}, + {file = "sudachipy-0.6.10.tar.gz", hash = "sha256:b8910a4610de98b2c3cb6dc3362fea93e3ba5059f1eb445a68baa9585278f31b"}, ] [package.extras] -tests = ["sudachidict-core", "tokenizers"] +tests = ["sudachidict_core", "tokenizers"] [[package]] name = "sympy" -version = "1.12.1" +version = "1.14.0" description = "Computer algebra system (CAS) in Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" +groups = ["main"] files = [ - {file = "sympy-1.12.1-py3-none-any.whl", hash = "sha256:9b2cbc7f1a640289430e13d2a56f02f867a1da0190f2f99d8968c2f74da0e515"}, - {file = "sympy-1.12.1.tar.gz", hash = "sha256:2877b03f998cd8c08f07cd0de5b767119cd3ef40d09f41c30d722f6686b0fb88"}, + {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, + {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, ] [package.dependencies] -mpmath = ">=1.1.0,<1.4.0" +mpmath = ">=1.1.0,<1.4" + +[package.extras] +dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] [[package]] name = "tabulate" @@ -2626,6 +3064,7 @@ version = "0.9.0" description = "Pretty-print tabular data" optional = false 
python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -2634,25 +3073,13 @@ files = [ [package.extras] widechars = ["wcwidth"] -[[package]] -name = "tbb" -version = "2021.13.0" -description = "Intel® oneAPI Threading Building Blocks (oneTBB)" -optional = false -python-versions = "*" -files = [ - {file = "tbb-2021.13.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:a2567725329639519d46d92a2634cf61e76601dac2f777a05686fea546c4fe4f"}, - {file = "tbb-2021.13.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:aaf667e92849adb012b8874d6393282afc318aca4407fc62f912ee30a22da46a"}, - {file = "tbb-2021.13.0-py3-none-win32.whl", hash = "sha256:6669d26703e9943f6164c6407bd4a237a45007e79b8d3832fe6999576eaaa9ef"}, - {file = "tbb-2021.13.0-py3-none-win_amd64.whl", hash = "sha256:3528a53e4bbe64b07a6112b4c5a00ff3c61924ee46c9c68e004a1ac7ad1f09c3"}, -] - [[package]] name = "threadpoolctl" version = "3.5.0" description = "threadpoolctl" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, @@ -2664,6 +3091,7 @@ version = "0.6.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "tiktoken-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:277de84ccd8fa12730a6b4067456e5cf72fef6300bea61d506c09e45658d41ac"}, {file = "tiktoken-0.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c44433f658064463650d61387623735641dcc4b6c999ca30bc0f8ba3fccaf5c"}, @@ -2712,120 
+3140,36 @@ blobfile = ["blobfile (>=2)"] [[package]] name = "tokenizers" -version = "0.19.1" +version = "0.22.1" description = "" optional = false -python-versions = ">=3.7" -files = [ - {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"}, - {file = "tokenizers-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82c8b8063de6c0468f08e82c4e198763e7b97aabfe573fd4cf7b33930ca4df77"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f03727225feaf340ceeb7e00604825addef622d551cbd46b7b775ac834c1e1c4"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:453e4422efdfc9c6b6bf2eae00d5e323f263fff62b29a8c9cd526c5003f3f642"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02e81bf089ebf0e7f4df34fa0207519f07e66d8491d963618252f2e0729e0b46"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b07c538ba956843833fee1190cf769c60dc62e1cf934ed50d77d5502194d63b1"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28cab1582e0eec38b1f38c1c1fb2e56bce5dc180acb1724574fc5f47da2a4fe"}, - {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b01afb7193d47439f091cd8f070a1ced347ad0f9144952a30a41836902fe09e"}, - {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7fb297edec6c6841ab2e4e8f357209519188e4a59b557ea4fafcf4691d1b4c98"}, - {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2e8a3dd055e515df7054378dc9d6fa8c8c34e1f32777fb9a01fea81496b3f9d3"}, - {file = "tokenizers-0.19.1-cp310-none-win32.whl", hash = "sha256:7ff898780a155ea053f5d934925f3902be2ed1f4d916461e1a93019cc7250837"}, - {file = 
"tokenizers-0.19.1-cp310-none-win_amd64.whl", hash = "sha256:bea6f9947e9419c2fda21ae6c32871e3d398cba549b93f4a65a2d369662d9403"}, - {file = "tokenizers-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5c88d1481f1882c2e53e6bb06491e474e420d9ac7bdff172610c4f9ad3898059"}, - {file = "tokenizers-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddf672ed719b4ed82b51499100f5417d7d9f6fb05a65e232249268f35de5ed14"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dadc509cc8a9fe460bd274c0e16ac4184d0958117cf026e0ea8b32b438171594"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfedf31824ca4915b511b03441784ff640378191918264268e6923da48104acc"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac11016d0a04aa6487b1513a3a36e7bee7eec0e5d30057c9c0408067345c48d2"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76951121890fea8330d3a0df9a954b3f2a37e3ec20e5b0530e9a0044ca2e11fe"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b342d2ce8fc8d00f376af068e3274e2e8649562e3bc6ae4a67784ded6b99428d"}, - {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d16ff18907f4909dca9b076b9c2d899114dd6abceeb074eca0c93e2353f943aa"}, - {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:706a37cc5332f85f26efbe2bdc9ef8a9b372b77e4645331a405073e4b3a8c1c6"}, - {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16baac68651701364b0289979ecec728546133e8e8fe38f66fe48ad07996b88b"}, - {file = "tokenizers-0.19.1-cp311-none-win32.whl", hash = "sha256:9ed240c56b4403e22b9584ee37d87b8bfa14865134e3e1c3fb4b2c42fafd3256"}, - {file = "tokenizers-0.19.1-cp311-none-win_amd64.whl", hash = 
"sha256:ad57d59341710b94a7d9dbea13f5c1e7d76fd8d9bcd944a7a6ab0b0da6e0cc66"}, - {file = "tokenizers-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:621d670e1b1c281a1c9698ed89451395d318802ff88d1fc1accff0867a06f153"}, - {file = "tokenizers-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d924204a3dbe50b75630bd16f821ebda6a5f729928df30f582fb5aade90c818a"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f3fefdc0446b1a1e6d81cd4c07088ac015665d2e812f6dbba4a06267d1a2c95"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9620b78e0b2d52ef07b0d428323fb34e8ea1219c5eac98c2596311f20f1f9266"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04ce49e82d100594715ac1b2ce87d1a36e61891a91de774755f743babcd0dd52"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5c2ff13d157afe413bf7e25789879dd463e5a4abfb529a2d8f8473d8042e28f"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3174c76efd9d08f836bfccaca7cfec3f4d1c0a4cf3acbc7236ad577cc423c840"}, - {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9d5b6c0e7a1e979bec10ff960fae925e947aab95619a6fdb4c1d8ff3708ce3"}, - {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a179856d1caee06577220ebcfa332af046d576fb73454b8f4d4b0ba8324423ea"}, - {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:952b80dac1a6492170f8c2429bd11fcaa14377e097d12a1dbe0ef2fb2241e16c"}, - {file = "tokenizers-0.19.1-cp312-none-win32.whl", hash = "sha256:01d62812454c188306755c94755465505836fd616f75067abcae529c35edeb57"}, - {file = "tokenizers-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:b70bfbe3a82d3e3fb2a5e9b22a39f8d1740c96c68b6ace0086b39074f08ab89a"}, - {file = 
"tokenizers-0.19.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:bb9dfe7dae85bc6119d705a76dc068c062b8b575abe3595e3c6276480e67e3f1"}, - {file = "tokenizers-0.19.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:1f0360cbea28ea99944ac089c00de7b2e3e1c58f479fb8613b6d8d511ce98267"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:71e3ec71f0e78780851fef28c2a9babe20270404c921b756d7c532d280349214"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b82931fa619dbad979c0ee8e54dd5278acc418209cc897e42fac041f5366d626"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8ff5b90eabdcdaa19af697885f70fe0b714ce16709cf43d4952f1f85299e73a"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e742d76ad84acbdb1a8e4694f915fe59ff6edc381c97d6dfdd054954e3478ad4"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8c5d59d7b59885eab559d5bc082b2985555a54cda04dda4c65528d90ad252ad"}, - {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b2da5c32ed869bebd990c9420df49813709e953674c0722ff471a116d97b22d"}, - {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:638e43936cc8b2cbb9f9d8dde0fe5e7e30766a3318d2342999ae27f68fdc9bd6"}, - {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:78e769eb3b2c79687d9cb0f89ef77223e8e279b75c0a968e637ca7043a84463f"}, - {file = "tokenizers-0.19.1-cp37-none-win32.whl", hash = "sha256:72791f9bb1ca78e3ae525d4782e85272c63faaef9940d92142aa3eb79f3407a3"}, - {file = "tokenizers-0.19.1-cp37-none-win_amd64.whl", hash = "sha256:f3bbb7a0c5fcb692950b041ae11067ac54826204318922da754f908d95619fbc"}, - {file = "tokenizers-0.19.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = 
"sha256:07f9295349bbbcedae8cefdbcfa7f686aa420be8aca5d4f7d1ae6016c128c0c5"}, - {file = "tokenizers-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10a707cc6c4b6b183ec5dbfc5c34f3064e18cf62b4a938cb41699e33a99e03c1"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6309271f57b397aa0aff0cbbe632ca9d70430839ca3178bf0f06f825924eca22"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad23d37d68cf00d54af184586d79b84075ada495e7c5c0f601f051b162112dc"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:427c4f0f3df9109314d4f75b8d1f65d9477033e67ffaec4bca53293d3aca286d"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e83a31c9cf181a0a3ef0abad2b5f6b43399faf5da7e696196ddd110d332519ee"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c27b99889bd58b7e301468c0838c5ed75e60c66df0d4db80c08f43462f82e0d3"}, - {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bac0b0eb952412b0b196ca7a40e7dce4ed6f6926489313414010f2e6b9ec2adf"}, - {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6298bde623725ca31c9035a04bf2ef63208d266acd2bed8c2cb7d2b7d53ce6"}, - {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:08a44864e42fa6d7d76d7be4bec62c9982f6f6248b4aa42f7302aa01e0abfd26"}, - {file = "tokenizers-0.19.1-cp38-none-win32.whl", hash = "sha256:1de5bc8652252d9357a666e609cb1453d4f8e160eb1fb2830ee369dd658e8975"}, - {file = "tokenizers-0.19.1-cp38-none-win_amd64.whl", hash = "sha256:0bcce02bf1ad9882345b34d5bd25ed4949a480cf0e656bbd468f4d8986f7a3f1"}, - {file = "tokenizers-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0b9394bd204842a2a1fd37fe29935353742be4a3460b6ccbaefa93f58a8df43d"}, - {file = 
"tokenizers-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4692ab92f91b87769d950ca14dbb61f8a9ef36a62f94bad6c82cc84a51f76f6a"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6258c2ef6f06259f70a682491c78561d492e885adeaf9f64f5389f78aa49a051"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c85cf76561fbd01e0d9ea2d1cbe711a65400092bc52b5242b16cfd22e51f0c58"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670b802d4d82bbbb832ddb0d41df7015b3e549714c0e77f9bed3e74d42400fbe"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85aa3ab4b03d5e99fdd31660872249df5e855334b6c333e0bc13032ff4469c4a"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbf001afbbed111a79ca47d75941e9e5361297a87d186cbfc11ed45e30b5daba"}, - {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c89aa46c269e4e70c4d4f9d6bc644fcc39bb409cb2a81227923404dd6f5227"}, - {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:39c1ec76ea1027438fafe16ecb0fb84795e62e9d643444c1090179e63808c69d"}, - {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c2a0d47a89b48d7daa241e004e71fb5a50533718897a4cd6235cb846d511a478"}, - {file = "tokenizers-0.19.1-cp39-none-win32.whl", hash = "sha256:61b7fe8886f2e104d4caf9218b157b106207e0f2a4905c9c7ac98890688aabeb"}, - {file = "tokenizers-0.19.1-cp39-none-win_amd64.whl", hash = "sha256:f97660f6c43efd3e0bfd3f2e3e5615bf215680bad6ee3d469df6454b8c6e8256"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b11853f17b54c2fe47742c56d8a33bf49ce31caf531e87ac0d7d13d327c9334"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:d26194ef6c13302f446d39972aaa36a1dda6450bc8949f5eb4c27f51191375bd"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e8d1ed93beda54bbd6131a2cb363a576eac746d5c26ba5b7556bc6f964425594"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca407133536f19bdec44b3da117ef0d12e43f6d4b56ac4c765f37eca501c7bda"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce05fde79d2bc2e46ac08aacbc142bead21614d937aac950be88dc79f9db9022"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:35583cd46d16f07c054efd18b5d46af4a2f070a2dd0a47914e66f3ff5efb2b1e"}, - {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b4399b59d1af5645bcee2072a463318114c39b8547437a7c2d6a186a1b5a0e2d"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6852c5b2a853b8b0ddc5993cd4f33bfffdca4fcc5d52f89dd4b8eada99379285"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd266ae85c3d39df2f7e7d0e07f6c41a55e9a3123bb11f854412952deacd828"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecb2651956eea2aa0a2d099434134b1b68f1c31f9a5084d6d53f08ed43d45ff2"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b279ab506ec4445166ac476fb4d3cc383accde1ea152998509a94d82547c8e2a"}, - {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:89183e55fb86e61d848ff83753f64cded119f5d6e1f553d14ffee3700d0a4a49"}, - {file = 
"tokenizers-0.19.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2edbc75744235eea94d595a8b70fe279dd42f3296f76d5a86dde1d46e35f574"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:0e64bfde9a723274e9a71630c3e9494ed7b4c0f76a1faacf7fe294cd26f7ae7c"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b5ca92bfa717759c052e345770792d02d1f43b06f9e790ca0a1db62838816f3"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f8a20266e695ec9d7a946a019c1d5ca4eddb6613d4f466888eee04f16eedb85"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c38f45d8f2a2ec0f3a20073cccb335b9f99f73b3c69483cd52ebc75369d8a1"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dd26e3afe8a7b61422df3176e06664503d3f5973b94f45d5c45987e1cb711876"}, - {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:eddd5783a4a6309ce23432353cdb36220e25cbb779bfa9122320666508b44b88"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:56ae39d4036b753994476a1b935584071093b55c7a72e3b8288e68c313ca26e7"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f9939ca7e58c2758c01b40324a59c034ce0cebad18e0d4563a9b1beab3018243"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c330c0eb815d212893c67a032e9dc1b38a803eccb32f3e8172c19cc69fbb439"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec11802450a2487cdf0e634b750a04cbdc1c4d066b97d94ce7dd2cb51ebb325b"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b718f316b596f36e1dae097a7d5b91fc5b85e90bf08b01ff139bd8953b25af"}, - {file = 
"tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ed69af290c2b65169f0ba9034d1dc39a5db9459b32f1dd8b5f3f32a3fcf06eab"}, - {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f8a9c828277133af13f3859d1b6bf1c3cb6e9e1637df0e45312e6b7c2e622b1f"}, - {file = "tokenizers-0.19.1.tar.gz", hash = "sha256:ee59e6680ed0fdbe6b724cf38bd70400a0c1dd623b07ac729087270caeac88e3"}, +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "tokenizers-0.22.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:59fdb013df17455e5f950b4b834a7b3ee2e0271e6378ccb33aa74d178b513c73"}, + {file = "tokenizers-0.22.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8d4e484f7b0827021ac5f9f71d4794aaef62b979ab7608593da22b1d2e3c4edc"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d2962dd28bc67c1f205ab180578a78eef89ac60ca7ef7cbe9635a46a56422a"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38201f15cdb1f8a6843e6563e6e79f4abd053394992b9bbdf5213ea3469b4ae7"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1cbe5454c9a15df1b3443c726063d930c16f047a3cc724b9e6e1a91140e5a21"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7d094ae6312d69cc2a872b54b91b309f4f6fbce871ef28eb27b52a98e4d0214"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afd7594a56656ace95cdd6df4cca2e4059d294c5cfb1679c57824b605556cb2f"}, + {file = "tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2ef6063d7a84994129732b47e7915e8710f27f99f3a3260b8a38fc7ccd083f4"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba0a64f450b9ef412c98f6bcd2a50c6df6e2443b560024a09fa6a03189726879"}, + {file = 
"tokenizers-0.22.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:331d6d149fa9c7d632cde4490fb8bbb12337fa3a0232e77892be656464f4b446"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:607989f2ea68a46cb1dfbaf3e3aabdf3f21d8748312dbeb6263d1b3b66c5010a"}, + {file = "tokenizers-0.22.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a0f307d490295717726598ef6fa4f24af9d484809223bbc253b201c740a06390"}, + {file = "tokenizers-0.22.1-cp39-abi3-win32.whl", hash = "sha256:b5120eed1442765cd90b903bb6cfef781fd8fe64e34ccaecbae4c619b7b12a82"}, + {file = "tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138"}, + {file = "tokenizers-0.22.1.tar.gz", hash = "sha256:61de6522785310a309b3407bac22d99c4db5dba349935e99e4d15ea2226af2d9"}, ] [package.dependencies] -huggingface-hub = ">=0.16.4,<1.0" +huggingface-hub = ">=0.16.4,<2.0" [package.extras] dev = ["tokenizers[testing]"] docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] -testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "pytest-asyncio", "requests", "ruff"] [[package]] name = "tomli" @@ -2833,64 +3177,143 @@ version = "2.0.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +markers = {dev = "python_version < \"3.11\""} [[package]] name = "torch" -version = "2.3.1" +version = "2.7.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false -python-versions = ">=3.8.0" -files = [ - {file = "torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl", hash = 
"sha256:605a25b23944be5ab7c3467e843580e1d888b8066e5aaf17ff7bf9cc30001cc3"}, - {file = "torch-2.3.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f2357eb0965583a0954d6f9ad005bba0091f956aef879822274b1bcdb11bd308"}, - {file = "torch-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:32b05fe0d1ada7f69c9f86c14ff69b0ef1957a5a54199bacba63d22d8fab720b"}, - {file = "torch-2.3.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7c09a94362778428484bcf995f6004b04952106aee0ef45ff0b4bab484f5498d"}, - {file = "torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b2ec81b61bb094ea4a9dee1cd3f7b76a44555375719ad29f05c0ca8ef596ad39"}, - {file = "torch-2.3.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:490cc3d917d1fe0bd027057dfe9941dc1d6d8e3cae76140f5dd9a7e5bc7130ab"}, - {file = "torch-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5802530783bd465fe66c2df99123c9a54be06da118fbd785a25ab0a88123758a"}, - {file = "torch-2.3.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:a7dd4ed388ad1f3d502bf09453d5fe596c7b121de7e0cfaca1e2017782e9bbac"}, - {file = "torch-2.3.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:a486c0b1976a118805fc7c9641d02df7afbb0c21e6b555d3bb985c9f9601b61a"}, - {file = "torch-2.3.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:224259821fe3e4c6f7edf1528e4fe4ac779c77addaa74215eb0b63a5c474d66c"}, - {file = "torch-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5fdccbf6f1334b2203a61a0e03821d5845f1421defe311dabeae2fc8fbeac2d"}, - {file = "torch-2.3.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:3c333dc2ebc189561514eda06e81df22bf8fb64e2384746b2cb9f04f96d1d4c8"}, - {file = "torch-2.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:07e9ba746832b8d069cacb45f312cadd8ad02b81ea527ec9766c0e7404bb3feb"}, - {file = "torch-2.3.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:462d1c07dbf6bb5d9d2f3316fee73a24f3d12cd8dacf681ad46ef6418f7f6626"}, - {file = "torch-2.3.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:ff60bf7ce3de1d43ad3f6969983f321a31f0a45df3690921720bcad6a8596cc4"}, - {file = "torch-2.3.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:bee0bd33dc58aa8fc8a7527876e9b9a0e812ad08122054a5bff2ce5abf005b10"}, - {file = "torch-2.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:aaa872abde9a3d4f91580f6396d54888620f4a0b92e3976a6034759df4b961ad"}, - {file = "torch-2.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:3d7a7f7ef21a7520510553dc3938b0c57c116a7daee20736a9e25cbc0e832bdc"}, - {file = "torch-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:4777f6cefa0c2b5fa87223c213e7b6f417cf254a45e5829be4ccd1b2a4ee1011"}, - {file = "torch-2.3.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:2bb5af780c55be68fe100feb0528d2edebace1d55cb2e351de735809ba7391eb"}, +python-versions = ">=3.9.0" +groups = ["main"] +markers = "python_version >= \"3.12\"" +files = [ + {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a103b5d782af5bd119b81dbcc7ffc6fa09904c423ff8db397a1e6ea8fd71508f"}, + {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:fe955951bdf32d182ee8ead6c3186ad54781492bf03d547d31771a01b3d6fb7d"}, + {file = "torch-2.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:885453d6fba67d9991132143bf7fa06b79b24352f4506fd4d10b309f53454162"}, + {file = "torch-2.7.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d72acfdb86cee2a32c0ce0101606f3758f0d8bb5f8f31e7920dc2809e963aa7c"}, + {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:236f501f2e383f1cb861337bdf057712182f910f10aeaf509065d54d339e49b2"}, + {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:06eea61f859436622e78dd0cdd51dbc8f8c6d76917a9cf0555a333f9eac31ec1"}, + {file = "torch-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:8273145a2e0a3c6f9fd2ac36762d6ee89c26d430e612b95a99885df083b04e52"}, + {file = "torch-2.7.1-cp311-none-macosx_11_0_arm64.whl", hash = 
"sha256:aea4fc1bf433d12843eb2c6b2204861f43d8364597697074c8d38ae2507f8730"}, + {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ea1e518df4c9de73af7e8a720770f3628e7f667280bce2be7a16292697e3fa"}, + {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c33360cfc2edd976c2633b3b66c769bdcbbf0e0b6550606d188431c81e7dd1fc"}, + {file = "torch-2.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:d8bf6e1856ddd1807e79dc57e54d3335f2b62e6f316ed13ed3ecfe1fc1df3d8b"}, + {file = "torch-2.7.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:787687087412c4bd68d315e39bc1223f08aae1d16a9e9771d95eabbb04ae98fb"}, + {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:03563603d931e70722dce0e11999d53aa80a375a3d78e6b39b9f6805ea0a8d28"}, + {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d632f5417b6980f61404a125b999ca6ebd0b8b4bbdbb5fbbba44374ab619a412"}, + {file = "torch-2.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:23660443e13995ee93e3d844786701ea4ca69f337027b05182f5ba053ce43b38"}, + {file = "torch-2.7.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0da4f4dba9f65d0d203794e619fe7ca3247a55ffdcbd17ae8fb83c8b2dc9b585"}, + {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e08d7e6f21a617fe38eeb46dd2213ded43f27c072e9165dc27300c9ef9570934"}, + {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:30207f672328a42df4f2174b8f426f354b2baa0b7cca3a0adb3d6ab5daf00dc8"}, + {file = "torch-2.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:79042feca1c634aaf6603fe6feea8c6b30dfa140a6bbc0b973e2260c7e79a22e"}, + {file = "torch-2.7.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:988b0cbc4333618a1056d2ebad9eb10089637b659eb645434d0809d8d937b946"}, + {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:e0d81e9a12764b6f3879a866607c8ae93113cbcad57ce01ebde63eb48a576369"}, + {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:8394833c44484547ed4a47162318337b88c97acdb3273d85ea06e03ffff44998"}, + {file = "torch-2.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:df41989d9300e6e3c19ec9f56f856187a6ef060c3662fe54f4b6baf1fc90bd19"}, + {file = "torch-2.7.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a737b5edd1c44a5c1ece2e9f3d00df9d1b3fb9541138bee56d83d38293fb6c9d"}, ] [package.dependencies] filelock = "*" fsspec = "*" jinja2 = "*" -mkl = {version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""} networkx = "*" -nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -sympy = "*" -triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and 
python_version < \"3.12\""} -typing-extensions = ">=4.8.0" +nvidia-cublas-cu12 = {version = "12.6.4.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-cupti-cu12 = {version = "12.6.80", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-nvrtc-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-runtime-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cudnn-cu12 = {version = "9.5.1.17", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufft-cu12 = {version = "11.3.0.4", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufile-cu12 = {version = "1.11.1.6", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-curand-cu12 = {version = "10.3.7.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusolver-cu12 = {version = "11.7.1.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparse-cu12 = {version = "12.5.4.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparselt-cu12 = {version = "0.6.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.26.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvjitlink-cu12 = {version = "12.6.85", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvtx-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = {version = "*", markers = "python_version >= \"3.12\""} +sympy = ">=1.13.3" +triton = {version = "3.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +typing-extensions = 
">=4.10.0" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] +optree = ["optree (>=0.13.0)"] + +[[package]] +name = "torch" +version = "2.9.0" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.10" +groups = ["main"] +markers = "python_version <= \"3.11\"" +files = [ + {file = "torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd"}, + {file = "torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c"}, + {file = "torch-2.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:3f6aad4d2f0ee2248bac25339d74858ff846c3969b27d14ac235821f055af83d"}, + {file = "torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:413e1654c9203733138858780e184d9fc59442f0b3b209e16f39354eb893db9b"}, + {file = "torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677"}, + {file = "torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67"}, + {file = "torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e"}, + {file = "torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec"}, + {file = "torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642"}, + {file = "torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6"}, + {file = "torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b"}, + {file = "torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = 
"sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695"}, + {file = "torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e"}, + {file = "torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642"}, + {file = "torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece"}, + {file = "torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5"}, + {file = "torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637"}, + {file = "torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e"}, + {file = "torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8"}, + {file = "torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a"}, + {file = "torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb"}, + {file = "torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb"}, + {file = "torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f"}, + {file = "torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125"}, + {file = "torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc"}, + {file = "torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash 
= "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e"}, + {file = "torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b"}, + {file = "torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75"}, +] + +[package.dependencies] +filelock = "*" +fsspec = ">=0.8.5" +jinja2 = "*" +networkx = ">=2.5.1" +nvidia-cublas-cu12 = {version = "12.8.4.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-cupti-cu12 = {version = "12.8.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-nvrtc-cu12 = {version = "12.8.93", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-runtime-cu12 = {version = "12.8.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cudnn-cu12 = {version = "9.10.2.21", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufft-cu12 = {version = "11.3.3.83", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufile-cu12 = {version = "1.13.1.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-curand-cu12 = {version = "10.3.9.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusolver-cu12 = {version = "11.7.3.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparse-cu12 = {version = "12.5.8.93", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparselt-cu12 = {version = "0.7.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.27.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvjitlink-cu12 = {version = "12.8.93", markers = 
"platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvshmem-cu12 = {version = "3.3.20", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvtx-cu12 = {version = "12.8.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +sympy = ">=1.13.3" +triton = {version = "3.5.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +typing-extensions = ">=4.10.0" [package.extras] opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.9.1)"] +optree = ["optree (>=0.13.0)"] +pyyaml = ["pyyaml"] [[package]] name = "tqdm" @@ -2898,6 +3321,7 @@ version = "4.66.4" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, @@ -2914,19 +3338,20 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "4.41.2" +version = "4.57.1" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false -python-versions = ">=3.8.0" +python-versions = ">=3.9.0" +groups = ["main"] files = [ - {file = "transformers-4.41.2-py3-none-any.whl", hash = "sha256:05555d20e43f808de1ef211ab64803cdb513170cef70d29a888b589caebefc67"}, - {file = "transformers-4.41.2.tar.gz", hash = "sha256:80a4db216533d573e9cc7388646c31ed9480918feb7c55eb211249cb23567f87"}, + {file = "transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267"}, + {file = "transformers-4.57.1.tar.gz", hash = "sha256:f06c837959196c75039809636cd964b959f6604b75b8eeec6fdfc0440b89cc55"}, ] [package.dependencies] filelock = "*" fugashi = {version = ">=1.0", optional = true, markers = "extra == \"ja\""} -huggingface-hub = ">=0.23.0,<1.0" 
+huggingface-hub = ">=0.34.0,<1.0" ipadic = {version = ">=1.0.0,<2.0", optional = true, markers = "extra == \"ja\""} numpy = ">=1.17" packaging = ">=20.0" @@ -2935,80 +3360,120 @@ pyyaml = ">=5.1" regex = "!=2019.12.17" requests = "*" rhoknp = {version = ">=1.1.0,<1.3.1", optional = true, markers = "extra == \"ja\""} -safetensors = ">=0.4.1" +safetensors = ">=0.4.3" sentencepiece = {version = ">=0.1.91,<0.1.92 || >0.1.92", optional = true, markers = "extra == \"sentencepiece\""} -sudachidict-core = {version = ">=20220729", optional = true, markers = "extra == \"ja\""} +sudachidict_core = {version = ">=20220729", optional = true, markers = "extra == \"ja\""} sudachipy = {version = ">=0.6.6", optional = true, markers = "extra == \"ja\""} -tokenizers = ">=0.19,<0.20" +tokenizers = ">=0.22.0,<=0.23.0" tqdm = ">=4.27" unidic = {version = ">=1.0.2", optional = true, markers = "extra == \"ja\""} -unidic-lite = {version = ">=1.0.7", optional = true, markers = "extra == \"ja\""} +unidic_lite = {version = ">=1.0.7", optional = true, markers = "extra == \"ja\""} [package.extras] -accelerate = ["accelerate (>=0.21.0)"] -agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"] -all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision"] +accelerate = ["accelerate (>=0.26.0)"] +all = ["Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate 
(>=0.26.0)", "accelerate (>=0.26.0)", "av", "codecarbon (>=2.8.1)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "librosa", "mistral-common[opencv] (>=1.6.3)", "num2words", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -codecarbon = ["codecarbon (==1.2.0)"] -deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"] -deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest 
(>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.19,<0.20)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", 
"ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +benchmark = ["optimum-benchmark (>=0.3.0)"] +chat-template = ["jinja2 (>=3.1.0)"] +codecarbon = ["codecarbon (>=2.8.1)"] +deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "optuna", "parameterized (>=0.9)", "protobuf", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] +dev = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "av", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "jax 
(>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "jinja2 (>=3.1.0)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "kernels (>=0.6.1,<=0.9)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxconverter-common", "openai (>=1.98.0)", "optax (>=0.0.8,<=0.1.4)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] +dev-tensorflow = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "onnxconverter-common", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "openai (>=1.98.0)", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", 
"pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "tf2onnx", "timeout-decorator", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "urllib3 (<2.0.0)", "uvicorn"] +dev-torch = ["GitPython (<3.1.19)", "GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (>=2.8.1)", "cookiecutter (==1.7.3)", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "kenlm", "kernels (>=0.6.1,<=0.9)", "libcst", "libcst", "librosa", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "num2words", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "openai (>=1.98.0)", "optuna", "pandas (<2.3.0)", "parameterized (>=0.9)", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (!=1.0.18,<=1.0.19)", "tokenizers (>=0.22.0,<=0.23.0)", "torch 
(>=2.2)", "torch (>=2.2)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)", "urllib3 (<2.0.0)", "uvicorn"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] ftfy = ["ftfy"] -integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"] -ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +hf-xet = ["hf_xet"] +hub-kernels = ["kernels (>=0.6.1,<=0.9)"] +integrations = ["kernels (>=0.6.1,<=0.9)", "optuna", "ray[tune] (>=2.7.0)"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict_core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic_lite (>=1.0.7)"] +mistral-common = ["mistral-common[opencv] (>=1.6.3)"] modelcreation = ["cookiecutter (==1.7.3)"] natten = ["natten (>=0.14.6,<0.15.0)"] +num2words = ["num2words"] onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +open-telemetry = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] optuna = ["optuna"] -quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "ruff (==0.1.5)", "urllib3 (<2.0.0)"] +quality = ["GitPython (<3.1.19)", "datasets (>=2.15.0)", "libcst", "pandas (<2.3.0)", "rich", "ruff (==0.13.1)", "urllib3 (<2.0.0)"] ray = ["ray[tune] (>=2.7.0)"] -retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +retrieval = ["datasets (>=2.15.0)", "faiss-cpu"] +ruff = ["ruff (==0.13.1)"] sagemaker = ["sagemaker (>=2.31.0)"] sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] -serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +serving = ["accelerate (>=0.26.0)", "fastapi", "openai (>=1.98.0)", 
"pydantic (>=2)", "starlette", "torch (>=2.2)", "uvicorn"] sigopt = ["sigopt"] sklearn = ["scikit-learn"] speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] -testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] -tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] -tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] +testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (>=2.15.0)", "datasets (>=2.15.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fastapi", "libcst", "mistral-common[opencv] (>=1.6.3)", "nltk (<=3.8.1)", "openai (>=1.98.0)", "parameterized (>=0.9)", "psutil", "pydantic (>=2)", "pydantic (>=2)", "pytest (>=7.2.0)", "pytest-asyncio", "pytest-order", "pytest-rerunfailures (<16.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.13.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "starlette", "tensorboard", "timeout-decorator", "torch (>=2.2)", "uvicorn"] +tf = ["keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] +tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1,<0.14.0)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text 
(<2.16)", "tf2onnx"] tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] -timm = ["timm"] -tokenizers = ["tokenizers (>=0.19,<0.20)"] -torch = ["accelerate (>=0.21.0)", "torch"] +tiktoken = ["blobfile", "tiktoken"] +timm = ["timm (!=1.0.18,<=1.0.19)"] +tokenizers = ["tokenizers (>=0.22.0,<=0.23.0)"] +torch = ["accelerate (>=0.26.0)", "torch (>=2.2)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.23.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.19,<0.20)", "torch", "tqdm (>=4.27)"] -video = ["av (==9.2.0)", "decord (==0.6.0)"] +torchhub = ["filelock", "huggingface-hub (>=0.34.0,<1.0)", "importlib_metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.22.0,<=0.23.0)", "torch (>=2.2)", "tqdm (>=4.27)"] +video = ["av"] vision = ["Pillow (>=10.0.1,<=15.0)"] [[package]] name = "triton" -version = "2.3.1" +version = "3.3.1" description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" files = [ - {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"}, - {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"}, - {file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"}, - {file = 
"triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"}, - {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"}, - {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"}, + {file = "triton-3.3.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b74db445b1c562844d3cfad6e9679c72e93fdfb1a90a24052b03bb5c49d1242e"}, + {file = "triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b31e3aa26f8cb3cc5bf4e187bf737cbacf17311e1112b781d4a059353dfd731b"}, + {file = "triton-3.3.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9999e83aba21e1a78c1f36f21bce621b77bcaa530277a50484a7cb4a822f6e43"}, + {file = "triton-3.3.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b89d846b5a4198317fec27a5d3a609ea96b6d557ff44b56c23176546023c4240"}, + {file = "triton-3.3.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3198adb9d78b77818a5388bff89fa72ff36f9da0bc689db2f0a651a67ce6a42"}, + {file = "triton-3.3.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6139aeb04a146b0b8e0fbbd89ad1e65861c57cfed881f21d62d3cb94a36bab7"}, ] [package.dependencies] -filelock = "*" +setuptools = ">=40.8.0" [package.extras] build = ["cmake (>=3.20)", "lit"] -tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"] -tutorials = ["matplotlib", "pandas", "tabulate", "torch"] +tests = ["autopep8", "isort", "llnl-hatchet", "numpy", "pytest", "pytest-forked", "pytest-xdist", "scipy (>=1.7.1)"] +tutorials = ["matplotlib", "pandas", "tabulate"] + +[[package]] +name = "triton" +version = "3.5.0" +description = 
"A language and compiler for custom Deep Learning operations" +optional = false +python-versions = "<3.15,>=3.10" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +files = [ + {file = "triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3"}, + {file = "triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0"}, + {file = "triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c"}, + {file = "triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec"}, + {file = "triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b"}, + {file = "triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833"}, + {file = "triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3"}, + {file = "triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12"}, + {file = "triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae"}, + {file = "triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9"}, + {file = 
"triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac"}, + {file = "triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289"}, + {file = "triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686"}, + {file = "triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7"}, +] + +[package.extras] +build = ["cmake (>=3.20,<4.0)", "lit"] +tests = ["autopep8", "isort", "llnl-hatchet", "numpy", "pytest", "pytest-forked", "pytest-xdist", "scipy (>=1.7.1)"] +tutorials = ["matplotlib", "pandas", "tabulate"] [[package]] name = "typing-extensions" @@ -3016,6 +3481,7 @@ version = "4.12.1" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, @@ -3027,6 +3493,7 @@ version = "2024.1" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" +groups = ["main"] files = [ {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, @@ -3038,6 +3505,7 @@ version = "1.1.0" description = "UniDic packaged for Python" optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "unidic-1.1.0.tar.gz", hash = 
"sha256:0ab91c05de342c84d2a6314901fd3afb9061ecd7534dd4a0431dccbb87d921b7"}, ] @@ -3054,6 +3522,7 @@ version = "1.0.8" description = "A small version of UniDic packaged for Python" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "unidic-lite-1.0.8.tar.gz", hash = "sha256:db9d4572d9fdd4d00a97949d4b0741ec480ee05a7e7e2e32f547500dae27b245"}, ] @@ -3064,13 +3533,14 @@ version = "2.2.1" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -3081,6 +3551,7 @@ version = "0.10.1" description = "A lightweight console printing and formatting toolkit" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "wasabi-0.10.1-py3-none-any.whl", hash = "sha256:fe862cc24034fbc9f04717cd312ab884f71f51a8ecabebc3449b751c2a649d83"}, {file = "wasabi-0.10.1.tar.gz", hash = "sha256:c8e372781be19272942382b14d99314d175518d7822057cb7a97010c4259d249"}, @@ -3092,13 +3563,15 @@ version = "1.1.0" description = "A small Python utility to set file creation time on Windows" optional = false python-versions = ">=3.5" +groups = ["main"] +markers = "sys_platform == \"win32\"" files = [ {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, {file = "win32_setctime-1.1.0.tar.gz", hash = 
"sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, ] [package.extras] -dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] +dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] [[package]] name = "wrapt" @@ -3106,6 +3579,7 @@ version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, @@ -3185,6 +3659,7 @@ version = "3.4.1" description = "Python binding for xxHash" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "xxhash-3.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91dbfa55346ad3e18e738742236554531a621042e419b70ad8f3c1d9c7a16e7f"}, {file = "xxhash-3.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:665a65c2a48a72068fcc4d21721510df5f51f1142541c890491afc80451636d2"}, @@ -3302,6 +3777,7 @@ version = "1.9.4" description = "Yet another URL library" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, @@ -3400,6 +3876,6 @@ idna = ">=2.0" multidict = ">=4.0" [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.10,<4.0" -content-hash = "a2c9ed2cef63429fda1482752acb674fe3b39b94498bbe2c177d0b8ac9558c44" +content-hash = "f4ea38369c3560805eaf80e8b74de4909777dc51cba333401221fa6787c391bc" diff --git a/pyproject.toml b/pyproject.toml index b5d2296..28cea05 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -19,11 +19,11 @@ python = ">=3.10,<4.0" jsonargparse = {extras = ["jsonnet"], version = "^4.27.5"} loguru = "^0.7.2" scikit-learn = "^1.3.2" -transformers = {extras = ["ja", "sentencepiece"], version = "^4.38.1"} +transformers = {version = "^4.57.1", extras = ["ja", "sentencepiece"]} datasets = ">=2.17" -sentence-transformers = "^3.0.0" +sentence-transformers = "5.1.1" pytest = "7.1.3" -torch = "^2.3" +torch = "^2.6" pydantic = "^2.6.3" eval-type-backport = "^0.1.3" smart-open = "^7.0.1" diff --git a/src/jmteb/__main__.py b/src/jmteb/__main__.py index ff10884..2dc9478 100644 --- a/src/jmteb/__main__.py +++ b/src/jmteb/__main__.py @@ -119,6 +119,7 @@ def main( ) if args.log_predictions: + logger.info("Prediction logging activated.") for k, v in args.evaluators.items(): if hasattr(v, "log_predictions"): args.evaluators[k].log_predictions = True diff --git a/src/jmteb/configs/jmteb.jsonnet b/src/jmteb/configs/jmteb.jsonnet index 66fd2dc..b27d021 100644 --- a/src/jmteb/configs/jmteb.jsonnet +++ b/src/jmteb/configs/jmteb.jsonnet @@ -3,14 +3,16 @@ (import './tasks/amazon_counterfactual_classification.jsonnet') + (import './tasks/massive_intent_classification.jsonnet') + (import './tasks/massive_scenario_classification.jsonnet') + +(import './tasks/japanese_sentiment_classification.jsonnet') + +(import './tasks/sib200_japanese_classification.jsonnet') + +(import './tasks/wrime_classification.jsonnet') + // Clustering (import './tasks/livedoor_news.jsonnet') + (import './tasks/mewsc16.jsonnet') + +(import './tasks/sib200_japanese_clustering.jsonnet') + // STS (import './tasks/jsts.jsonnet') + (import './tasks/jsick.jsonnet') + -// Pair Classification -(import './tasks/paws_x_ja.jsonnet') + // Retrieval (import './tasks/jagovfaqs_22k.jsonnet') + (import './tasks/mrtydi.jsonnet') + @@ -18,5 +20,14 @@ (import './tasks/nlp_journal_title_abs.jsonnet') + (import './tasks/nlp_journal_title_intro.jsonnet') + (import './tasks/nlp_journal_abs_intro.jsonnet') + 
+(import './tasks/nlp_journal_abs_article.jsonnet') + +(import './tasks/jacwir_retrieval.jsonnet') + +(import './tasks/miracl_retrieval.jsonnet') + +(import './tasks/mldr_retrieval.jsonnet') + +(import './tasks/mintaka_retrieval.jsonnet') + // Reranking -(import './tasks/esci.jsonnet') \ No newline at end of file +(import './tasks/esci.jsonnet') + +(import './tasks/jqara.jsonnet') + +(import './tasks/jacwir_reranking.jsonnet') + +(import './tasks/miracl_reranking.jsonnet') + +(import './tasks/mldr_reranking.jsonnet') \ No newline at end of file diff --git a/src/jmteb/configs/tasks/jacwir_reranking.jsonnet b/src/jmteb/configs/tasks/jacwir_reranking.jsonnet new file mode 100644 index 0000000..eb41d67 --- /dev/null +++ b/src/jmteb/configs/tasks/jacwir_reranking.jsonnet @@ -0,0 +1,31 @@ +{ + jacwir_reranking: { + class_path: 'RerankingEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'jacwir-reranking-query', + }, + }, + test_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'jacwir-reranking-query', + }, + }, + doc_dataset: { + class_path: 'HfRerankingDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'jacwir-reranking-corpus', + }, + }, + }, + }, +} diff --git a/src/jmteb/configs/tasks/jacwir_retrieval.jsonnet b/src/jmteb/configs/tasks/jacwir_retrieval.jsonnet new file mode 100644 index 0000000..8cdb416 --- /dev/null +++ b/src/jmteb/configs/tasks/jacwir_retrieval.jsonnet @@ -0,0 +1,32 @@ +{ + jacwir_retrieval: { + class_path: 'RetrievalEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'jacwir-retrieval-query', + }, + }, + test_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', 
+ split: 'test', + name: 'jacwir-retrieval-query', + }, + }, + doc_dataset: { + class_path: 'HfRetrievalDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'jacwir-retrieval-corpus', + }, + }, + "doc_chunk_size":10000 + }, + }, +} diff --git a/src/jmteb/configs/tasks/japanese_sentiment_classification.jsonnet b/src/jmteb/configs/tasks/japanese_sentiment_classification.jsonnet new file mode 100644 index 0000000..f5a847c --- /dev/null +++ b/src/jmteb/configs/tasks/japanese_sentiment_classification.jsonnet @@ -0,0 +1,31 @@ +{ + japanese_sentiment_classification: { + class_path: 'ClassificationEvaluator', + init_args: { + train_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'train', + name: 'japanese_sentiment_classification', + }, + }, + val_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'japanese_sentiment_classification', + }, + }, + test_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'japanese_sentiment_classification', + }, + }, + }, + }, +} diff --git a/src/jmteb/configs/tasks/jqara.jsonnet b/src/jmteb/configs/tasks/jqara.jsonnet new file mode 100644 index 0000000..1c0ba64 --- /dev/null +++ b/src/jmteb/configs/tasks/jqara.jsonnet @@ -0,0 +1,31 @@ +{ + jqara: { + class_path: 'RerankingEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'jqara-query', + }, + }, + test_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'jqara-query', + }, + }, + doc_dataset: { + class_path: 'HfRerankingDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'jqara-corpus', + }, + }, + }, + }, +} diff --git 
a/src/jmteb/configs/tasks/mintaka_retrieval.jsonnet b/src/jmteb/configs/tasks/mintaka_retrieval.jsonnet new file mode 100644 index 0000000..6b17949 --- /dev/null +++ b/src/jmteb/configs/tasks/mintaka_retrieval.jsonnet @@ -0,0 +1,32 @@ +{ + mintaka_retrieval: { + class_path: 'RetrievalEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'mintaka-retrieval-query', + }, + }, + test_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'mintaka-retrieval-query', + }, + }, + doc_dataset: { + class_path: 'HfRetrievalDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'mintaka-retrieval-corpus', + }, + }, + "doc_chunk_size":10000 + }, + }, +} diff --git a/src/jmteb/configs/tasks/miracl_reranking.jsonnet b/src/jmteb/configs/tasks/miracl_reranking.jsonnet new file mode 100644 index 0000000..b91a341 --- /dev/null +++ b/src/jmteb/configs/tasks/miracl_reranking.jsonnet @@ -0,0 +1,31 @@ +{ + miracl_reranking: { + class_path: 'RerankingEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'miracl-reranking-query', + }, + }, + test_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'miracl-reranking-query', + }, + }, + doc_dataset: { + class_path: 'HfRerankingDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'miracl-reranking-corpus', + }, + }, + }, + }, +} diff --git a/src/jmteb/configs/tasks/miracl_retrieval.jsonnet b/src/jmteb/configs/tasks/miracl_retrieval.jsonnet new file mode 100644 index 0000000..9b73f4f --- /dev/null +++ b/src/jmteb/configs/tasks/miracl_retrieval.jsonnet @@ -0,0 +1,32 @@ +{ + miracl_retrieval: { + class_path: 
'RetrievalEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'miracl-retrieval-query', + }, + }, + test_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'miracl-retrieval-query', + }, + }, + doc_dataset: { + class_path: 'HfRetrievalDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'miracl-retrieval-corpus', + }, + }, + "doc_chunk_size":10000 + }, + }, +} diff --git a/src/jmteb/configs/tasks/mldr_reranking.jsonnet b/src/jmteb/configs/tasks/mldr_reranking.jsonnet new file mode 100644 index 0000000..1cbc025 --- /dev/null +++ b/src/jmteb/configs/tasks/mldr_reranking.jsonnet @@ -0,0 +1,31 @@ +{ + mldr_reranking: { + class_path: 'RerankingEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'mldr-reranking-query', + }, + }, + test_query_dataset: { + class_path: 'HfRerankingQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'mldr-reranking-query', + }, + }, + doc_dataset: { + class_path: 'HfRerankingDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'mldr-reranking-corpus', + }, + }, + }, + }, +} diff --git a/src/jmteb/configs/tasks/mldr_retrieval.jsonnet b/src/jmteb/configs/tasks/mldr_retrieval.jsonnet new file mode 100644 index 0000000..71c0bee --- /dev/null +++ b/src/jmteb/configs/tasks/mldr_retrieval.jsonnet @@ -0,0 +1,32 @@ +{ + mldr_retrieval: { + class_path: 'RetrievalEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'mldr-retrieval-query', + }, + }, + test_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 
'sbintuitions/JMTEB', + split: 'test', + name: 'mldr-retrieval-query', + }, + }, + doc_dataset: { + class_path: 'HfRetrievalDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'mldr-retrieval-corpus', + }, + }, + "doc_chunk_size":10000 + }, + }, +} diff --git a/src/jmteb/configs/tasks/nlp_journal_abs_article.jsonnet b/src/jmteb/configs/tasks/nlp_journal_abs_article.jsonnet new file mode 100644 index 0000000..f2c175f --- /dev/null +++ b/src/jmteb/configs/tasks/nlp_journal_abs_article.jsonnet @@ -0,0 +1,31 @@ +{ + nlp_journal_abs_article: { + class_path: 'RetrievalEvaluator', + init_args: { + val_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'nlp_journal_abs_article-query', + }, + }, + test_query_dataset: { + class_path: 'HfRetrievalQueryDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'nlp_journal_abs_article-query', + }, + }, + doc_dataset: { + class_path: 'HfRetrievalDocDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'corpus', + name: 'nlp_journal_abs_article-corpus', + }, + }, + }, + }, +} diff --git a/src/jmteb/configs/tasks/sib200_japanese_classification.jsonnet b/src/jmteb/configs/tasks/sib200_japanese_classification.jsonnet new file mode 100644 index 0000000..852505f --- /dev/null +++ b/src/jmteb/configs/tasks/sib200_japanese_classification.jsonnet @@ -0,0 +1,31 @@ +{ + sib200_japanese_classification: { + class_path: 'ClassificationEvaluator', + init_args: { + train_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'train', + name: 'sib200_japanese_classification', + }, + }, + val_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'sib200_japanese_classification', + }, + }, + test_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 
'sbintuitions/JMTEB', + split: 'test', + name: 'sib200_japanese_classification', + }, + }, + }, + }, +} diff --git a/src/jmteb/configs/tasks/paws_x_ja.jsonnet b/src/jmteb/configs/tasks/sib200_japanese_clustering.jsonnet similarity index 53% rename from src/jmteb/configs/tasks/paws_x_ja.jsonnet rename to src/jmteb/configs/tasks/sib200_japanese_clustering.jsonnet index ee57b72..762d34a 100644 --- a/src/jmteb/configs/tasks/paws_x_ja.jsonnet +++ b/src/jmteb/configs/tasks/sib200_japanese_clustering.jsonnet @@ -1,21 +1,21 @@ { - paws_x_ja: { - class_path: 'PairClassificationEvaluator', + sib200_japanese_clustering: { + class_path: 'ClusteringEvaluator', init_args: { val_dataset: { - class_path: 'HfPairClassificationDataset', + class_path: 'HfClusteringDataset', init_args: { path: 'sbintuitions/JMTEB', split: 'validation', - name: 'paws_x_ja', + name: 'sib200_japanese_clustering', }, }, test_dataset: { - class_path: 'HfPairClassificationDataset', + class_path: 'HfClusteringDataset', init_args: { path: 'sbintuitions/JMTEB', split: 'test', - name: 'paws_x_ja', + name: 'sib200_japanese_clustering', }, }, }, diff --git a/src/jmteb/configs/tasks/wrime_classification.jsonnet b/src/jmteb/configs/tasks/wrime_classification.jsonnet new file mode 100644 index 0000000..7fb68b7 --- /dev/null +++ b/src/jmteb/configs/tasks/wrime_classification.jsonnet @@ -0,0 +1,31 @@ +{ + wrime_classification: { + class_path: 'ClassificationEvaluator', + init_args: { + train_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'train', + name: 'wrime_classification', + }, + }, + val_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'validation', + name: 'wrime_classification', + }, + }, + test_dataset: { + class_path: 'HfClassificationDataset', + init_args: { + path: 'sbintuitions/JMTEB', + split: 'test', + name: 'wrime_classification', + }, + }, + }, + }, +} diff --git 
a/src/jmteb/embedders/__init__.py b/src/jmteb/embedders/__init__.py index f28f038..5a3e19c 100644 --- a/src/jmteb/embedders/__init__.py +++ b/src/jmteb/embedders/__init__.py @@ -2,6 +2,8 @@ from jmteb.embedders.data_parallel_sbert_embedder import ( DataParallelSentenceBertEmbedder, ) +from jmteb.embedders.gemma_embedder import GemmaEmbedder from jmteb.embedders.openai_embedder import OpenAIEmbedder +from jmteb.embedders.plamo_embedder import PlamoEmbedder from jmteb.embedders.sbert_embedder import SentenceBertEmbedder from jmteb.embedders.transformers_embedder import TransformersEmbedder diff --git a/src/jmteb/embedders/base.py b/src/jmteb/embedders/base.py index ea078f1..42a5d54 100644 --- a/src/jmteb/embedders/base.py +++ b/src/jmteb/embedders/base.py @@ -144,3 +144,17 @@ def set_output_tensor(self): def set_output_numpy(self): self.convert_to_numpy = True self.convert_to_tensor = False + + def set_max_seq_length(self, max_seq_length: int | None = None) -> None: + if hasattr(self, "max_seq_length"): + self.max_seq_length = max_seq_length + else: + logger.warning("Embedder doesn't have a `max_seq_length` attribute!") + + def reset_max_seq_length(self): + orig_max_seq_length = getattr(self, "_orig_max_length", None) + if not orig_max_seq_length: + logger.warning("Failed to reset `max_seq_length`!") + else: + logger.info(f"Set `max_seq_length` to model default: {orig_max_seq_length}") + self.max_seq_length = orig_max_seq_length diff --git a/src/jmteb/embedders/data_parallel_sbert_embedder.py b/src/jmteb/embedders/data_parallel_sbert_embedder.py index 7416fe4..5d932e2 100644 --- a/src/jmteb/embedders/data_parallel_sbert_embedder.py +++ b/src/jmteb/embedders/data_parallel_sbert_embedder.py @@ -201,6 +201,7 @@ def __init__( ) self.dp_model = DPSentenceTransformer(sbert_model=model) self.model = self.dp_model.sbert + self._orig_max_length = self.model.max_seq_length if max_seq_length: self.model.max_seq_length = max_seq_length self.initital_batch_size = batch_size @@ 
-258,3 +259,7 @@ def _add_eos_func(self, text: str | list[str]) -> str | list[str]: def get_output_dim(self) -> int: return self.model.get_sentence_embedding_dimension() + + def reset_max_seq_length(self): + logger.info(f"Reset `max_seq_length` to {self._orig_max_length}") + self.model.max_seq_length = self._orig_max_length diff --git a/src/jmteb/embedders/gemma_embedder.py b/src/jmteb/embedders/gemma_embedder.py new file mode 100644 index 0000000..5949845 --- /dev/null +++ b/src/jmteb/embedders/gemma_embedder.py @@ -0,0 +1,219 @@ +from __future__ import annotations + +import numpy as np +import torch +from loguru import logger +from sentence_transformers import SentenceTransformer + +from jmteb.embedders.base import TextEmbedder + + +class GemmaEmbedder(TextEmbedder): + """ + Google EmbeddingGemma model embedder using SentenceTransformers. + + This class supports the EmbeddingGemma models from Google (e.g., embeddinggemma-300m). + It uses SentenceTransformers to load the model and provides specialized encode_query + and encode_document methods for optimal performance in different use cases. + """ + + def __init__( + self, + model_name_or_path: str = "google/embeddinggemma-300m", + batch_size: int = 32, + device: str | None = None, + normalize_embeddings: bool = True, + max_seq_length: int | None = None, + query_mode: bool = False, + add_eos: bool = False, + truncate_dim: int | None = None, + model_kwargs: dict | None = None, + tokenizer_kwargs: dict | None = None, + ) -> None: + """ + Initialize the EmbeddingGemma embedder using SentenceTransformers. 
+ + Args: + model_name_or_path: Path or name of the EmbeddingGemma model + batch_size: Batch size for encoding + device: Device to use ('cuda', 'cpu', or None for auto) + normalize_embeddings: Whether to normalize embeddings (recommended for EmbeddingGemma) + max_seq_length: Maximum sequence length (default: model's max, typically 2048) + query_mode: Whether to use query encoding mode by default + add_eos: Whether to add EOS token to inputs + truncate_dim: Truncate embeddings to this dimension (supports 768, 512, 256, 128) + model_kwargs: Additional kwargs for model loading + tokenizer_kwargs: Additional kwargs for tokenizer loading + """ + model_kwargs = self._model_kwargs_parser(model_kwargs or {}) + + # Initialize SentenceTransformer + self.model = SentenceTransformer( + model_name_or_path, + trust_remote_code=True, + truncate_dim=truncate_dim, + model_kwargs=model_kwargs, + tokenizer_kwargs=tokenizer_kwargs or {}, + ) + + # Store original max length and set new one if provided + self._orig_max_length = self.model.max_seq_length + if max_seq_length: + self.model.max_seq_length = max_seq_length + + self.batch_size = batch_size + self.device = device + self.normalize_embeddings = normalize_embeddings + self.max_seq_length = getattr(self.model, "max_seq_length", None) + self.add_eos = add_eos + self.query_mode = query_mode + + # Set output format based on model kwargs + if model_kwargs and "torch_dtype" in model_kwargs: + self.set_output_tensor() + else: + self.set_output_numpy() + + logger.info(f"Loaded EmbeddingGemma model: {model_name_or_path}") + logger.info(f"Model device: {self.model.device}, Max seq length: {self.max_seq_length}") + + def encode(self, text: str | list[str], prefix: str | None = None, **kwargs) -> np.ndarray | torch.Tensor: + """ + Encode text into embeddings using EmbeddingGemma's specialized methods. + + This method is compatible with the base TextEmbedder interface and works + seamlessly with batch_encode_with_cache. 
+ + Args: + text: Input text(s) to encode + prefix: Prefix to add to texts + **kwargs: Additional arguments (supports query_mode for specialized encoding) + + Returns: + Embeddings as numpy array or torch tensor + """ + if isinstance(text, str): + text = [text] + text_was_str = True + else: + text_was_str = False + + # Check for query_mode in kwargs, otherwise use instance default + use_query_mode = kwargs.get("query_mode", self.query_mode) + + # Apply prefix if provided + if prefix: + text = [prefix + t for t in text] + + if self.add_eos: + text = self._add_eos_func(text) + + # Use specialized encoding methods if available + if hasattr(self.model, "encode_query") and hasattr(self.model, "encode_document"): + if use_query_mode: + embeddings = self.model.encode_query(text) + else: + embeddings = self.model.encode_document(text) + + # Convert to appropriate format + if self.convert_to_numpy and isinstance(embeddings, torch.Tensor): + embeddings = embeddings.cpu().numpy() + elif not self.convert_to_numpy and isinstance(embeddings, np.ndarray): + embeddings = torch.from_numpy(embeddings) + else: + # Fallback to standard SentenceTransformer encode method + embeddings = self.model.encode( + text, + convert_to_numpy=self.convert_to_numpy, + convert_to_tensor=self.convert_to_tensor, + batch_size=self.batch_size, + device=self.device, + normalize_embeddings=self.normalize_embeddings, + **kwargs, + ) + + if text_was_str: + if isinstance(embeddings, np.ndarray) and embeddings.ndim > 1: + embeddings = embeddings[0] + elif isinstance(embeddings, torch.Tensor) and embeddings.ndim > 1: + embeddings = embeddings[0] + + return embeddings + + def encode_queries( + self, queries: str | list[str], prefix: str | None = None, **kwargs + ) -> np.ndarray | torch.Tensor: + """ + Convenience method to encode queries using query mode. 
+ + Args: + queries: Query text(s) to encode + prefix: Prefix to add + **kwargs: Additional arguments + + Returns: + Query embeddings + """ + return self.encode(queries, prefix=prefix, query_mode=True, **kwargs) + + def encode_documents( + self, documents: str | list[str], prefix: str | None = None, **kwargs + ) -> np.ndarray | torch.Tensor: + """ + Convenience method to encode documents using document mode. + + Args: + documents: Document text(s) to encode + prefix: Prefix to add + **kwargs: Additional arguments + + Returns: + Document embeddings + """ + return self.encode(documents, prefix=prefix, query_mode=False, **kwargs) + + def set_query_mode(self, query_mode: bool = True) -> None: + """ + Set the default encoding mode. + + Args: + query_mode: True for query mode, False for document mode + """ + self.query_mode = query_mode + logger.info(f"Set default encoding mode to {'query' if query_mode else 'document'}") + + def _add_eos_func(self, text: str | list[str]) -> str | list[str]: + """Add EOS token to text if available.""" + try: + eos_token = getattr(self.model.tokenizer, "eos_token") + except AttributeError: + return text + + if isinstance(text, str): + return text + eos_token + elif isinstance(text, list): + return [t + eos_token for t in text] + return text + + def get_output_dim(self) -> int: + """Get the dimensionality of output embeddings.""" + return self.model.get_sentence_embedding_dimension() + + def set_max_seq_length(self, max_seq_length: int | None = None) -> None: + """Set maximum sequence length.""" + if max_seq_length: + self.model.max_seq_length = max_seq_length + self.max_seq_length = max_seq_length + logger.info(f"Set max_seq_length to {max_seq_length}") + + def reset_max_seq_length(self) -> None: + """Reset max sequence length to model's original value.""" + try: + logger.info(f"Reset max_seq_length to {self._orig_max_length}") + self.model.max_seq_length = self._orig_max_length + self.max_seq_length = self._orig_max_length + except 
AttributeError: + logger.warning("Failed to reset max_seq_length - original value not available") + + def __repr__(self) -> str: + return f"GemmaEmbedder(model='{self.model.model_name}', device='{self.model.device}')" diff --git a/src/jmteb/embedders/openai_embedder.py b/src/jmteb/embedders/openai_embedder.py index 6ea8b8f..631f0c6 100644 --- a/src/jmteb/embedders/openai_embedder.py +++ b/src/jmteb/embedders/openai_embedder.py @@ -1,9 +1,12 @@ from __future__ import annotations from dataclasses import dataclass +from os import PathLike +from pathlib import Path import numpy as np import tiktoken +import tqdm from loguru import logger from openai import OpenAI @@ -14,7 +17,7 @@ class OpenAIEmbedderConfig: max_output_dim: int encoder_name: str - max_token_length: int + max_seq_length: int OPENAI_EMBEDDERS = { @@ -28,7 +31,12 @@ class OpenAIEmbedderConfig: class OpenAIEmbedder(TextEmbedder): """Embedder via OpenAI API.""" - def __init__(self, model: str = "text-embedding-3-small", dim: int | None = None) -> None: + def __init__( + self, + model: str = "text-embedding-3-small", + dim: int | None = None, + max_seq_length: int | None = None, + ) -> None: """Setup. model and dim: see https://platform.openai.com/docs/models/embeddings `text-embedding-3-large` model: max 3072 dim @@ -44,13 +52,19 @@ def __init__(self, model: str = "text-embedding-3-small", dim: int | None = None Args: model (str, optional): Name of an OpenAI embedding model. Defaults to "text-embedding-3-small". dim (int, optional): Output dimension. Defaults to 1536. + max_seq_length (int, optional): Maximum length of sequences. Default to None. """ self.client = OpenAI() # API key written in .env assert model in OPENAI_EMBEDDERS.keys(), f"`model` must be one of {list(OPENAI_EMBEDDERS.keys())}!" 
def _batch_encode_and_save_on_disk(
    self,
    text_list: list[str],
    save_path: str | PathLike[str],
    prefix: str | None = None,
    batch_size: int = 256,
    dtype: str = "float32",
    **kwargs,
) -> np.memmap:
    """
    Encode a list of texts and save the embeddings on disk using memmap.

    Texts are passed to `self.encode` in batches. If encoding a batch raises,
    that batch is retried in sub-batches of 16 texts; an exception raised
    during the retry is not caught and propagates to the caller.

    Args:
        text_list (list[str]): list of texts
        save_path (str): path to save the embeddings
        prefix (str, optional): the prefix to use for encoding. Defaults to None.
        batch_size (int): number of texts per `encode` call. Defaults to 256.
            Values below 1 or above 512 are replaced by 512.
        dtype (str, optional): data type. Defaults to "float32".
        **kwargs: forwarded unchanged to `self.encode`.

    Returns:
        np.memmap: read-only memmap of shape (len(text_list), output_dim)
            backed by `save_path`.
    """
    # Upper bound on texts per request. 512 is the value this method used to
    # force unconditionally (ignoring `batch_size`); keeping it as a cap means
    # a caller that passes a very large chunk size still gets 512-text requests.
    # NOTE(review): the sibling `batch_encode_with_cache` passes `self._chunk_size`,
    # presumably far larger than one API request should carry - confirm.
    max_request_batch_size = 512
    fallback_batch_size = 16
    if not 0 < batch_size <= max_request_batch_size:
        batch_size = max_request_batch_size

    num_samples = len(text_list)
    output_dim = self.get_output_dim()
    embeddings = np.memmap(save_path, dtype=dtype, mode="w+", shape=(num_samples, output_dim))

    with tqdm.tqdm(total=num_samples, desc="Encoding") as pbar:
        for start in range(0, num_samples, batch_size):
            batch = text_list[start : start + batch_size]
            try:
                batch_embeddings: np.ndarray = self.encode(batch, prefix=prefix, **kwargs)
            except Exception:
                # Best-effort recovery: any failure is treated as "request too
                # large" and the same texts are re-sent in small sub-batches.
                logger.error(f"{batch_size=}, {len(batch)=}")
                logger.warning("Batch too large, retrying with batch size 16")
                # np.vstack promotes a 1-D result (a sub-batch holding a single
                # text) to one row, so mixed 1-D / 2-D pieces stack correctly.
                batch_embeddings = np.vstack(
                    [
                        self.encode(batch[j : j + fallback_batch_size], prefix=prefix, **kwargs)
                        for j in range(0, len(batch), fallback_batch_size)
                    ]
                )
            embeddings[start : start + len(batch)] = batch_embeddings
            pbar.update(len(batch))

    embeddings.flush()
    # Re-open read-only so callers cannot modify the cache file by accident.
    return np.memmap(save_path, dtype=dtype, mode="r", shape=(num_samples, output_dim))
class PlamoEmbedder(TextEmbedder):
    """
    PLaMO embedding model embedder.

    Loads a PLaMO embedding model (by default ``pfnet/plamo-embedding-1b``) with
    ``trust_remote_code=True`` and encodes text through the model's own
    ``encode_query`` / ``encode_document`` methods, selected by ``query_mode``.

    NOTE(review): with more than one GPU the model is wrapped in
    ``torch.nn.DataParallel``, but encoding calls the wrapped module's methods
    directly, so the wrapper's scatter/gather is presumably not exercised -
    confirm whether multi-GPU encoding actually happens.
    """

    def __init__(
        self,
        model_name_or_path: str = "pfnet/plamo-embedding-1b",
        batch_size: int = 2,
        device: str | None = None,
        normalize_embeddings: bool = False,
        max_seq_length: int | None = None,
        query_mode: bool = False,
        model_kwargs: dict | None = None,
        tokenizer_kwargs: dict | None = None,
    ) -> None:
        """
        Initialize the PLaMO embedder.

        Args:
            model_name_or_path: Path or name of the PLaMO model
            batch_size: Number of texts passed to the model per encode call
            device: Device to use ('cuda', 'cpu', or None for auto)
            normalize_embeddings: Whether to L2-normalize embeddings
            max_seq_length: Maximum sequence length (default: the model config's
                ``max_position_embeddings``, or 4096 if the config lacks it)
            query_mode: Whether to use query encoding mode by default
            model_kwargs: Additional kwargs for model loading (None means no extras)
            tokenizer_kwargs: Additional kwargs for tokenizer loading (None means no extras)
        """
        # None replaces the former `{}` defaults so that no dict object is
        # shared between instances.
        model_kwargs = self._model_kwargs_parser({} if model_kwargs is None else model_kwargs)
        tokenizer_kwargs = {} if tokenizer_kwargs is None else tokenizer_kwargs

        # Load model and tokenizer with trust_remote_code=True for PLaMO
        self.model: PreTrainedModel = AutoModel.from_pretrained(
            model_name_or_path, trust_remote_code=True, **model_kwargs
        )
        self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path, trust_remote_code=True, **tokenizer_kwargs
        )

        self.batch_size = batch_size
        self.normalize_embeddings = normalize_embeddings
        self.query_mode = query_mode

        # Prefer CUDA when no device was requested and one is available
        if not device and torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = device or "cpu"

        self.model.to(self.device)

        # Wrap in DataParallel when several GPUs are visible
        if torch.cuda.device_count() > 1 and self.device == "cuda":
            logger.info(f"Using {torch.cuda.device_count()} GPUs with DataParallel")
            self.model = torch.nn.DataParallel(self.model)
            self.is_data_parallel = True
            self.distributed_state = True  # For compatibility with tests
        else:
            self.is_data_parallel = False
            self.distributed_state = None

        # Device of the first parameter; used when building empty result tensors
        self.model_device = next(self.model.parameters()).device
        logger.info(f"Model device: {self.model_device}, GPU count: {torch.cuda.device_count()}")

        # The config lives on the wrapped module when DataParallel is active
        config = self.model.module.config if self.is_data_parallel else self.model.config

        # NOTE(review): max_seq_length is stored here, but the encode calls in
        # `_encode_batch` do not pass it to the model - confirm it takes effect.
        self._orig_max_length = getattr(config, "max_position_embeddings", 4096)
        self.max_seq_length = max_seq_length or self._orig_max_length

        # Falls back to 2048 when the config has no `hidden_size`
        self.output_dim = getattr(config, "hidden_size", 2048)

        # Output format depends on whether a torch dtype was requested
        # (presumably these base-class helpers toggle `self.convert_to_numpy`).
        if "torch_dtype" in model_kwargs:
            self.set_output_tensor()
        else:
            self.set_output_numpy()

    def get_output_dim(self) -> int:
        """Get the dimensionality of output embeddings."""
        return self.output_dim

    def encode(self, text: str | list[str], prefix: str | None = None, **kwargs) -> np.ndarray | torch.Tensor:
        """
        Encode text into embeddings using PLaMO's specialized methods.

        Args:
            text: Input text(s) to encode
            prefix: Prefix prepended to every text when truthy
            **kwargs: Only ``query_mode`` is read; it overrides the instance default

        Returns:
            Embeddings as numpy array (when ``self.convert_to_numpy`` is truthy)
            or torch tensor. A single string input yields a 1-D result.
        """
        text_was_str = isinstance(text, str)
        if text_was_str:
            text = [text]

        # Per-call query_mode wins over the instance default
        use_query_mode = kwargs.get("query_mode", self.query_mode)

        if prefix:
            text = [prefix + t for t in text]

        with torch.inference_mode():
            embeddings = self._encode_batch(text, use_query_mode)

        if self.normalize_embeddings:
            embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

        # Flatten to 1-D when the caller passed a single string
        res = embeddings.view(-1) if text_was_str else embeddings

        if self.convert_to_numpy:
            return res.cpu().numpy() if res.is_cuda else res.numpy()
        return res

    def _encode_batch(self, text: list[str], query_mode: bool = False) -> torch.Tensor:
        """
        Encode texts in chunks of ``self.batch_size`` with an out-of-memory fallback.

        Args:
            text: List of texts to encode
            query_mode: True to use ``encode_query``, False to use ``encode_document``

        Returns:
            Embeddings of all texts concatenated along dim 0; an empty
            ``(0, output_dim)`` tensor when ``text`` is empty.
        """
        if len(text) == 0:
            return torch.empty(0, self.output_dim, device=self.model_device)

        chunk_size = self.batch_size
        all_embeddings = []

        # Get the actual model (handle DataParallel wrapper)
        actual_model = self.model.module if self.is_data_parallel else self.model
        # Pick the model's query or document encoder once instead of per call
        encode_fn = actual_model.encode_query if query_mode else actual_model.encode_document

        with torch.inference_mode():
            for i in range(0, len(text), chunk_size):
                chunk = text[i : i + chunk_size]

                try:
                    all_embeddings.append(encode_fn(chunk, self.tokenizer))
                except torch.cuda.OutOfMemoryError:
                    # Retry the failed chunk one text at a time, freeing cached
                    # CUDA memory before the retry and after every item.
                    logger.warning(f"OOM with chunk size {len(chunk)}, falling back to single item processing")
                    torch.cuda.empty_cache()

                    for single_text in chunk:
                        all_embeddings.append(encode_fn([single_text], self.tokenizer))
                        torch.cuda.empty_cache()

        if all_embeddings:
            return torch.cat(all_embeddings, dim=0)
        return torch.empty(0, self.output_dim, device=self.model_device)

    def encode_queries(
        self, queries: str | list[str], prefix: str | None = None, **kwargs
    ) -> np.ndarray | torch.Tensor:
        """
        Convenience method to encode queries using query mode.

        Args:
            queries: Query text(s) to encode
            prefix: Prefix to add
            **kwargs: Additional arguments forwarded to ``encode``

        Returns:
            Query embeddings
        """
        return self.encode(queries, prefix=prefix, query_mode=True, **kwargs)

    def encode_documents(
        self, documents: str | list[str], prefix: str | None = None, **kwargs
    ) -> np.ndarray | torch.Tensor:
        """
        Convenience method to encode documents using document mode.

        Args:
            documents: Document text(s) to encode
            prefix: Prefix to add
            **kwargs: Additional arguments forwarded to ``encode``

        Returns:
            Document embeddings
        """
        return self.encode(documents, prefix=prefix, query_mode=False, **kwargs)

    def set_query_mode(self, query_mode: bool = True) -> None:
        """
        Set the default encoding mode.

        Args:
            query_mode: True for query mode, False for document mode
        """
        self.query_mode = query_mode
        logger.info(f"Set default encoding mode to {'query' if query_mode else 'document'}")

    def reset_max_seq_length(self) -> None:
        """Reset max sequence length to the value recorded in ``__init__``.

        Logs a warning instead of raising when that value is missing or falsy.
        """
        if hasattr(self, "_orig_max_length") and self._orig_max_length:
            self.max_seq_length = self._orig_max_length
            logger.info(f"Reset max_seq_length to {self._orig_max_length}")
        else:
            logger.warning("Failed to reset max_seq_length - original value not available")
def reset_max_seq_length(self) -> None:
    """Restore the model's ``max_seq_length`` to the value saved in ``__init__``.

    ``__init__`` stores the model's initial ``max_seq_length`` in
    ``self._orig_max_length`` before applying any user-supplied override.
    If an attribute involved in the reset is missing, nothing is raised;
    a warning is logged instead, matching the other embedders' behaviour.
    """
    try:
        orig_max_length = self._orig_max_length
        self.model.max_seq_length = orig_max_length
    except AttributeError:
        # Stay best-effort (no exception), but make the failure visible.
        logger.warning("Failed to reset max_seq_length - original value not available")
    else:
        # Report success only after the assignment has actually happened.
        logger.info(f"Reset `max_seq_length` to {orig_max_length}")
for PlamoEmbedder if no explicit kwargs provided + encode_kwargs = self.encode_kwargs.copy() + + # Check if this is a PlamoEmbedder and set optimal encoding mode + if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"): + if "query_mode" not in encode_kwargs: + encode_kwargs["query_mode"] = False # Use document mode for classification texts + logger.info(f"Auto-optimized {model.__class__.__name__}: query_mode=False for classification texts") + logger.info("Encoding training and validation sentences...") X_train = model.batch_encode_with_cache( [item.text for item in self.train_dataset], prefix=self.prefix, cache_path=Path(cache_dir) / "train_embeddings.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.encode_kwargs, + **encode_kwargs, ) y_train = [item.label for item in self.train_dataset] @@ -81,7 +90,7 @@ def __call__( prefix=self.prefix, cache_path=Path(cache_dir) / "val_embeddings.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.encode_kwargs, + **encode_kwargs, ) y_val = [item.label for item in self.val_dataset] @@ -95,7 +104,7 @@ def __call__( prefix=self.prefix, cache_path=Path(cache_dir) / "test_embeddings.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.encode_kwargs, + **encode_kwargs, ) y_test = [item.label for item in self.test_dataset] diff --git a/src/jmteb/evaluators/clustering/evaluator.py b/src/jmteb/evaluators/clustering/evaluator.py index 2b8cdf2..bbce269 100644 --- a/src/jmteb/evaluators/clustering/evaluator.py +++ b/src/jmteb/evaluators/clustering/evaluator.py @@ -14,6 +14,7 @@ MiniBatchKMeans, ) from sklearn.metrics import homogeneity_completeness_v_measure +from sklearn.preprocessing import normalize from jmteb.embedders.base import TextEmbedder from jmteb.evaluators.base import EmbeddingEvaluator, EvaluationResults @@ -57,13 +58,22 @@ def __call__( if cache_dir is not None: Path(cache_dir).mkdir(parents=True, exist_ok=True) + # 
Auto-optimize for PlamoEmbedder if no explicit kwargs provided + encode_kwargs = self.encode_kwargs.copy() + + # Check if this is a PlamoEmbedder and set optimal encoding mode + if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"): + if "query_mode" not in encode_kwargs: + encode_kwargs["query_mode"] = False # Use document mode for clustering texts + logger.info(f"Auto-optimized {model.__class__.__name__}: query_mode=False for clustering texts") + logger.info("Converting validation data to embeddings...") val_embeddings = model.batch_encode_with_cache( [item.text for item in self.val_dataset], prefix=self.prefix, cache_path=Path(cache_dir) / "val_embeddings.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.encode_kwargs, + **encode_kwargs, ) val_labels = [item.label for item in self.val_dataset] @@ -77,7 +87,7 @@ def __call__( prefix=self.prefix, cache_path=Path(cache_dir) / "test_embeddings.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.encode_kwargs, + **encode_kwargs, ) test_labels = [item.label for item in self.test_dataset] @@ -127,7 +137,19 @@ def __call__( def _evaluate_clustering_model( embeddings: np.ndarray, y_true: list[int], clustering_model: ClusterMixin ) -> tuple[dict[str, float], list[int]]: - y_pred = clustering_model.fit_predict(embeddings) + try: + # First try without normalization to preserve original behavior when possible + y_pred = clustering_model.fit_predict(embeddings) + except ValueError as e: + # If overflow error occurs, apply normalization and retry + if "infinity" in str(e).lower() or "too large" in str(e).lower(): + logger.warning(f"Overflow detected in clustering, applying L2 normalization: {e}") + embeddings_normalized = normalize(embeddings, norm="l2") + y_pred = clustering_model.fit_predict(embeddings_normalized) + else: + # Re-raise if it's a different ValueError + raise e + h_score, c_score, v_score = homogeneity_completeness_v_measure( 
labels_pred=y_pred, labels_true=np.array(y_true) ) diff --git a/src/jmteb/evaluators/pair_classification/evaluator.py b/src/jmteb/evaluators/pair_classification/evaluator.py index ef466bf..8fba017 100644 --- a/src/jmteb/evaluators/pair_classification/evaluator.py +++ b/src/jmteb/evaluators/pair_classification/evaluator.py @@ -49,8 +49,19 @@ def __call__( if cache_dir is not None: Path(cache_dir).mkdir(parents=True, exist_ok=True) + # Auto-optimize for PlamoEmbedder if no explicit kwargs provided + encode_kwargs = self.encode_kwargs.copy() + + # Check if this is a PlamoEmbedder and set optimal encoding mode + if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"): + if "query_mode" not in encode_kwargs: + encode_kwargs["query_mode"] = False # Use document mode for pair classification texts + from loguru import logger + + logger.info(f"Auto-optimized {model.__class__.__name__}: query_mode=False for pair classification texts") + val_embeddings1, val_embeddings2, val_golden_labels = self._convert_to_embeddings( - model, self.val_dataset, "dev", overwrite_cache, cache_dir + model, self.val_dataset, "dev", overwrite_cache, cache_dir, encode_kwargs ) if self.val_dataset == self.test_dataset: test_embeddings1, test_embeddings2, test_golden_labels = ( @@ -60,7 +71,7 @@ def __call__( ) else: test_embeddings1, test_embeddings2, test_golden_labels = self._convert_to_embeddings( - model, self.test_dataset, "test", overwrite_cache, cache_dir + model, self.test_dataset, "test", overwrite_cache, cache_dir, encode_kwargs ) val_results = {} @@ -119,20 +130,24 @@ def _convert_to_embeddings( split: str = "test", overwrite_cache: bool = False, cache_dir: str | None = None, + encode_kwargs: dict | None = None, ) -> tuple[np.ndarray, np.ndarray, list[float]]: + if encode_kwargs is None: + encode_kwargs = self.encode_kwargs + embeddings1 = model.batch_encode_with_cache( [item.sentence1 for item in dataset], prefix=self.sentence1_prefix, cache_path=Path(cache_dir) / 
f"{split}_embeddings1.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.encode_kwargs, + **encode_kwargs, ) embeddings2 = model.batch_encode_with_cache( [item.sentence2 for item in dataset], prefix=self.sentence2_prefix, cache_path=Path(cache_dir) / f"{split}_embeddings2.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.encode_kwargs, + **encode_kwargs, ) golden_labels = [item.label for item in dataset] return embeddings1, embeddings2, golden_labels diff --git a/src/jmteb/evaluators/reranking/evaluator.py b/src/jmteb/evaluators/reranking/evaluator.py index 144ed36..0d1be95 100644 --- a/src/jmteb/evaluators/reranking/evaluator.py +++ b/src/jmteb/evaluators/reranking/evaluator.py @@ -38,6 +38,8 @@ class RerankingEvaluator(EmbeddingEvaluator): query_prefix (str | None): prefix for queries. Defaults to None. doc_prefix (str | None): prefix for documents. Defaults to None. log_predictions (bool): whether to log predictions of each datapoint. Defaults to False. + force_max_length (bool): whether to overwrite the global max_length with model's maximum token length. + Defaults to False. top_n_docs_to_log (int): log only top n documents. Defaults to 5. query_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding queries. Defaults to {}. doc_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding documents. Defaults to {}. 
@@ -53,6 +55,7 @@ def __init__( doc_prefix: str | None = None, log_predictions: bool = False, top_n_docs_to_log: int = 5, + force_max_length: bool = False, query_encode_kwargs: dict = {}, doc_encode_kwargs: dict = {}, ) -> None: @@ -65,6 +68,7 @@ def __init__( self.doc_prefix = doc_prefix self.log_predictions = log_predictions self.top_n_docs_to_log = top_n_docs_to_log + self.force_max_length = force_max_length self.query_encode_kwargs = query_encode_kwargs self.doc_encode_kwargs = doc_encode_kwargs @@ -75,15 +79,33 @@ def __call__( overwrite_cache: bool = False, ) -> EvaluationResults: model.set_output_tensor() + if self.force_max_length: + model.reset_max_seq_length() + if cache_dir is not None: Path(cache_dir).mkdir(parents=True, exist_ok=True) + # Auto-optimize for PlamoEmbedder if no explicit kwargs provided + query_kwargs = self.query_encode_kwargs.copy() + doc_kwargs = self.doc_encode_kwargs.copy() + + # Check if this is a PlamoEmbedder and set optimal encoding modes + if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"): + if "query_mode" not in query_kwargs: + query_kwargs["query_mode"] = True # Use query mode for queries + if "query_mode" not in doc_kwargs: + doc_kwargs["query_mode"] = False # Use document mode for docs + logger.info( + f"Auto-optimized {model.__class__.__name__}: query_mode=True for queries," + "query_mode=False for documents" + ) + val_query_embeddings = model.batch_encode_with_cache( text_list=[item.query for item in self.val_query_dataset], prefix=self.query_prefix, cache_path=Path(cache_dir) / "val_query.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.query_encode_kwargs, + **query_kwargs, ) if self.val_query_dataset == self.test_query_dataset: test_query_embeddings = val_query_embeddings @@ -93,14 +115,14 @@ def __call__( prefix=self.query_prefix, cache_path=Path(cache_dir) / "test_query.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - 
**self.query_encode_kwargs, + **query_kwargs, ) doc_embeddings = model.batch_encode_with_cache( text_list=[item.text for item in self.doc_dataset], prefix=self.doc_prefix, cache_path=Path(cache_dir) / "corpus.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.doc_encode_kwargs, + **doc_kwargs, ) logger.info("Start reranking") @@ -211,8 +233,6 @@ def _format_predictions( pred_docs: list[RerankingDoc] = [ doc_dataset[doc_dataset.docid_to_idx[pred_docid]] for pred_docid in pred_docids ] - logger.info(f"{golden_docs=}") - logger.info(f"{pred_docs=}") prediction = RerankingPrediction( query=q.query, relevant_docs=golden_docs, diff --git a/src/jmteb/evaluators/retrieval/evaluator.py b/src/jmteb/evaluators/retrieval/evaluator.py index 2fd6a21..fc7476e 100644 --- a/src/jmteb/evaluators/retrieval/evaluator.py +++ b/src/jmteb/evaluators/retrieval/evaluator.py @@ -41,6 +41,8 @@ class RetrievalEvaluator(EmbeddingEvaluator): query_prefix (str | None): prefix for queries. Defaults to None. doc_prefix (str | None): prefix for documents. Defaults to None. log_predictions (bool): whether to log predictions of each datapoint. Defaults to False. + force_max_length (bool): whether to overwrite the global max_length with model's maximum token length. + Defaults to False. top_n_docs_to_log (int): log only top n documents that are predicted as relevant. Defaults to 5. query_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding queries. Defaults to {}. doc_encode_kwargs (dict): kwargs passed to embedder's encode function when encoding documents. Defaults to {}. 
@@ -58,6 +60,7 @@ def __init__( doc_prefix: str | None = None, log_predictions: bool = False, top_n_docs_to_log: int = 5, + force_max_length: bool = False, query_encode_kwargs: dict = {}, doc_encode_kwargs: dict = {}, ) -> None: @@ -67,7 +70,7 @@ def __init__( self.doc_chunk_size = doc_chunk_size - self.accuracy_at_k = accuracy_at_k or [1, 3, 5, 10] + self.accuracy_at_k = accuracy_at_k or [1, 3, 5, 10, 20, 30, 50] self.ndcg_at_k = ndcg_at_k or [10] self.max_top_k = max(sum([self.accuracy_at_k, self.ndcg_at_k], [])) self.main_metric = f"ndcg@{self.ndcg_at_k[0]}" @@ -76,6 +79,7 @@ def __init__( self.doc_prefix = doc_prefix self.log_predictions = log_predictions self.top_n_docs_to_log = top_n_docs_to_log + self.force_max_length = force_max_length self.query_encode_kwargs = query_encode_kwargs self.doc_encode_kwargs = doc_encode_kwargs @@ -86,15 +90,32 @@ def __call__( overwrite_cache: bool = False, ) -> EvaluationResults: model.set_output_tensor() + if self.force_max_length: + model.reset_max_seq_length() if cache_dir is not None: Path(cache_dir).mkdir(parents=True, exist_ok=True) + # Auto-optimize for PlamoEmbedder if no explicit kwargs provided + query_kwargs = self.query_encode_kwargs.copy() + doc_kwargs = self.doc_encode_kwargs.copy() + + # Check if this is a PlamoEmbedder and set optimal encoding modes + if model.__class__.__name__ in ("PlamoEmbedder", "GemmaEmbedder"): + if "query_mode" not in query_kwargs: + query_kwargs["query_mode"] = True # Use query mode for queries + if "query_mode" not in doc_kwargs: + doc_kwargs["query_mode"] = False # Use document mode for docs + logger.info( + f"Auto-optimized {model.__class__.__name__}: query_mode=True for queries," + "query_mode=False for documents" + ) + val_query_embeddings = model.batch_encode_with_cache( text_list=[item.query for item in self.val_query_dataset], prefix=self.query_prefix, cache_path=Path(cache_dir) / "val_query.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - 
**self.query_encode_kwargs, + **query_kwargs, ) if self.val_query_dataset == self.test_query_dataset: test_query_embeddings = val_query_embeddings @@ -104,7 +125,7 @@ def __call__( prefix=self.query_prefix, cache_path=Path(cache_dir) / "test_query.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.query_encode_kwargs, + **query_kwargs, ) doc_embeddings = model.batch_encode_with_cache( @@ -112,7 +133,7 @@ def __call__( prefix=self.doc_prefix, cache_path=Path(cache_dir) / "corpus.bin" if cache_dir is not None else None, overwrite_cache=overwrite_cache, - **self.doc_encode_kwargs, + **doc_kwargs, ) logger.info("Start retrieval") diff --git a/src/jmteb/evaluators/sts/evaluator.py b/src/jmteb/evaluators/sts/evaluator.py index 380ceea..f4d4359 100644 --- a/src/jmteb/evaluators/sts/evaluator.py +++ b/src/jmteb/evaluators/sts/evaluator.py @@ -52,8 +52,17 @@ def __call__( if cache_dir is not None: Path(cache_dir).mkdir(parents=True, exist_ok=True) + # Auto-optimize for PlamoEmbedder if no explicit kwargs provided + encode_kwargs = self.encode_kwargs.copy() + + # # Check if this is a PlamoEmbedder and set optimal encoding mode + # if model.__class__.__name__ == "PlamoEmbedder": + # if "query_mode" not in encode_kwargs: + # encode_kwargs["query_mode"] = False # Use document mode for STS texts + # logger.info("Auto-optimized PlamoEmbedder: query_mode=False for STS texts") + val_embeddings1, val_embeddings2, val_golden_scores = self._convert_to_embeddings( - model, self.val_dataset, "dev", overwrite_cache, cache_dir + model, self.val_dataset, "dev", overwrite_cache, cache_dir, encode_kwargs ) if self.val_dataset == self.test_dataset: test_embeddings1, test_embeddings2, test_golden_scores = ( @@ -62,7 +71,7 @@ def __call__( val_golden_scores, ) test_embeddings1, test_embeddings2, test_golden_scores = self._convert_to_embeddings( - model, self.test_dataset, "test", overwrite_cache, cache_dir + model, self.test_dataset, "test", overwrite_cache, 
def record_summary(self) -> None:
    """Merge this run's main metrics into ``summary.json`` under ``self.save_dir``.

    Does nothing when ``self.save_dir`` is falsy. If ``summary.json`` already
    exists, it is loaded and the new scores are merged into it: entries for
    datasets evaluated in this run are overwritten, all other entries are
    kept. The merged mapping has the shape
    ``{task_name: {dataset_name: {metric_name: metric_value}}}`` and is
    written through ``self.save_to_json``.
    """
    if not self.save_dir:
        return

    summary_path = Path(self.save_dir) / "summary.json"

    # Start from the previous summary so earlier runs' results are preserved
    if summary_path.exists():
        with open(summary_path, "r", encoding="utf-8") as fin:
            summary = json.load(fin)
    else:
        summary = {}

    # Merge new results into the existing summary, task by task
    for task_name, task_scores in self.scores.items():
        task_summary = summary.setdefault(task_name, {})
        for dataset_name, results in task_scores.items():
            task_summary[dataset_name] = {results.metric_name: results.metric_value}

    self.save_to_json(summary, summary_path)