diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4133e1d..088b834 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -38,7 +38,7 @@ jobs: cache: "poetry" - name: Install dependencies - run: poetry install + run: poetry install --all-extras - name: Download unidic artifacts run: | diff --git a/Dockerfile b/Dockerfile index 93a5aba..2643fea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,36 +1,58 @@ -FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 as base - -### Install python 3.10 and set it as default python interpreter -RUN apt update && apt install software-properties-common -y && \ -add-apt-repository ppa:deadsnakes/ppa -y && apt update && \ -apt install curl python3.10 build-essential vim -y && \ -update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \ -update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ -apt install python3.10-venv python3.10-dev -y && \ -curl -Ss https://bootstrap.pypa.io/get-pip.py | python3.10 && \ -apt-get clean && rm -rf /var/lib/apt/lists/ - -FROM base as build - -WORKDIR /tmp - -RUN pip install poetry - -COPY ./pyproject.toml ./poetry.lock* /tmp/ - -RUN poetry export -f requirements.txt --output requirements.txt --without-hashes - -FROM base as runtime +# Use CUDA 12.1 for better compatibility with PyTorch 2.6+ +FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04 AS base + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 + +# Install Python 3.11 and essential tools +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.11 \ + python3.11-venv \ + python3.11-dev \ + python3-pip \ + build-essential \ + curl \ + git \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \ + && update-alternatives --install /usr/bin/python python 
/usr/bin/python3.11 1 \ + && curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 \ + && pip install --no-cache-dir --upgrade pip setuptools wheel \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Build stage: export dependencies +FROM base AS builder + +WORKDIR /build + +# Install poetry +RUN pip install --no-cache-dir poetry==1.8.2 + +# Copy only dependency files first for better layer caching +COPY pyproject.toml poetry.lock* ./ + +# Export dependencies to requirements.txt +RUN poetry export -f requirements.txt --output requirements.txt --without-hashes --only main + +# Runtime stage: install application +FROM base AS runtime WORKDIR /app -COPY --from=build /tmp/requirements.txt /code/requirements.txt +# Copy application code and dependency files +COPY src ./src +COPY README.md pyproject.toml poetry.lock* ./ -RUN pip install --no-cache-dir -r /code/requirements.txt -RUN python -m unidic download +# Install poetry temporarily to handle the installation +RUN pip install --no-cache-dir --upgrade pip setuptools wheel \ + && pip install --no-cache-dir poetry==1.8.2 \ + && poetry config virtualenvs.create false \ + && poetry install --only main --no-interaction --no-ansi \ + && pip uninstall -y poetry -# アプリケーションのコードをコピー -COPY src /app/src -COPY README.md /app/README.md -COPY ./pyproject.toml /app/ -RUN pip install --no-cache-dir -e . +# Set default command to run JMTEB +CMD ["python", "-m", "jmteb.v2"] diff --git a/README.md b/README.md index c2cb6ed..ae27793 100644 --- a/README.md +++ b/README.md @@ -1,87 +1,514 @@ -# JMTEB: Japanese Massive Text Embedding Benchmark +# JMTEB v2.0 - MTEB-Powered Japanese Text Embedding Benchmark -

-

- README | - leaderboard | - submission guideline -

-

+JMTEB v2.0 is a major update to the Japanese Massive Text Embedding Benchmark that integrates with the [MTEB (Massive Text Embedding Benchmark)](https://github.com/embeddings-benchmark/mteb) framework. -[JMTEB](https://huggingface.co/datasets/sbintuitions/JMTEB) is a benchmark for evaluating Japanese text embedding models. It consists of 5 tasks. +## Overview -This is an easy-to-use evaluation script designed for JMTEB evaluation. +JMTEB v2.0 provides: -JMTEB leaderboard is [here](leaderboard.md). If you would like to submit your model, please refer to the [submission guideline](submission.md). +- 🌐 **MTEB Compatibility**: Integration with MTEB tools, leaderboards, and ecosystem +- 🚀 **MTEB as Evaluation Engine**: Leverage MTEB's robust framework +- 🎯 **Simpler API**: Cleaner, more intuitive interface +- 📊 **28 Japanese Datasets**: Comprehensive evaluation across 5 task types +- ⚡ **High Performance**: Efficient caching and batch processing -## Quick start +> [!IMPORTANT] +> The leaderboard is now hosted on the [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) (General Purpose → Language-specific → Japanese). We no longer maintain a separate leaderboard in this repository. + +## Quick Start + +### Installation + +**From source (Poetry):** ```bash -git clone git@github.com:sbintuitions/JMTEB +# Clone the repository +git clone https://github.com/sbintuitions/JMTEB.git cd JMTEB + +# Default (v2.0 with OpenAI support) poetry install -poetry run pytest tests -``` -The following command evaluate the specified model on the all the tasks in JMTEB. +# With v1.x support +poetry install --extras v1 -```bash -poetry run python -m jmteb \ - --embedder SentenceBertEmbedder \ - --embedder.model_name_or_path "" \ - --save_dir "output/" +# With everything +poetry install --all-extras ``` > [!NOTE] -> In order to gurantee the robustness of evaluation, a validation dataset is mandatorily required for hyperparameter tuning. 
-> For a dataset that doesn't have a validation set, we set the validation set the same as the test set. +> The package is not yet available on PyPI. Please install from source using the commands above. + +### Basic Usage + +```python +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_tasks, get_jmteb_lite_benchmark + +# Create model +model = JMTEBModel.from_sentence_transformer("cl-nagoya/ruri-v3-30m") -By default, the evaluation tasks are read from `src/jmteb/configs/jmteb.jsonnet`. -If you want to evaluate the model on a specific task, you can specify the task via `--evaluators` option with the task config. +# Get datasets - Full JMTEB +tasks = get_jmteb_tasks() # All 28 JMTEB datasets + +# Or use JMTEB-lite for faster evaluation +# lite_benchmark = get_jmteb_lite_benchmark() +# tasks = lite_benchmark.tasks + +# Evaluate +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + save_path="results_v2" +) +results = evaluator.run() +``` + +### CLI Usage ```bash -poetry run python -m jmteb \ - --evaluators "src/jmteb/configs/tasks/jsts.jsonnet" \ - --embedder SentenceBertEmbedder \ - --embedder.model_name_or_path "" \ - --save_dir "output/" +# Evaluate all datasets +python -m jmteb.v2 --model_name cl-nagoya/ruri-v3-30m --save_path results_v2 + +# Evaluate specific datasets +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --include JSTS JSICK JaqketRetrieval \ + --save_path results_v2 + +# Use prompts (e.g., for Ruri-v3 models) +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --prompt_profile src/jmteb/configs/prompts/ruri-v3.yaml \ + --save_path results_v2 ``` -> [!NOTE] -> Some tasks (e.g., AmazonReviewClassification in classification, JAQKET and Mr.TyDi-ja in retrieval, esci in reranking) are time-consuming and memory-consuming. Heavy retrieval tasks take hours to encode the large corpus, and use much memory for the storage of such vectors. 
If you want to exclude them, add `--eval_exclude "['amazon_review_classification', 'mrtydi', 'jaqket', 'esci']"`. Similarly, you can also use `--eval_include` to include only evaluation datasets you want. +## Architecture -> [!NOTE] -> If you want to log model predictions to further analyze the performance of your model, you may want to use `--log_predictions true` to enable all evaluators to log predictions. It is also available to set whether to log in the config of evaluators. +### Core Components + +``` +src/jmteb/v2/ +├── __init__.py # Main exports +├── __main__.py # CLI entry point +├── adapters.py # Model adapter (JMTEBModel) +├── evaluator.py # Evaluation orchestrator +├── tasks.py # Task definitions and utilities +└── utils.py # Helper functions +``` -## Multi-GPU support +### Key Classes -There are two ways to enable multi-GPU evaluation. +#### 1. JMTEBModel -* New class `DataParallelSentenceBertEmbedder` ([here](src/jmteb/embedders/data_parallel_sbert_embedder.py)). +Adapter that bridges models with MTEB's evaluation system. -```bash -poetry run python -m jmteb \ - --evaluators "src/jmteb/configs/tasks/jsts.jsonnet" \ - --embedder DataParallelSentenceBertEmbedder \ - --embedder.model_name_or_path "" \ - --save_dir "output/" +```python +from jmteb.v2 import JMTEBModel + +# From HuggingFace via SentenceTransformer +model = JMTEBModel.from_sentence_transformer("cl-nagoya/ruri-v3-30m") + +# From MTEB's unified model interface (recommended) +model = JMTEBModel.from_mteb("cl-nagoya/ruri-v3-30m") + +``` + +#### 2. JMTEBV2Evaluator + +Orchestrates evaluation across multiple tasks. + +```python +from jmteb.v2 import JMTEBV2Evaluator + +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + save_path="results_v2", + batch_size=32, + task_batch_sizes={"JSTS": 128}, # Per-task overrides + overwrite_cache=False, + generate_summary=True, +) +results = evaluator.run() +``` + +#### 3. Task Utilities + +Functions for working with JMTEB tasks. 
+ +```python +from jmteb.v2.tasks import ( + get_jmteb_benchmark, + get_jmteb_tasks, + get_task_by_name, + get_task_category, +) + +# Get all datasets +all_tasks = get_jmteb_tasks() + +# Filter by task type +retrieval_tasks = get_jmteb_tasks(task_types=["Retrieval"]) + +# Get specific datasets +specific_tasks = get_jmteb_tasks(task_names=["JSTS", "JSICK"]) + +# Get full benchmark +benchmark = get_jmteb_benchmark() +``` + +## Task Coverage + +JMTEB v2.0 includes 28 datasets across 5 task types. + +### JMTEB vs JMTEB-lite + +- **JMTEB (Full)**: Complete benchmark with full corpus sizes +- **JMTEB-lite**: Lightweight version with reduced corpus sizes for faster evaluation (~5x faster with high correlation to full JMTEB) + +Both versions include the same 28 datasets: + +### Classification (7 datasets) + +- AmazonReviewsClassification +- AmazonCounterfactualClassification +- MassiveIntentClassification +- MassiveScenarioClassification +- JapaneseSentimentClassification +- SIB200Classification +- WRIMEClassification + +### Clustering (3 datasets) + +- LivedoorNewsClustering.v2 +- MewsC16JaClustering +- SIB200ClusteringS2S + +### STS (2 datasets) + +- JSTS +- JSICK + +### Retrieval (11 datasets) + +- JaqketRetrieval (→ JaqketRetrievalLite in JMTEB-lite) +- MrTidyRetrieval (→ MrTyDiJaRetrievalLite in JMTEB-lite) +- JaGovFaqsRetrieval +- NLPJournalTitleAbsRetrieval.V2 +- NLPJournalTitleIntroRetrieval.V2 +- NLPJournalAbsIntroRetrieval.V2 +- NLPJournalAbsArticleRetrieval.V2 +- JaCWIRRetrieval (→ JaCWIRRetrievalLite in JMTEB-lite) +- MIRACLRetrieval (→ MIRACLJaRetrievalLite in JMTEB-lite) +- MintakaRetrieval +- MultiLongDocRetrieval + +### Reranking (5 datasets) + +- ESCIReranking +- JQaRAReranking (→ JQaRARerankingLite in JMTEB-lite) +- JaCWIRReranking (→ JaCWIRRerankingLite in JMTEB-lite) +- MIRACLReranking +- MultiLongDocReranking + +## Features + +### 1. 
Prompt Support + +Configure prompts for models that require them (e.g., E5): + +```yaml +# src/jmteb/configs/prompts/e5.yaml +query: "query: " +document: "passage: " +``` + +```python +from jmteb.v2.utils import load_prompts + +prompts = load_prompts("src/jmteb/configs/prompts/e5.yaml") +model = JMTEBModel.from_sentence_transformer( + "intfloat/multilingual-e5-base", + prompts=prompts +) +``` + +### 2. Batch Size Configuration + +Different tasks have varying memory requirements (e.g., retrieval tasks with long documents need smaller batches to avoid OOM), so per-task batch size configuration helps optimize performance. + +```yaml +# batch_sizes.yaml +JSTS: 128 +JSICK: 128 +JaqketRetrieval: 32 +MIRACLRetrieval: 16 +MultiLongDocRetrieval: 8 +``` + +```python +from jmteb.v2.utils import load_batch_sizes + +batch_sizes = load_batch_sizes("batch_sizes.yaml") +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + task_batch_sizes=batch_sizes +) +``` + +### 3. Result Caching + +Results are automatically cached to avoid re-evaluation: + +```python +# First run: evaluates all tasks +evaluator.run() + +# Second run: loads from cache +evaluator.run() # Instant! + +# Force re-evaluation +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + overwrite_cache=True +) +evaluator.run() +``` + +### 4. Summary Generation + +Automatically generates `summary.json` with main scores: + +```json +{ + "Classification": { + "amazon_review_classification": { + "main_metric": "accuracy", + "main_score": 67.32, + "eval_time (s)": "12.34" + } + }, + "STS": { + "jsts": { + "main_metric": "cosine_spearman", + "main_score": 82.14, + "eval_time (s)": "5.67" + } + } +} +``` + +### 5. 
Progress Tracking + +Real-time progress updates during evaluation: + +``` +[1/28] Task: JSTS (batch_size=128) +-------------------------------------------------------------------------------- +✓ Completed: JSTS (time: 5.67s) + → Updated summary: STS/jsts = 82.14 (time: 5.67s) + +[2/28] Task: JSICK (batch_size=128) +-------------------------------------------------------------------------------- +✓ Loaded from cache: JSICK + → Updated summary: STS/jsick = 76.89 (cached) +``` + +## Advanced Usage + +### Custom Model Implementation + +Implement your own model by following the encode interface: + +```python +import numpy as np +from jmteb.v2 import JMTEBModel + +class CustomModel: + def encode(self, sentences: list[str], batch_size: int = 32, **kwargs) -> np.ndarray: + # Your encoding logic here + embeddings = your_model.encode(sentences) + return np.array(embeddings) + +# Wrap for JMTEB v2 +model = JMTEBModel(sentence_transformer=CustomModel()) ``` -* With `torchrun`, multi-GPU in [`TransformersEmbedder`](src/jmteb/embedders/transformers_embedder.py) is available. 
For example, +### Dataset-Specific Evaluation + +Evaluate subsets of datasets: + +```python +from jmteb.v2.tasks import get_jmteb_tasks + +# Only datasets from STS task +sts_tasks = get_jmteb_tasks(task_types=["STS"]) + +# Specific retrieval datasets +retrieval_tasks = get_jmteb_tasks( + task_names=["JaqketRetrieval", "MIRACLRetrieval"] +) + +# All classification datasets +classification_tasks = get_jmteb_tasks(task_types=["Classification"]) +``` + +### Mixed Precision + +Use FP16/BF16 for faster evaluation: + +```python +import torch + +model = JMTEBModel.from_sentence_transformer( + "cl-nagoya/ruri-v3-30m", + model_kwargs={"torch_dtype": torch.bfloat16} +) +``` + +Or via CLI: ```bash -MODEL_NAME= -MODEL_KWARGS="\{\'torch_dtype\':\'torch.bfloat16\'\}" -torchrun \ - --nproc_per_node=$GPUS_PER_NODE --nnodes=1 \ - src/jmteb/__main__.py --embedder TransformersEmbedder \ - --embedder.model_name_or_path ${MODEL_NAME} \ - --embedder.pooling_mode cls \ - --embedder.batch_size 4096 \ - --embedder.model_kwargs ${MODEL_KWARGS} \ - --embedder.max_seq_length 512 \ - --save_dir "output/${MODEL_NAME}" \ - --evaluators src/jmteb/configs/jmteb.jsonnet -``` - -Note that the batch size here is global batch size (`per_device_batch_size` × `n_gpu`). +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --bf16 true \ + --save_path results_v2 +``` + +## Comparison with v1.x + +| Feature | v1.x | v2.0 | +|---------|------|------| +| Evaluation Engine | Custom | MTEB | +| Dataset Count | 28 | 28 | +| Batch Configuration | Per-embedder | Global + Per-task | +| Prompt Support | Limited | Full | +| MTEB Compatibility | No | Yes | + +## Output Structure + +``` +results_v2/ +└── model_name/ + ├── JSTS.json # Individual dataset results + ├── JSICK.json + ├── JaqketRetrieval.json + ├── ... + └── summary.json # Aggregated summary +``` + +Each dataset result file contains: + +```json +{ + "test": [ + { + "main_score": 0.8214, + "metric1": value1, + "metric2": value2, + ... 
+ } + ] +} +``` + +## Migration from v1.x + +See [MIGRATION_V2.md](./docs/MIGRATION_V2.md) for a comprehensive migration guide. + +Quick comparison: + +**v1.x:** + +```python +from jmteb.embedders import SentenceBertEmbedder +embedder = SentenceBertEmbedder(model_name_or_path="cl-nagoya/ruri-base") +# ... manual evaluator setup +``` + +**v2.0:** + +```python +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_tasks + +model = JMTEBModel.from_sentence_transformer("cl-nagoya/ruri-v3-30m") +tasks = get_jmteb_tasks() +evaluator = JMTEBV2Evaluator(model=model, tasks=tasks) +evaluator.run() +``` + +## Performance Tips + +1. **Use appropriate batch sizes**: Larger models need smaller batches +2. **Enable caching**: Don't overwrite unless necessary +3. **Use mixed precision**: BF16 can significantly speed up evaluation +4. **Filter datasets**: Evaluate only what you need for faster iteration +5. **Per-dataset batch sizes**: Optimize for memory requirements + +## Examples + +See the `docs/examples/v2/` directory for complete examples: + +- `v2_basic_evaluation.py`: Basic usage +- `v2_cli_evaluation.sh`: CLI examples +- `v2_jmteb_lite.py`: JMTEB-lite usage +- `v2_mteb_model_loader.py`: Using MTEB's model loader + +For v1.x examples, see `docs/examples/v1/`. + +## Requirements + +- Python >= 3.10 +- PyTorch >= 2.0 +- MTEB >= 1.22.0 +- sentence-transformers >= 5.0 +- Other dependencies in `pyproject.toml` + +## Citation + +If you use JMTEB v2.0, please cite both JMTEB and MTEB: + +```bibtex +@article{jmteb2024, + title={JMTEB: Japanese Massive Text Embedding Benchmark}, + author={...}, + journal={...}, + year={2024} +} + +@article{mteb2024, + title={MTEB: Massive Text Embedding Benchmark}, + author={Muennighoff, Niklas and others}, + journal={arXiv preprint arXiv:2210.07316}, + year={2022} +} +``` + +## License + +Same as JMTEB v1.x. See LICENSE file. + +## Contributing + +Contributions are welcome! Please: + +1. 
Follow existing code style +2. Add tests for new features +3. Update documentation + +## Support + +- **Issues**: GitHub Issues +- **Discussions**: GitHub Discussions +- **Documentation**: See `docs/` directory +- **Migration Help**: See `docs/MIGRATION_V2.md` + +## Acknowledgments + +- MTEB team for the excellent evaluation framework +- All contributors to JMTEB v1.x +- Japanese NLP community for dataset contributions diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 522d330..7d78978 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -2,7 +2,6 @@ ## Important classes - ```mermaid classDiagram class EmbeddingEvaluator { @@ -28,6 +27,7 @@ poetry run python main.py \ ``` The overview of `main.py` is as follows: + - there are multiple `EmbeddingEvaluator` instances - each `EmbeddingEvaluator` instance is responsible for evaluating `TextEmbedder` on a specific task and a dataset - `EmbeddingEvaluator` outputs metrics and these results are logged @@ -41,8 +41,9 @@ You can see the classes implementing `EmbeddingEvaluator` under `src/evaluators/ These classes typically have some dataset to load instances. `EmbeddingEvaluator` takes a `TextEmbedder` instance with `__call__`, and the following happens: + - extract text data from datasets and pass them to `TextEmbedder` - get the embeddings from `TextEmbedder` - optionally cache the embeddings or load them from cache - perform computation with the embeddings to solve the task -- compute metrics and return them \ No newline at end of file +- compute metrics and return them diff --git a/docs/CUSTOM_DATASET_GUIDE.md b/docs/CUSTOM_DATASET_GUIDE.md new file mode 100644 index 0000000..5983003 --- /dev/null +++ b/docs/CUSTOM_DATASET_GUIDE.md @@ -0,0 +1,446 @@ +# Using Custom Datasets with JMTEB + +This guide explains how to evaluate embedding models on your own datasets using the JMTEB framework. 
+ +## Overview + +JMTEB v2 is built on top of MTEB (Massive Text Embedding Benchmark), which provides a flexible framework for creating custom evaluation tasks. You can create custom tasks for your own datasets and evaluate them using JMTEB's evaluation infrastructure. + +## Quick Start + +Here's a simple example of evaluating a custom retrieval dataset: + +```python +from mteb import AbsTaskRetrieval +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator + +# 1. Define your custom task +class MyCustomRetrieval(AbsTaskRetrieval): + metadata = { + "name": "MyCustomRetrieval", + "type": "Retrieval", + "description": "My custom retrieval dataset", + "main_score": "ndcg_at_10", + } + + def load_data(self, **kwargs): + # Load your dataset here + # Return format: dict with splits (train/dev/test) containing: + # - queries: dict[str, str] # query_id -> query_text + # - corpus: dict[str, dict] # doc_id -> {"text": doc_text} + # - relevant_docs: dict[str, dict[str, int]] # query_id -> {doc_id: relevance_score} + + queries = { + "q1": "What is machine learning?", + "q2": "How does deep learning work?", + } + + corpus = { + "d1": {"text": "Machine learning is a subset of AI..."}, + "d2": {"text": "Deep learning uses neural networks..."}, + "d3": {"text": "Artificial intelligence encompasses..."}, + } + + relevant_docs = { + "q1": {"d1": 1, "d3": 1}, + "q2": {"d2": 1}, + } + + return { + "test": { + "queries": queries, + "corpus": corpus, + "relevant_docs": relevant_docs, + } + } + +# 2. Create your model +model = JMTEBModel.from_sentence_transformer("your-model-name") + +# 3. Instantiate the custom task +custom_task = MyCustomRetrieval() + +# 4. Evaluate +evaluator = JMTEBV2Evaluator( + model=model, + tasks=[custom_task], + save_path="results/custom_evaluation", + batch_size=32, +) + +results = evaluator.run() +``` + +## Task Types + +MTEB supports several task types. Here's how to create each: + +### 1. 
Retrieval Task + +```python +from mteb import AbsTaskRetrieval + +class MyRetrieval(AbsTaskRetrieval): + metadata = { + "name": "MyRetrieval", + "type": "Retrieval", + "description": "Description of your retrieval task", + "main_score": "ndcg_at_10", # or "map", "recall_at_k", etc. + } + + def load_data(self, **kwargs): + return { + "test": { + "queries": dict[str, str], + "corpus": dict[str, dict[str, str]], + "relevant_docs": dict[str, dict[str, int]], + } + } +``` + +### 2. Classification Task + +```python +from mteb import AbsTaskClassification + +class MyClassification(AbsTaskClassification): + metadata = { + "name": "MyClassification", + "type": "Classification", + "description": "Description of your classification task", + "main_score": "accuracy", + } + + def load_data(self, **kwargs): + return { + "test": [ + {"text": "Sample text 1", "label": 0}, + {"text": "Sample text 2", "label": 1}, + # ... + ] + } +``` + +### 3. Clustering Task + +```python +from mteb import AbsTaskClustering + +class MyClustering(AbsTaskClustering): + metadata = { + "name": "MyClustering", + "type": "Clustering", + "description": "Description of your clustering task", + "main_score": "v_measure", + } + + def load_data(self, **kwargs): + return { + "test": [ + {"sentence": "Text 1", "label": 0}, + {"sentence": "Text 2", "label": 1}, + {"sentence": "Text 3", "label": 0}, + # ... + ] + } +``` + +### 4. STS (Semantic Textual Similarity) Task + +```python +from mteb import AbsTaskSTS + +class MySTS(AbsTaskSTS): + metadata = { + "name": "MySTS", + "type": "STS", + "description": "Description of your STS task", + "main_score": "cosine_spearman", + } + + def load_data(self, **kwargs): + return { + "test": [ + {"sentence1": "Text A", "sentence2": "Text B", "score": 0.8}, + {"sentence1": "Text C", "sentence2": "Text D", "score": 0.3}, + # ... + ] + } +``` + +### 5. 
Reranking Task + +```python +from mteb import AbsTaskReranking + +class MyReranking(AbsTaskReranking): + metadata = { + "name": "MyReranking", + "type": "Reranking", + "description": "Description of your reranking task", + "main_score": "map", + } + + def load_data(self, **kwargs): + return { + "test": { + "queries": dict[str, str], + "corpus": dict[str, dict[str, str]], + "relevant_docs": dict[str, dict[str, int]], + } + } +``` + +### 6. Pair Classification Task + +```python +from mteb import AbsTaskPairClassification + +class MyPairClassification(AbsTaskPairClassification): + metadata = { + "name": "MyPairClassification", + "type": "PairClassification", + "description": "Description of your pair classification task", + "main_score": "cosine_ap", + } + + def load_data(self, **kwargs): + return { + "test": [ + {"sent1": "Text 1", "sent2": "Text 2", "labels": 1}, + {"sent1": "Text 3", "sent2": "Text 4", "labels": 0}, + # ... + ] + } +``` + +## Loading Data from Files + +You can load your dataset from various formats: + +### From JSONL Files + +```python +import json +from mteb import AbsTaskRetrieval + +class MyRetrieval(AbsTaskRetrieval): + metadata = { + "name": "MyRetrieval", + "type": "Retrieval", + "description": "My custom retrieval task", + "main_score": "ndcg_at_10", + } + + def load_data(self, **kwargs): + # Load queries + queries = {} + with open("data/queries.jsonl", "r") as f: + for line in f: + item = json.loads(line) + queries[item["id"]] = item["text"] + + # Load corpus + corpus = {} + with open("data/corpus.jsonl", "r") as f: + for line in f: + item = json.loads(line) + corpus[item["id"]] = {"text": item["text"]} + + # Load relevance judgments + relevant_docs = {} + with open("data/qrels.jsonl", "r") as f: + for line in f: + item = json.loads(line) + query_id = item["query_id"] + if query_id not in relevant_docs: + relevant_docs[query_id] = {} + relevant_docs[query_id][item["doc_id"]] = item["score"] + + return { + "test": { + "queries": queries, + 
"corpus": corpus, + "relevant_docs": relevant_docs, + } + } +``` + +### From Hugging Face Datasets + +```python +from datasets import load_dataset +from mteb import AbsTaskClassification + +class MyHFClassification(AbsTaskClassification): + metadata = { + "name": "MyHFClassification", + "type": "Classification", + "description": "Classification task from HF dataset", + "main_score": "accuracy", + } + + def load_data(self, **kwargs): + # Load from Hugging Face + dataset = load_dataset("your-username/your-dataset") + + # Convert to MTEB format + test_data = [ + {"text": item["text"], "label": item["label"]} + for item in dataset["test"] + ] + + return {"test": test_data} +``` + +## Advanced: Multiple Splits + +You can provide train/validation/test splits: + +```python +def load_data(self, **kwargs): + return { + "train": [...], + "validation": [...], + "test": [...], + } +``` + +## Evaluating Multiple Custom Tasks + +```python +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator + +# Create model +model = JMTEBModel.from_sentence_transformer("your-model") + +# Create multiple custom tasks +task1 = MyRetrieval() +task2 = MyClassification() +task3 = MySTS() + +# Evaluate all tasks +evaluator = JMTEBV2Evaluator( + model=model, + tasks=[task1, task2, task3], + save_path="results/custom_evaluation", + batch_size=32, +) + +results = evaluator.run() +``` + +## Combining Custom Tasks with JMTEB Tasks + +You can evaluate custom tasks alongside standard JMTEB tasks: + +```python +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator, get_jmteb_tasks + +# Create model +model = JMTEBModel.from_sentence_transformer("your-model") + +# Get some JMTEB tasks +jmteb_tasks = get_jmteb_tasks(task_names=["JSTS", "JSICK"]) + +# Add your custom task +custom_task = MyCustomRetrieval() + +# Evaluate both +evaluator = JMTEBV2Evaluator( + model=model, + tasks=jmteb_tasks + [custom_task], + save_path="results/combined_evaluation", + batch_size=32, +) + +results = evaluator.run() +``` + +## Complete 
Example: Custom Japanese Retrieval Task + +```python +import json +from mteb import AbsTaskRetrieval +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator + +class JapaneseQARetrieval(AbsTaskRetrieval): + """Custom Japanese QA retrieval task.""" + + metadata = { + "name": "JapaneseQARetrieval", + "type": "Retrieval", + "description": "Japanese question answering retrieval", + "main_score": "ndcg_at_10", + "languages": ["jpn"], + } + + def load_data(self, **kwargs): + # Example: Load your Japanese QA data + queries = { + "q1": "日本の首都はどこですか?", + "q2": "富士山の高さは?", + } + + corpus = { + "d1": {"text": "東京は日本の首都です。"}, + "d2": {"text": "富士山は標高3,776メートルの日本最高峰の山です。"}, + "d3": {"text": "大阪は日本の主要都市の一つです。"}, + } + + relevant_docs = { + "q1": {"d1": 1}, + "q2": {"d2": 1}, + } + + return { + "test": { + "queries": queries, + "corpus": corpus, + "relevant_docs": relevant_docs, + } + } + +# Evaluate +model = JMTEBModel.from_sentence_transformer("cl-nagoya/ruri-base") +task = JapaneseQARetrieval() + +evaluator = JMTEBV2Evaluator( + model=model, + tasks=[task], + save_path="results/japanese_qa", + batch_size=32, +) + +results = evaluator.run() +print(f"NDCG@10: {results['JapaneseQARetrieval']['test']['ndcg_at_10']}") +``` + +## Tips and Best Practices + +1. **Use descriptive task names**: Make sure your task name clearly describes what it evaluates + +2. **Provide metadata**: Include a clear description and specify the main evaluation metric + +3. **Handle data loading errors**: Add error handling in your `load_data()` method + +4. **Test with small data first**: Start with a small subset to ensure your task works correctly + +5. **Follow MTEB conventions**: Use the same data format as standard MTEB tasks for consistency + +6. 
**Document your task**: Add comments explaining the dataset and evaluation setup + +## Further Resources + +- [MTEB Documentation](https://github.com/embeddings-benchmark/mteb) +- [MTEB Task Examples](https://github.com/embeddings-benchmark/mteb/tree/main/mteb/tasks) +- [JMTEB Example Scripts](./examples/) + +## Need Help? + +If you encounter issues: + +1. Check that your data format matches MTEB requirements +2. Look at existing JMTEB task implementations for reference +3. Ensure your task metadata is complete and correct +4. Test with a small dataset first before scaling up diff --git a/docs/MIGRATION_V2.md b/docs/MIGRATION_V2.md new file mode 100644 index 0000000..153d86c --- /dev/null +++ b/docs/MIGRATION_V2.md @@ -0,0 +1,397 @@ +# JMTEB v2.0 Migration Guide + +This guide helps you migrate from JMTEB v1.x to v2.0. + +> [!NOTE] +> For complete v2.0 documentation, see the main [README.md](../README.md). This guide focuses specifically on migration from v1.x. + +## Quick Start + +### Installation + +JMTEB v2.0 is now the default. See [README.md](../README.md#installation) for installation instructions. + +**TL;DR:** + +```bash +# v2.0 only (default) +poetry install + +# v2.0 + v1.x support +poetry install --extras v1 +``` + +### Do I Need to Migrate? + +**Yes, eventually.** While v1.x code will continue to work for now, the v1.x API is deprecated and will no longer be actively maintained. We recommend migrating to v2.0 for: + +- Continued support and bug fixes +- New features and improvements +- Better performance and MTEB ecosystem integration + +## Migration Examples + +### Example 1: Basic STS Dataset Evaluation + +
+v1.x Code + +```python +from jmteb.embedders import SentenceBertEmbedder +from jmteb.evaluators import STSEvaluator +from datasets import load_dataset + +# Create embedder +embedder = SentenceBertEmbedder(model_name_or_path="cl-nagoya/ruri-v3-30m") + +# Load dataset and create evaluator +dataset = load_dataset("sbintuitions/JMTEB", name="jsts") +evaluator = STSEvaluator(val_dataset=dataset["validation"]) + +# Run evaluation +metrics = evaluator(embedder) +print(metrics) +``` + +
+ +
+v2.0 Code + +```python +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_tasks + +# Create model +model = JMTEBModel.from_sentence_transformer("cl-nagoya/ruri-v3-30m") + +# Get datasets +tasks = get_jmteb_tasks(task_names=["JSTS"]) + +# Create evaluator and run +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + save_path="results_v2/ruri-v3-30m" +) +results = evaluator.run() +``` + +
+ +**Key Differences:** + +- No manual dataset loading required +- Single evaluator handles all tasks +- Automatic caching and summary generation + +### Example 2: Multiple Datasets from Different Task Types + +
+v1.x Code + +```python +from jmteb.embedders import SentenceBertEmbedder +from jmteb.evaluators import STSEvaluator, RetrievalEvaluator, ClassificationEvaluator + +embedder = SentenceBertEmbedder(model_name_or_path="cl-nagoya/ruri-v3-30m") + +# Manual evaluation of each task +jsts_eval = STSEvaluator(...) +jsts_metrics = jsts_eval(embedder) + +jaqket_eval = RetrievalEvaluator(...) +jaqket_metrics = jaqket_eval(embedder) + +amazon_eval = ClassificationEvaluator(...) +amazon_metrics = amazon_eval(embedder) +``` + +
+ +
+v2.0 Code + +```python +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_tasks + +model = JMTEBModel.from_sentence_transformer("cl-nagoya/ruri-v3-30m") + +# Evaluate multiple datasets at once +tasks = get_jmteb_tasks( + task_names=["JSTS", "JaqketRetrieval", "AmazonReviewsClassification"] +) + +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + save_path="results_v2/ruri-v3-30m" +) +results = evaluator.run() +``` + +
+ +### Example 3: Wrapping v1 Embedders + +You can wrap existing v1 embedders for use with v2: + +```python +from jmteb.embedders import SentenceBertEmbedder +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_tasks + +# Create v1 embedder (your existing code) +v1_embedder = SentenceBertEmbedder(model_name_or_path="cl-nagoya/ruri-v3-30m") + +# Wrap for v2 +model = JMTEBModel.from_jmteb_embedder(v1_embedder) + +# Use with v2 evaluator (all datasets) +tasks = get_jmteb_tasks() +evaluator = JMTEBV2Evaluator(model=model, tasks=tasks) +results = evaluator.run() +``` + +### Example 4: CLI Comparison + + + + + + + + + + +
v1.x CLIv2.0 CLI
+ +```bash +python -m jmteb \ + --embedder SentenceBertEmbedder \ + --embedder.model_name_or_path cl-nagoya/ruri-v3-30m \ + --save_dir results/ruri-v3-30m +``` + + + +```bash +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --save_path results_v2 +``` + +
+ +## Dataset Name Mapping + +Dataset names have been updated to match MTEB conventions. Use the helper function for conversion: + +```python +from jmteb.v2.tasks import convert_v1_task_name + +v2_name = convert_v1_task_name("jsts") # Returns "JSTS" +v2_name = convert_v1_task_name("jaqket") # Returns "JaqketRetrieval" +``` + +
+Complete Dataset Name Mapping Table + +| v1.x Name | v2.0 Name | +|-----------|-----------| +| `livedoor_news` | `LivedoorNewsClustering.v2` | +| `mewsc16` | `MewsC16JaClustering` | +| `jsts` | `JSTS` | +| `jsick` | `JSICK` | +| `jaqket` | `JaqketRetrieval` | +| `mrtydi` | `MrTidyRetrieval` | +| `jagovfaqs_22k` | `JaGovFaqsRetrieval` | +| `nlp_journal_title_abs` | `NLPJournalTitleAbsRetrieval.V2` | +| `nlp_journal_title_intro` | `NLPJournalTitleIntroRetrieval.V2` | +| `nlp_journal_abs_intro` | `NLPJournalAbsIntroRetrieval.V2` | +| `nlp_journal_abs_article` | `NLPJournalAbsArticleRetrieval.V2` | +| `jacwir_retrieval` | `JaCWIRRetrieval` | +| `miracl_retrieval` | `MIRACLRetrieval` | +| `esci` | `ESCIReranking` | +| `jqara` | `JQaRAReranking` | +| `jacwir_reranking` | `JaCWIRReranking` | +| `miracl_reranking` | `MIRACLReranking` | +| `amazon_review_classification` | `AmazonReviewsClassification` | +| `amazon_counterfactual_classification` | `AmazonCounterfactualClassification` | +| `massive_intent_classification` | `MassiveIntentClassification` | +| `massive_scenario_classification` | `MassiveScenarioClassification` | + +See `src/jmteb/v2/tasks.py` for the complete mapping in code. + +
+ +## Configuration Migration + +### Prompts + +**v2.0 uses YAML configuration files:** + +```yaml +# src/jmteb/configs/prompts/e5.yaml +query: "query: " +passage: "passage: " +``` + +Load and use: + +```python +from jmteb.v2.utils import load_prompts + +prompts = load_prompts("src/jmteb/configs/prompts/e5.yaml") +model = JMTEBModel.from_sentence_transformer( + "intfloat/multilingual-e5-base", + prompts=prompts +) +``` + +Or via CLI: + +```bash +python -m jmteb.v2 \ + --model_name intfloat/multilingual-e5-base \ + --prompt_profile src/jmteb/configs/prompts/e5.yaml \ + --save_path results_v2 +``` + +### Batch Sizes + +**v2.0 supports per-task batch size configuration:** + +```yaml +# batch_sizes.yaml +JSTS: 128 +JSICK: 128 +JaqketRetrieval: 32 +MIRACLRetrieval: 16 +MultiLongDocRetrieval: 8 +``` + +```python +from jmteb.v2.utils import load_batch_sizes + +batch_sizes = load_batch_sizes("batch_sizes.yaml") +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + task_batch_sizes=batch_sizes +) +``` + +## Results Format Changes + +### v1.x Results Structure + +```json +{ + "spearman": 0.8234, + "pearson": 0.8156 +} +``` + +### v2.0 Results Structure + +```json +{ + "validation": [ + { + "main_score": 0.8234, + "spearman": 0.8234, + "pearson": 0.8156, + ... + } + ] +} +``` + +**Key Changes:** + +- Results organized by split (test/validation/dev) +- `main_score` field indicates the primary metric +- More detailed metrics included +- MTEB-compatible format + +**Extracting Main Score:** + +```python +# v2.0 +with open("results_v2/model/JSTS.json") as f: + results = json.load(f) + main_score = results["validation"][0]["main_score"] +``` + +## Migration FAQ + +### Do the results change between v1 and v2? + +The evaluation logic is identical for most tasks, so results are directly comparable. Minor differences may occur for: + +- `MultiLongDocRetrieval` and `MultiLongDocReranking` (corpus size differences) + +### Can I use both v1 and v2 in the same project? + +Yes! 
They are separate modules: + +```python +# v1.x +from jmteb.embedders import SentenceBertEmbedder + +# v2.0 +from jmteb.v2 import JMTEBModel +``` + +### What happens to my v1 code when I install v2? + +Nothing! v1 code continues to work as-is, provided the `v1` extra is installed alongside v2: + +```bash +poetry install --extras v1 # Includes both v1 and v2 +``` + +### Can I convert v1 results to v2 format? + +The underlying metrics are the same, so you can manually convert if needed. However, re-running with v2 is recommended for consistency. + +### What about OpenAI/custom embedders? + +They work in v2: + +```python +# OpenAI +from jmteb.embedders import OpenAIEmbedder +from jmteb.v2 import JMTEBModel + +embedder = OpenAIEmbedder(model="text-embedding-3-small") +model = JMTEBModel.from_jmteb_embedder(embedder) + +# Custom +class CustomEmbedder: + def encode(self, texts, batch_size=32): + # Your logic + return embeddings + +model = JMTEBModel.from_jmteb_embedder(CustomEmbedder()) +``` + +## Next Steps + +1. **Read the main README**: See [README.md](../README.md) for complete v2.0 documentation +2. **Try the examples**: Check `examples/v2/v2_*.py` for working code +3. **Start small**: Migrate one evaluation at a time +4. **Ask for help**: Open an issue if you encounter problems + +## Benefits of Migrating + +- 🚀 Simpler, cleaner API +- ⚡ Better performance and caching +- 🌐 Access to MTEB ecosystem +- 📊 Automatic summary generation +- 🔧 Per-task batch size configuration +- 🎯 Progress tracking and better logging + +Happy migrating! diff --git a/docs/examples/v1/README.md b/docs/examples/v1/README.md new file mode 100644 index 0000000..8d400f7 --- /dev/null +++ b/docs/examples/v1/README.md @@ -0,0 +1,27 @@ +# Example scripts + +We provide some example scripts for different scenarios.
+ +#### [sentencebert_1gpu.sh](sentencebert_1gpu.sh) + +For all-task evaluation with a model that can be loaded with [`SentenceTransformer`](https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py) on a single GPU, with `fp16` enabled. The corresponding class in `JMTEB` is [`SentenceBertEmbedder`](../../../src/jmteb/embedders/sbert_embedder.py). + +#### [sentencebert_8gpu.sh](sentencebert_8gpu.sh) + +For all-task evaluation with a model that can be loaded with [`SentenceTransformer`](https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py) on 8 GPUs in a node, with `fp16` enabled. The corresponding class in `JMTEB` is [`DataParallelSentenceBertEmbedder`](../../../src/jmteb/embedders/data_parallel_sbert_embedder.py). + +#### [transformers_embedder_multigpu.sh](transformers_embedder_multigpu.sh) + +For all-task evaluation with a model that can be loaded with `AutoModel` in Hugging Face Transformers (even your DIY model as long as it is registered to `AutoModel`, as `trust_remote_code` is set to `True`) on 8 GPUs in a node, with `bf16` enabled. Note that to enable parallelism, `torchrun` is needed. The corresponding class in `JMTEB` is [`TransformersEmbedder`](../../../src/jmteb/embedders/transformers_embedder.py). + +#### [openai_embedder.sh](openai_embedder.sh) + +For all-task evaluation with an OpenAI embedding model through the API. Note that you must export your OpenAI API key before the evaluation. The corresponding class in `JMTEB` is [`OpenAIEmbedder`](../../../src/jmteb/embedders/openai_embedder.py). + +#### [exclude.sh](exclude.sh) + +Exclude some slow tasks based on [sentencebert_1gpu.sh](sentencebert_1gpu.sh). + +#### [include.sh](include.sh) + +Specify a few tasks to be run based on [sentencebert_1gpu.sh](sentencebert_1gpu.sh).
diff --git a/docs/examples/v1/exclude.sh b/docs/examples/v1/exclude.sh new file mode 100644 index 0000000..2b116e0 --- /dev/null +++ b/docs/examples/v1/exclude.sh @@ -0,0 +1,21 @@ +model=$1 + +echo "Running model: $model" + +echo "start" +date "+%Y-%m-%d %H:%M:%S" +echo "" + +poetry run python -m jmteb \ + --embedder SentenceBertEmbedder \ + --embedder.model_name_or_path "$model" \ + --embedder.model_kwargs '{"torch_dtype": "torch.float16"}' \ + --embedder.device cuda \ + --save_dir "results/${model//\//_}" \ + --overwrite_cache false \ + --evaluators src/jmteb/configs/jmteb.jsonnet \ + --eval_exclude "['amazon_review_classification', 'mrtydi', 'jaqket', 'esci']" + +echo "" +date "+%Y-%m-%d %H:%M:%S" +echo "end" \ No newline at end of file diff --git a/docs/examples/v1/include.sh b/docs/examples/v1/include.sh new file mode 100644 index 0000000..193e01c --- /dev/null +++ b/docs/examples/v1/include.sh @@ -0,0 +1,21 @@ +model=$1 + +echo "Running model: $model" + +echo "start" +date "+%Y-%m-%d %H:%M:%S" +echo "" + +poetry run python -m jmteb \ + --embedder SentenceBertEmbedder \ + --embedder.model_name_or_path "$model" \ + --embedder.model_kwargs '{"torch_dtype": "torch.float16"}' \ + --embedder.device cuda \ + --save_dir "results/${model//\//_}" \ + --overwrite_cache false \ + --evaluators src/jmteb/configs/jmteb.jsonnet \ + --eval_include "['livedoor_news', 'esci']" + +echo "" +date "+%Y-%m-%d %H:%M:%S" +echo "end" \ No newline at end of file diff --git a/docs/examples/v1/openai_embedder.sh b/docs/examples/v1/openai_embedder.sh new file mode 100644 index 0000000..3963b21 --- /dev/null +++ b/docs/examples/v1/openai_embedder.sh @@ -0,0 +1,20 @@ +model=$1 + +export OPENAI_API_KEY= + +echo "Running OpenAI model: $model" + +echo "start" +date "+%Y-%m-%d %H:%M:%S" +echo "" + +poetry run python -m jmteb \ + --embedder OpenAIEmbedder \ + --embedder.model "$model" \ + --save_dir "results/${model//\//_}" \ + --overwrite_cache false \ + --evaluators 
src/jmteb/configs/jmteb.jsonnet + +echo "" +date "+%Y-%m-%d %H:%M:%S" +echo "end" \ No newline at end of file diff --git a/docs/examples/v1/sentencebert_1gpu.sh b/docs/examples/v1/sentencebert_1gpu.sh new file mode 100644 index 0000000..39c180a --- /dev/null +++ b/docs/examples/v1/sentencebert_1gpu.sh @@ -0,0 +1,20 @@ +model=$1 + +echo "Running model: $model" + +echo "start" +date "+%Y-%m-%d %H:%M:%S" +echo "" + +poetry run python -m jmteb \ + --embedder SentenceBertEmbedder \ + --embedder.model_name_or_path "$model" \ + --embedder.model_kwargs '{"torch_dtype": "torch.float16"}' \ + --embedder.device cuda \ + --save_dir "results/${model//\//_}" \ + --overwrite_cache false \ + --evaluators src/jmteb/configs/jmteb.jsonnet + +echo "" +date "+%Y-%m-%d %H:%M:%S" +echo "end" \ No newline at end of file diff --git a/docs/examples/v1/sentencebert_8gpu.sh b/docs/examples/v1/sentencebert_8gpu.sh new file mode 100644 index 0000000..d17c404 --- /dev/null +++ b/docs/examples/v1/sentencebert_8gpu.sh @@ -0,0 +1,21 @@ +model=$1 + +echo "Running model: $model" + +echo "start" +date "+%Y-%m-%d %H:%M:%S" +echo "" + +# Data parallel +poetry run python -m jmteb \ + --embedder DataParallelSentenceBertEmbedder \ + --embedder.model_name_or_path "$model" \ + --embedder.model_kwargs '{"torch_dtype": "torch.float16"}' \ + --embedder.device cuda \ + --save_dir "results/${model//\//_}" \ + --overwrite_cache false \ + --evaluators src/jmteb/configs/jmteb.jsonnet + +echo "" +date "+%Y-%m-%d %H:%M:%S" +echo "end" \ No newline at end of file diff --git a/docs/examples/v1/transformers_embedder_multigpu.sh b/docs/examples/v1/transformers_embedder_multigpu.sh new file mode 100644 index 0000000..3b7d861 --- /dev/null +++ b/docs/examples/v1/transformers_embedder_multigpu.sh @@ -0,0 +1,26 @@ +model=$1 + +echo "Running model: $model" + +echo "start" +date "+%Y-%m-%d %H:%M:%S" +echo "" + +MODEL_KWARGS="\{\'torch_dtype\':\'torch.bfloat16\'\}" + +# embedder.batch_size is global batch size + +torchrun \ + 
--nproc_per_node=$GPUS_PER_NODE --nnodes=1 \ + src/jmteb/__main__.py --embedder TransformersEmbedder \ + --embedder.model_name_or_path "$model" \ + --embedder.pooling_mode cls \ + --embedder.batch_size 4096 \ + --embedder.model_kwargs ${MODEL_KWARGS} \ + --embedder.max_seq_length 512 \ + --save_dir "results/${model}" \ + --evaluators src/jmteb/configs/jmteb.jsonnet + +echo "" +date "+%Y-%m-%d %H:%M:%S" +echo "end" \ No newline at end of file diff --git a/docs/examples/v2/v2_basic_evaluation.py b/docs/examples/v2/v2_basic_evaluation.py new file mode 100644 index 0000000..ff8812c --- /dev/null +++ b/docs/examples/v2/v2_basic_evaluation.py @@ -0,0 +1,29 @@ +""" +Basic JMTEB v2.0 evaluation example. + +This script demonstrates how to evaluate a model on JMTEB v2.0 tasks using the MTEB framework. +""" + +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_tasks + +# Create model from HuggingFace (using small model for faster testing) +model = JMTEBModel.from_sentence_transformer( + model_name_or_path="cl-nagoya/ruri-v3-30m", +) + +# Get all JMTEB tasks (or specify task_names for specific tasks) +tasks = get_jmteb_tasks() + +# Create evaluator +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + save_path="results_v2/ruri-v3-30m", + batch_size=32, +) + +# Run evaluation +results = evaluator.run() + +print("Evaluation complete!
Check results_v2/ruri-v3-30m/ for detailed results and summary.json") diff --git a/docs/examples/v2/v2_cli_evaluation.sh b/docs/examples/v2/v2_cli_evaluation.sh new file mode 100644 index 0000000..26faa6e --- /dev/null +++ b/docs/examples/v2/v2_cli_evaluation.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# JMTEB v2.0 CLI evaluation examples + +# Basic evaluation - all tasks (using small model for faster testing) +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --save_path results_v2 \ + --batch_size 32 + +# Evaluate specific tasks only +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --include JSTS JSICK JaqketRetrieval \ + --save_path results_v2 \ + --batch_size 32 + +# Evaluate with prompts (e.g., for E5 models) +python -m jmteb.v2 \ + --model_name intfloat/multilingual-e5-base \ + --prompt_profile prompts/e5.yaml \ + --save_path results_v2 \ + --batch_size 64 + +# Evaluate with per-task batch sizes +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-large \ + --task_batch_sizes batch_sizes.yaml \ + --save_path results_v2 + +# Evaluate only retrieval tasks +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --task_types Retrieval \ + --save_path results_v2 \ + --batch_size 32 + +# Evaluate with FP16 precision +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --fp16 true \ + --save_path results_v2 \ + --batch_size 64 + +# Overwrite existing cached results +python -m jmteb.v2 \ + --model_name cl-nagoya/ruri-v3-30m \ + --overwrite_cache true \ + --save_path results_v2 \ + --batch_size 32 diff --git a/docs/examples/v2/v2_jmteb_lite.py b/docs/examples/v2/v2_jmteb_lite.py new file mode 100644 index 0000000..d594428 --- /dev/null +++ b/docs/examples/v2/v2_jmteb_lite.py @@ -0,0 +1,35 @@ +""" +JMTEB-lite evaluation example. + +This script demonstrates how to use JMTEB-lite for faster evaluation. +JMTEB-lite provides ~5x speedup with 0.97 Spearman correlation to full JMTEB. 
+""" + +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator, get_jmteb_lite_benchmark + +# Create model from HuggingFace (using small model for faster testing) +model = JMTEBModel.from_sentence_transformer( + model_name_or_path="cl-nagoya/ruri-v3-30m", +) + +# Get JMTEB-lite benchmark (reduced corpus sizes) +lite_benchmark = get_jmteb_lite_benchmark() +print(f"JMTEB-lite contains {len(lite_benchmark.tasks)} tasks") + +# Create evaluator +evaluator = JMTEBV2Evaluator( + model=model, + tasks=lite_benchmark.tasks, + save_path="results_v2_lite/ruri-v3-30m", + batch_size=32, +) + +# Run evaluation (much faster than full JMTEB!) +results = evaluator.run() + +print("\n" + "=" * 80) +print("JMTEB-lite evaluation complete!") +print("Results saved to: results_v2_lite/ruri-v3-30m/") +print("=" * 80) +print("\nNote: JMTEB-lite is ~5x faster than full JMTEB") +print("with 0.97 Spearman correlation to full JMTEB results.") diff --git a/docs/examples/v2/v2_mteb_model_loader.py b/docs/examples/v2/v2_mteb_model_loader.py new file mode 100644 index 0000000..ff5b061 --- /dev/null +++ b/docs/examples/v2/v2_mteb_model_loader.py @@ -0,0 +1,38 @@ +""" +JMTEB v2.0 example using MTEB's get_model interface. + +This example demonstrates using mteb.get_model() to load models, +which provides a unified interface for various model types. 
+""" + +from jmteb.v2 import JMTEBModel, JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_tasks + +# Load model using MTEB's unified interface (recommended) +# This automatically handles model-specific configurations +model = JMTEBModel.from_mteb("sentence-transformers/all-MiniLM-L6-v2") + +# You can also specify additional arguments +# model = JMTEBModel.from_mteb( +# "intfloat/multilingual-e5-base", +# revision="main", +# device="cuda" +# ) + +# Get specific tasks for quick testing +tasks = get_jmteb_tasks(task_names=["JSTS", "JSICK"]) + +# Create evaluator +evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + save_path="results_v2/all-MiniLM-L6-v2", + batch_size=128, # Can use larger batch for smaller models +) + +# Run evaluation +print("Starting evaluation with MTEB-loaded model...") +results = evaluator.run() + +print("\nEvaluation complete!") +print("Results saved to: results_v2/all-MiniLM-L6-v2/") diff --git a/docs/results/BAAI/bge-m3/summary.json b/docs/results/BAAI/bge-m3/summary.json deleted file mode 100644 index 72a5ee8..0000000 --- a/docs/results/BAAI/bge-m3/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.718621425743256 - }, - "amazon_review_classification": { - "macro_f1": 0.5664555524508175 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9441075327867781 - }, - "massive_intent_classification": { - "macro_f1": 0.7868184551588373 - }, - "massive_scenario_classification": { - "macro_f1": 0.8970320222457714 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8424907003170607 - }, - "wrime_classification": { - "macro_f1": 0.4316630478439933 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9327323748768209 - }, - "jacwir_reranking": { - "ndcg@10": 0.8955144849023412 - }, - "jqara": { - "ndcg@10": 0.5391637817603238 - }, - "miracl_reranking": { - "ndcg@10": 0.8596271423829606 - }, - "mldr_reranking": { - "ndcg@10": 0.9778261029468881 - 
} - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.851348898788452 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.6906829361885021 - }, - "jaqket": { - "ndcg@10": 0.5659460589444328 - }, - "mintaka_retrieval": { - "ndcg@10": 0.32175483024897333 - }, - "miracl_retrieval": { - "ndcg@10": 0.734809783755516 - }, - "mldr_retrieval": { - "ndcg@10": 0.5126063501865914 - }, - "mrtydi": { - "ndcg@10": 0.45179452203971654 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9521915103722084 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9752948774973371 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9602075886902439 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9197525363243463 - } - }, - "STS": { - "jsick": { - "spearman": 0.7926524802982091 - }, - "jsts": { - "spearman": 0.8020865982595183 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5475619174246511 - }, - "mewsc16": { - "v_measure_score": 0.4200457612686986 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3991288954568376 - } - } -} \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json deleted file mode 100644 index beacb01..0000000 --- a/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7769528027441275 - }, - "amazon_review_classification": { - "macro_f1": 0.5146406875677701 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8844781754440035 - }, - "massive_intent_classification": { - "macro_f1": 0.7872353730798753 - }, - "massive_scenario_classification": { - "macro_f1": 0.8639715373498098 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8350488266987821 - }, - "wrime_classification": { - "macro_f1": 0.3815230965003785 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.909518320556229 - }, - 
"jacwir_reranking": { - "ndcg@10": 0.5981293078380808 - }, - "jqara": { - "ndcg@10": 0.3719557553111225 - }, - "miracl_reranking": { - "ndcg@10": 0.6789908587925922 - }, - "mldr_reranking": { - "ndcg@10": 0.8281088898171538 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.4085978545476503 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.43879890119990833 - }, - "jaqket": { - "ndcg@10": 0.3555985699236658 - }, - "mintaka_retrieval": { - "ndcg@10": 0.1997740482697841 - }, - "miracl_retrieval": { - "ndcg@10": 0.16521386136598404 - }, - "mldr_retrieval": { - "ndcg@10": 0.12060735418211223 - }, - "mrtydi": { - "ndcg@10": 0.07107405961190999 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.5430415601583998 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.5585881454407594 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.629620778788499 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.3517328767423871 - } - }, - "STS": { - "jsick": { - "spearman": 0.7775668305928584 - }, - "jsts": { - "spearman": 0.7563460117163054 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.4601335671191492 - }, - "mewsc16": { - "v_measure_score": 0.3922006290468797 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3456006554316726 - } - } -} \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json deleted file mode 100644 index 6a83eb2..0000000 --- a/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7779156199278396 - }, - "amazon_review_classification": { - "macro_f1": 0.5111451768867725 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8782111274457993 - }, - "massive_intent_classification": { - "macro_f1": 0.7796973463634825 - }, - "massive_scenario_classification": { - "macro_f1": 0.8634142669499835 - }, 
- "sib200_japanese_classification": { - "macro_f1": 0.8506408877596591 - }, - "wrime_classification": { - "macro_f1": 0.3656175961601361 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9092446252246911 - }, - "jacwir_reranking": { - "ndcg@10": 0.605113846464576 - }, - "jqara": { - "ndcg@10": 0.36840730960684165 - }, - "miracl_reranking": { - "ndcg@10": 0.693114284522583 - }, - "mldr_reranking": { - "ndcg@10": 0.8530771666734125 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.42431895793525753 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.43601956332213093 - }, - "jaqket": { - "ndcg@10": 0.37354035206874886 - }, - "mintaka_retrieval": { - "ndcg@10": 0.2518443007449429 - }, - "miracl_retrieval": { - "ndcg@10": 0.14756204576714857 - }, - "mldr_retrieval": { - "ndcg@10": 0.16862391555076126 - }, - "mrtydi": { - "ndcg@10": 0.07770347901718931 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.5689006657309228 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.5911474254499767 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.618101892252404 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.3287673013916751 - } - }, - "STS": { - "jsick": { - "spearman": 0.7893346270810556 - }, - "jsts": { - "spearman": 0.7657111966582518 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.48558605187442483 - }, - "mewsc16": { - "v_measure_score": 0.4319848997472401 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3860004176729398 - } - } -} \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json deleted file mode 100644 index ebc1037..0000000 --- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7430232193667698 - }, - "amazon_review_classification": { - "macro_f1": 0.5196833867285527 - }, - 
"japanese_sentiment_classification": { - "macro_f1": 0.8969457721352727 - }, - "massive_intent_classification": { - "macro_f1": 0.7782504182162112 - }, - "massive_scenario_classification": { - "macro_f1": 0.8459551634050977 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8382321236746973 - }, - "wrime_classification": { - "macro_f1": 0.3814631725334783 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.906706098295787 - }, - "jacwir_reranking": { - "ndcg@10": 0.581551030502223 - }, - "jqara": { - "ndcg@10": 0.3666097794082717 - }, - "miracl_reranking": { - "ndcg@10": 0.6908907697836885 - }, - "mldr_reranking": { - "ndcg@10": 0.8615323536010276 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.39917758524262303 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.4460371569059824 - }, - "jaqket": { - "ndcg@10": 0.3845053301501902 - }, - "mintaka_retrieval": { - "ndcg@10": 0.2239147895010841 - }, - "miracl_retrieval": { - "ndcg@10": 0.13942471586306499 - }, - "mldr_retrieval": { - "ndcg@10": 0.139069576010256 - }, - "mrtydi": { - "ndcg@10": 0.07299085059942924 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.5835049460335981 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.5863133806218087 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.5743459511193183 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.32465205260710006 - } - }, - "STS": { - "jsick": { - "spearman": 0.7525289500265361 - }, - "jsts": { - "spearman": 0.7466329702466956 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.45840176801621957 - }, - "mewsc16": { - "v_measure_score": 0.4407932537977668 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.38669286929581886 - } - } -} \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json deleted file mode 100644 index 46f5e26..0000000 --- 
a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7640029182013914 - }, - "amazon_review_classification": { - "macro_f1": 0.5165133824101508 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8785996540635361 - }, - "massive_intent_classification": { - "macro_f1": 0.7815141648175687 - }, - "massive_scenario_classification": { - "macro_f1": 0.8643739735863134 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8179797886754027 - }, - "wrime_classification": { - "macro_f1": 0.37929751450328747 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9116742957456255 - }, - "jacwir_reranking": { - "ndcg@10": 0.6540921936468603 - }, - "jqara": { - "ndcg@10": 0.3839109493881204 - }, - "miracl_reranking": { - "ndcg@10": 0.7018821974047713 - }, - "mldr_reranking": { - "ndcg@10": 0.8442037101394532 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.4895140949755706 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.48413330907538854 - }, - "jaqket": { - "ndcg@10": 0.3872950509227257 - }, - "mintaka_retrieval": { - "ndcg@10": 0.25723625707011927 - }, - "miracl_retrieval": { - "ndcg@10": 0.2159968215066114 - }, - "mldr_retrieval": { - "ndcg@10": 0.18105368261359917 - }, - "mrtydi": { - "ndcg@10": 0.11016096912346693 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.5890880676571459 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.6005134171957127 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.691482229451667 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.377200379602747 - } - }, - "STS": { - "jsick": { - "spearman": 0.7914302448138066 - }, - "jsts": { - "spearman": 0.7677275529386515 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.4879255424919774 - }, - "mewsc16": { - "v_measure_score": 0.42611073323310256 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.2641681900458691 - } - } 
-} \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json deleted file mode 100644 index dad1d0c..0000000 --- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7767065011282246 - }, - "amazon_review_classification": { - "macro_f1": 0.5348080733659045 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8928165629175933 - }, - "massive_intent_classification": { - "macro_f1": 0.7678594675802368 - }, - "massive_scenario_classification": { - "macro_f1": 0.8624414954250645 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8376983111767246 - }, - "wrime_classification": { - "macro_f1": 0.4088843388537483 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9093431066849924 - }, - "jacwir_reranking": { - "ndcg@10": 0.6144762455614383 - }, - "jqara": { - "ndcg@10": 0.42466871751866847 - }, - "miracl_reranking": { - "ndcg@10": 0.7065312090166875 - }, - "mldr_reranking": { - "ndcg@10": 0.8742363417086798 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.4627911424268102 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.4824617060944974 - }, - "jaqket": { - "ndcg@10": 0.4416882664197474 - }, - "mintaka_retrieval": { - "ndcg@10": 0.28888654887615833 - }, - "miracl_retrieval": { - "ndcg@10": 0.1951539369285861 - }, - "mldr_retrieval": { - "ndcg@10": 0.18656064853165188 - }, - "mrtydi": { - "ndcg@10": 0.11438786651077741 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.439694854198857 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.40326645532241284 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.6048895627840009 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.36508949429446635 - } - }, - "STS": { - "jsick": { - "spearman": 0.7876474308902304 - }, - "jsts": { - "spearman": 0.7782114794698556 - } 
- }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5129910499369752 - }, - "mewsc16": { - "v_measure_score": 0.46267377071476495 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3603960521680572 - } - } -} \ No newline at end of file diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json deleted file mode 100644 index 3101473..0000000 --- a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7655145272700131 - }, - "amazon_review_classification": { - "macro_f1": 0.5273281594091623 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8821782850442395 - }, - "massive_intent_classification": { - "macro_f1": 0.772169445045981 - }, - "massive_scenario_classification": { - "macro_f1": 0.8625146467158739 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8145447793317748 - }, - "wrime_classification": { - "macro_f1": 0.40382215327142257 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9130235242422614 - }, - "jacwir_reranking": { - "ndcg@10": 0.6513884390883999 - }, - "jqara": { - "ndcg@10": 0.44959095699445484 - }, - "miracl_reranking": { - "ndcg@10": 0.7121442551193732 - }, - "mldr_reranking": { - "ndcg@10": 0.8679395106334268 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.5316167737103407 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.5120263378587457 - }, - "jaqket": { - "ndcg@10": 0.45810454318653493 - }, - "mintaka_retrieval": { - "ndcg@10": 0.30420713299186014 - }, - "miracl_retrieval": { - "ndcg@10": 0.260782337674165 - }, - "mldr_retrieval": { - "ndcg@10": 0.23652695166828322 - }, - "mrtydi": { - "ndcg@10": 0.1306190778426387 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.5464834936384055 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.5213267121181618 - }, - 
"nlp_journal_title_abs": { - "ndcg@10": 0.7412764112062588 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.4220927003134505 - } - }, - "STS": { - "jsick": { - "spearman": 0.7985649981589037 - }, - "jsts": { - "spearman": 0.7813825399856615 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5159318544938132 - }, - "mewsc16": { - "v_measure_score": 0.4267958807672512 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3178045302473092 - } - } -} \ No newline at end of file diff --git a/docs/results/OpenAI/text-embedding-3-large/summary.json b/docs/results/OpenAI/text-embedding-3-large/summary.json deleted file mode 100644 index cf5b699..0000000 --- a/docs/results/OpenAI/text-embedding-3-large/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7789727938896414 - }, - "amazon_review_classification": { - "macro_f1": 0.6043632319384946 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9689111460113327 - }, - "massive_intent_classification": { - "macro_f1": 0.8090871295952566 - }, - "massive_scenario_classification": { - "macro_f1": 0.9108443051510002 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8785070638424861 - }, - "wrime_classification": { - "macro_f1": 0.45837220696591946 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9358042266852659 - }, - "jacwir_reranking": { - "ndcg@10": 0.8678014849879991 - }, - "jqara": { - "ndcg@10": 0.5688951496540466 - }, - "miracl_reranking": { - "ndcg@10": 0.8379796888542357 - }, - "mldr_reranking": { - "ndcg@10": 0.9423911330344104 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8290267731484572 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7240937077183436 - }, - "jaqket": { - "ndcg@10": 0.48208863565793814 - }, - "mintaka_retrieval": { - "ndcg@10": 0.6351669096573943 - }, - "miracl_retrieval": { - "ndcg@10": 0.6056623188124566 - }, - "mldr_retrieval": { - "ndcg@10": 
0.4526315025094686 - }, - "mrtydi": { - "ndcg@10": 0.3488438390945784 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.923732838888777 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9932811349540317 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9655113335080678 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9547126796600445 - } - }, - "STS": { - "jsick": { - "spearman": 0.8126909906411093 - }, - "jsts": { - "spearman": 0.8376863979620452 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5708905391944281 - }, - "mewsc16": { - "v_measure_score": 0.4955424351458981 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4882897499806697 - } - } -} \ No newline at end of file diff --git a/docs/results/OpenAI/text-embedding-3-small/summary.json b/docs/results/OpenAI/text-embedding-3-small/summary.json deleted file mode 100644 index ccfdccb..0000000 --- a/docs/results/OpenAI/text-embedding-3-small/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7000818608185178 - }, - "amazon_review_classification": { - "macro_f1": 0.5592259673654241 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8997314741995592 - }, - "massive_intent_classification": { - "macro_f1": 0.7766119663088307 - }, - "massive_scenario_classification": { - "macro_f1": 0.8866536867311439 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8472270726472407 - }, - "wrime_classification": { - "macro_f1": 0.4005292604550654 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9291728102678644 - }, - "jacwir_reranking": { - "ndcg@10": 0.8472076343603366 - }, - "jqara": { - "ndcg@10": 0.3858424853310068 - }, - "miracl_reranking": { - "ndcg@10": 0.7761045097931168 - }, - "mldr_reranking": { - "ndcg@10": 0.9261211375496474 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.7958409152797974 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.640150048193537 - }, - "jaqket": { - 
"ndcg@10": 0.3394304922804131 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3243993062339528 - }, - "miracl_retrieval": { - "ndcg@10": 0.4844750116221409 - }, - "mldr_retrieval": { - "ndcg@10": 0.35067885909631535 - }, - "mrtydi": { - "ndcg@10": 0.2002984123046011 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.8583248954344459 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9846617848570168 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9170440283351765 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9017272741306225 - } - }, - "STS": { - "jsick": { - "spearman": 0.8083062989093882 - }, - "jsts": { - "spearman": 0.7808357024283473 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5457015968799334 - }, - "mewsc16": { - "v_measure_score": 0.4755374215259236 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.44591888262353296 - } - } -} \ No newline at end of file diff --git a/docs/results/OpenAI/text-embedding-ada-002/summary.json b/docs/results/OpenAI/text-embedding-ada-002/summary.json deleted file mode 100644 index 851c798..0000000 --- a/docs/results/OpenAI/text-embedding-ada-002/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.6441904761904762 - }, - "amazon_review_classification": { - "macro_f1": 0.5312953134953877 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8876337189807528 - }, - "massive_intent_classification": { - "macro_f1": 0.7457150118928685 - }, - "massive_scenario_classification": { - "macro_f1": 0.8689044829586676 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8039306302437722 - }, - "wrime_classification": { - "macro_f1": 0.3757375090991345 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9303611831749345 - }, - "jacwir_reranking": { - "ndcg@10": 0.8391440408595291 - }, - "jqara": { - "ndcg@10": 0.37540986441296365 - }, - "miracl_reranking": { - "ndcg@10": 0.7282642345185789 - }, - "mldr_reranking": 
{ - "ndcg@10": 0.9082852722613336 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.7807563383397835 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.6102270226904314 - }, - "jaqket": { - "ndcg@10": 0.4256467956806472 - }, - "mintaka_retrieval": { - "ndcg@10": 0.27093020670377677 - }, - "mrtydi": { - "ndcg@10": 0.1450739420851161 - }, - "miracl_retrieval": { - "ndcg@10": 0.3453600176817199 - }, - "mldr_retrieval": { - "ndcg@10": 0.3189777971587629 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9750618854208265 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9499224324391132 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9123300358752942 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.8197798210453923 - } - }, - "STS": { - "jsick": { - "spearman": 0.7909435250482901 - }, - "jsts": { - "spearman": 0.7894052744557472 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.4967445737496721 - }, - "mewsc16": { - "v_measure_score": 0.4691938182964486 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.49744578060685957 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-base-v2/summary.json b/docs/results/cl-nagoya/ruri-base-v2/summary.json deleted file mode 100644 index c090ce8..0000000 --- a/docs/results/cl-nagoya/ruri-base-v2/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7597182825660609 - }, - "amazon_review_classification": { - "macro_f1": 0.5554544939941979 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9235657959062215 - }, - "massive_intent_classification": { - "macro_f1": 0.8092593406289539 - }, - "massive_scenario_classification": { - "macro_f1": 0.8886710878440421 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8926416828413609 - }, - "wrime_classification": { - "macro_f1": 0.461674192977988 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9317155624145913 - }, - 
"jacwir_reranking": { - "ndcg@10": 0.8576025511447865 - }, - "jqara": { - "ndcg@10": 0.6066458919871698 - }, - "miracl_reranking": { - "ndcg@10": 0.842561072326263 - }, - "mldr_reranking": { - "ndcg@10": 0.8846847676615118 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8101096413526069 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7590325308586044 - }, - "jaqket": { - "ndcg@10": 0.5700921243106366 - }, - "mintaka_retrieval": { - "ndcg@10": 0.4417665675636218 - }, - "miracl_retrieval": { - "ndcg@10": 0.6821942595823656 - }, - "mldr_retrieval": { - "ndcg@10": 0.3773323411085737 - }, - "mrtydi": { - "ndcg@10": 0.4088554217076187 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.8805294567802572 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.8973083823806287 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9696059096853805 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.789314612552914 - } - }, - "STS": { - "jsick": { - "spearman": 0.8262585834114126 - }, - "jsts": { - "spearman": 0.8343314248100878 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5437561090974637 - }, - "mewsc16": { - "v_measure_score": 0.5060934807171409 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3553392136864812 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-base/summary.json b/docs/results/cl-nagoya/ruri-base/summary.json deleted file mode 100644 index 591ccd2..0000000 --- a/docs/results/cl-nagoya/ruri-base/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7665550732749669 - }, - "amazon_review_classification": { - "macro_f1": 0.5602315794414631 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.916854859845768 - }, - "massive_intent_classification": { - "macro_f1": 0.8122217429688374 - }, - "massive_scenario_classification": { - "macro_f1": 0.8861454528496383 - }, - "sib200_japanese_classification": { - "macro_f1": 
0.8773434580133629 - }, - "wrime_classification": { - "macro_f1": 0.4546702469392619 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9291919623555276 - }, - "jacwir_reranking": { - "ndcg@10": 0.8723926273423869 - }, - "jqara": { - "ndcg@10": 0.5415330056104515 - }, - "miracl_reranking": { - "ndcg@10": 0.7921821114257664 - }, - "mldr_reranking": { - "ndcg@10": 0.8801076117078023 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8247892121220626 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7550451217031677 - }, - "jaqket": { - "ndcg@10": 0.5023277717264268 - }, - "mintaka_retrieval": { - "ndcg@10": 0.45371270319906437 - }, - "miracl_retrieval": { - "ndcg@10": 0.5488453168704391 - }, - "mldr_retrieval": { - "ndcg@10": 0.35421737773497164 - }, - "mrtydi": { - "ndcg@10": 0.3558845666232437 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.8664858820958761 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.8723253192804757 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.952690372948545 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.7624967518065642 - } - }, - "STS": { - "jsick": { - "spearman": 0.8232158602892652 - }, - "jsts": { - "spearman": 0.8343499347567392 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5669485444435229 - }, - "mewsc16": { - "v_measure_score": 0.5205022529269108 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3854934527391879 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-large-v2/summary.json b/docs/results/cl-nagoya/ruri-large-v2/summary.json deleted file mode 100644 index e4a22b7..0000000 --- a/docs/results/cl-nagoya/ruri-large-v2/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7950890220234579 - }, - "amazon_review_classification": { - "macro_f1": 0.5708906806011181 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.935661827685557 - }, - 
"massive_intent_classification": { - "macro_f1": 0.8087242075730218 - }, - "massive_scenario_classification": { - "macro_f1": 0.8970775785938794 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8471804883814585 - }, - "wrime_classification": { - "macro_f1": 0.47233151152826275 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9321133927024134 - }, - "jacwir_reranking": { - "ndcg@10": 0.8529056816630052 - }, - "jqara": { - "ndcg@10": 0.644692559122629 - }, - "miracl_reranking": { - "ndcg@10": 0.857799148388121 - }, - "mldr_reranking": { - "ndcg@10": 0.9068464851749977 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8048616669652183 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7822527313926262 - }, - "jaqket": { - "ndcg@10": 0.6561070613824674 - }, - "mintaka_retrieval": { - "ndcg@10": 0.5040548535978852 - }, - "miracl_retrieval": { - "ndcg@10": 0.7046000072363299 - }, - "mldr_retrieval": { - "ndcg@10": 0.36969618230893564 - }, - "mrtydi": { - "ndcg@10": 0.4636780745156557 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9085158509835447 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9114732359476821 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.977434890774318 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.8232131912662143 - } - }, - "STS": { - "jsick": { - "spearman": 0.8212250726981067 - }, - "jsts": { - "spearman": 0.8424300570470996 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5562089376369613 - }, - "mewsc16": { - "v_measure_score": 0.509675337301281 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4605817648504685 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-large/summary.json b/docs/results/cl-nagoya/ruri-large/summary.json deleted file mode 100644 index 2e2cead..0000000 --- a/docs/results/cl-nagoya/ruri-large/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 
0.7950391460082398 - }, - "amazon_review_classification": { - "macro_f1": 0.5685184036314727 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9356380708493385 - }, - "massive_intent_classification": { - "macro_f1": 0.8209962603450597 - }, - "massive_scenario_classification": { - "macro_f1": 0.9002551808707712 - }, - "sib200_japanese_classification": { - "macro_f1": 0.852564312646895 - }, - "wrime_classification": { - "macro_f1": 0.46447181564392015 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9298778327436324 - }, - "jacwir_reranking": { - "ndcg@10": 0.8661076138203823 - }, - "jqara": { - "ndcg@10": 0.5958950681984889 - }, - "miracl_reranking": { - "ndcg@10": 0.8022791978749706 - }, - "mldr_reranking": { - "ndcg@10": 0.8690504682983363 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8169123630823522 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7763829985024149 - }, - "jaqket": { - "ndcg@10": 0.617343261611166 - }, - "mintaka_retrieval": { - "ndcg@10": 0.5106450721691843 - }, - "miracl_retrieval": { - "ndcg@10": 0.5547009159538185 - }, - "mldr_retrieval": { - "ndcg@10": 0.3476835812045506 - }, - "mrtydi": { - "ndcg@10": 0.38120908812619875 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.8652992529882778 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.8891161860918603 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9617411892426375 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.7922108957487803 - } - }, - "STS": { - "jsick": { - "spearman": 0.8199569498182433 - }, - "jsts": { - "spearman": 0.8426241685487486 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5443732953428371 - }, - "mewsc16": { - "v_measure_score": 0.5058998835740889 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.44757212682292163 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-small-v2/summary.json b/docs/results/cl-nagoya/ruri-small-v2/summary.json deleted file mode 100644 index 
eec64ee..0000000 --- a/docs/results/cl-nagoya/ruri-small-v2/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7767065011282246 - }, - "amazon_review_classification": { - "macro_f1": 0.5559888936165459 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8863640825159859 - }, - "massive_intent_classification": { - "macro_f1": 0.8199647165894474 - }, - "massive_scenario_classification": { - "macro_f1": 0.8816435555944846 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8156946375922746 - }, - "wrime_classification": { - "macro_f1": 0.452255956789983 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9320364061675573 - }, - "jacwir_reranking": { - "ndcg@10": 0.8818198634914105 - }, - "jqara": { - "ndcg@10": 0.5670420631375501 - }, - "miracl_reranking": { - "ndcg@10": 0.8332825788093644 - }, - "mldr_reranking": { - "ndcg@10": 0.9009377977029078 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8303842720270221 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7401670430071696 - }, - "jaqket": { - "ndcg@10": 0.6225429070303006 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3530718504041533 - }, - "miracl_retrieval": { - "ndcg@10": 0.6689773236918534 - }, - "mldr_retrieval": { - "ndcg@10": 0.32577528652704146 - }, - "mrtydi": { - "ndcg@10": 0.42400768916861914 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9064650891678154 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9041671364705328 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9729556994161748 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.7821156819492701 - } - }, - "STS": { - "jsick": { - "spearman": 0.8387675357095226 - }, - "jsts": { - "spearman": 0.8193470885317312 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5260577746749562 - }, - "mewsc16": { - "v_measure_score": 0.4947076915300828 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.47820319421479446 - } - } 
-} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-small/summary.json b/docs/results/cl-nagoya/ruri-small/summary.json deleted file mode 100644 index 079db3e..0000000 --- a/docs/results/cl-nagoya/ruri-small/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.8055421233612723 - }, - "amazon_review_classification": { - "macro_f1": 0.5541385299441624 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8885932202820669 - }, - "massive_intent_classification": { - "macro_f1": 0.8108237159349728 - }, - "massive_scenario_classification": { - "macro_f1": 0.8800077744996155 - }, - "sib200_japanese_classification": { - "macro_f1": 0.839667353042202 - }, - "wrime_classification": { - "macro_f1": 0.4595261443020403 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9301438020851305 - }, - "jacwir_reranking": { - "ndcg@10": 0.8766726074179287 - }, - "jqara": { - "ndcg@10": 0.5325863556709908 - }, - "miracl_reranking": { - "ndcg@10": 0.7783787989685144 - }, - "mldr_reranking": { - "ndcg@10": 0.8813650067339368 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.825837748200516 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.740126693753929 - }, - "jaqket": { - "ndcg@10": 0.4844203596195783 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3723496207549938 - }, - "miracl_retrieval": { - "ndcg@10": 0.5222032466588368 - }, - "mldr_retrieval": { - "ndcg@10": 0.2898890422890513 - }, - "mrtydi": { - "ndcg@10": 0.3351374258570715 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.8689213841203763 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.8723259697162892 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9619567235021281 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.7608782792491423 - } - }, - "STS": { - "jsick": { - "spearman": 0.8344934497771457 - }, - "jsts": { - "spearman": 0.8213145808052514 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 
0.5289736036070719 - }, - "mewsc16": { - "v_measure_score": 0.4936801242208388 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.46507426407220503 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-130m/summary.json b/docs/results/cl-nagoya/ruri-v3-130m/summary.json deleted file mode 100644 index 5700f32..0000000 --- a/docs/results/cl-nagoya/ruri-v3-130m/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7674793827265536 - }, - "amazon_review_classification": { - "macro_f1": 0.5955994619477079 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9500285886600925 - }, - "massive_intent_classification": { - "macro_f1": 0.807938642045445 - }, - "massive_scenario_classification": { - "macro_f1": 0.8790346026671575 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8287806075978352 - }, - "wrime_classification": { - "macro_f1": 0.46634901067800855 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9336981049156847 - }, - "jacwir_reranking": { - "ndcg@10": 0.8864670177419038 - }, - "jqara": { - "ndcg@10": 0.663018840039673 - }, - "miracl_reranking": { - "ndcg@10": 0.865876689917921 - }, - "mldr_reranking": { - "ndcg@10": 0.9362058245511219 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8421113535976967 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7532393338902414 - }, - "jaqket": { - "ndcg@10": 0.730979460582779 - }, - "mintaka_retrieval": { - "ndcg@10": 0.5177034569356731 - }, - "miracl_retrieval": { - "ndcg@10": 0.7100959869376436 - }, - "mldr_retrieval": { - "ndcg@10": 0.45158335316076936 - }, - "mrtydi": { - "ndcg@10": 0.4780012151028164 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.995144547086835 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9887952520028016 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9795152116360624 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9628103840588119 - } - }, - 
"STS": { - "jsick": { - "spearman": 0.7885956280300046 - }, - "jsts": { - "spearman": 0.8323603869543141 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5436288048604071 - }, - "mewsc16": { - "v_measure_score": 0.4883532965483729 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.5019988844015973 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-30m/summary.json b/docs/results/cl-nagoya/ruri-v3-30m/summary.json deleted file mode 100644 index c4e768a..0000000 --- a/docs/results/cl-nagoya/ruri-v3-30m/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7559571782387728 - }, - "amazon_review_classification": { - "macro_f1": 0.5570789457429248 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9262839486939813 - }, - "massive_intent_classification": { - "macro_f1": 0.783074979041957 - }, - "massive_scenario_classification": { - "macro_f1": 0.8672396605716526 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8140481078951145 - }, - "wrime_classification": { - "macro_f1": 0.4311261750368354 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9305651903486406 - }, - "jacwir_reranking": { - "ndcg@10": 0.8761294751423317 - }, - "jqara": { - "ndcg@10": 0.5747490185208084 - }, - "miracl_reranking": { - "ndcg@10": 0.8352458113588647 - }, - "mldr_reranking": { - "ndcg@10": 0.9297421530365237 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.827028266156452 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7020872105862214 - }, - "jaqket": { - "ndcg@10": 0.6244733500896729 - }, - "mintaka_retrieval": { - "ndcg@10": 0.4304756847175998 - }, - "miracl_retrieval": { - "ndcg@10": 0.6498916988979277 - }, - "mldr_retrieval": { - "ndcg@10": 0.4577076048703079 - }, - "mrtydi": { - "ndcg@10": 0.41775750844113785 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9876046427100846 - }, - "nlp_journal_abs_intro": { - 
"ndcg@10": 0.9916030162169887 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9699245797579602 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9534027111106339 - } - }, - "STS": { - "jsick": { - "spearman": 0.8161946935797372 - }, - "jsts": { - "spearman": 0.819463211043541 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5369067977199252 - }, - "mewsc16": { - "v_measure_score": 0.47961175798341066 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4804316290090649 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-310m/summary.json b/docs/results/cl-nagoya/ruri-v3-310m/summary.json deleted file mode 100644 index c27fed8..0000000 --- a/docs/results/cl-nagoya/ruri-v3-310m/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.8009270010529765 - }, - "amazon_review_classification": { - "macro_f1": 0.6071898527482484 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9530657500380437 - }, - "massive_intent_classification": { - "macro_f1": 0.8176293812793415 - }, - "massive_scenario_classification": { - "macro_f1": 0.890051922198645 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8812655271153628 - }, - "wrime_classification": { - "macro_f1": 0.4852854023445756 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9342725351989479 - }, - "jacwir_reranking": { - "ndcg@10": 0.8845859005757672 - }, - "jqara": { - "ndcg@10": 0.6893206802955604 - }, - "miracl_reranking": { - "ndcg@10": 0.8500853284469898 - }, - "mldr_reranking": { - "ndcg@10": 0.9335769070370818 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8406411130636801 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7648595155366429 - }, - "jaqket": { - "ndcg@10": 0.7186721885111346 - }, - "mintaka_retrieval": { - "ndcg@10": 0.5225348075920366 - }, - "miracl_retrieval": { - "ndcg@10": 0.677145342243983 - }, - "mldr_retrieval": { - "ndcg@10": 
0.43425275955863796 - }, - "mrtydi": { - "ndcg@10": 0.47064490316120666 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9958682142366949 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9935172926595653 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9790717306095701 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9658294271714906 - } - }, - "STS": { - "jsick": { - "spearman": 0.7886332339318622 - }, - "jsts": { - "spearman": 0.8430847366018317 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5855988614657296 - }, - "mewsc16": { - "v_measure_score": 0.4860478393120035 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4440626045366051 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/ruri-v3-70m/summary.json b/docs/results/cl-nagoya/ruri-v3-70m/summary.json deleted file mode 100644 index 3a2c52d..0000000 --- a/docs/results/cl-nagoya/ruri-v3-70m/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.8180877928218353 - }, - "amazon_review_classification": { - "macro_f1": 0.5798379850008339 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9339140455312027 - }, - "massive_intent_classification": { - "macro_f1": 0.7891754112354649 - }, - "massive_scenario_classification": { - "macro_f1": 0.8782518076402043 - }, - "sib200_japanese_classification": { - "macro_f1": 0.7686616284901401 - }, - "wrime_classification": { - "macro_f1": 0.4437562280187194 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9320237969329785 - }, - "jacwir_reranking": { - "ndcg@10": 0.8748197118530385 - }, - "jqara": { - "ndcg@10": 0.6309432249818713 - }, - "miracl_reranking": { - "ndcg@10": 0.8503057292439823 - }, - "mldr_reranking": { - "ndcg@10": 0.9225778620264797 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8275893500639571 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7327144021448485 - }, - "jaqket": { - "ndcg@10": 
0.6768047159335538 - }, - "mintaka_retrieval": { - "ndcg@10": 0.4626106409683068 - }, - "miracl_retrieval": { - "ndcg@10": 0.6797764462851262 - }, - "mldr_retrieval": { - "ndcg@10": 0.43554376517918675 - }, - "mrtydi": { - "ndcg@10": 0.4499999994407917 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.984966699117648 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9868218521221748 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9706955197203543 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9573354583951488 - } - }, - "STS": { - "jsick": { - "spearman": 0.7909930894957667 - }, - "jsts": { - "spearman": 0.828242284804404 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5492094636693866 - }, - "mewsc16": { - "v_measure_score": 0.47739615416643866 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4719940146272088 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json deleted file mode 100644 index 91d272c..0000000 --- a/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7192545517004465 - }, - "amazon_review_classification": { - "macro_f1": 0.5454422812215437 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9100588500656168 - }, - "massive_intent_classification": { - "macro_f1": 0.8011172170046241 - }, - "massive_scenario_classification": { - "macro_f1": 0.8762609424720998 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8191722798191963 - }, - "wrime_classification": { - "macro_f1": 0.4188203301151871 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9184207070049463 - }, - "jacwir_reranking": { - "ndcg@10": 0.6426611140199804 - }, - "jqara": { - "ndcg@10": 0.3748362133870952 - }, - "miracl_reranking": { - "ndcg@10": 0.7087840971938433 - }, - "mldr_reranking": { - "ndcg@10": 
0.8734013475096433 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.5331630522529377 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.5202480516932524 - }, - "jaqket": { - "ndcg@10": 0.5013089667314551 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3288294149496304 - }, - "miracl_retrieval": { - "ndcg@10": 0.20681341934572967 - }, - "mldr_retrieval": { - "ndcg@10": 0.24700329716018354 - }, - "mrtydi": { - "ndcg@10": 0.141360680613414 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.6909104560170936 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.6619434888289687 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.6484407439307039 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.4696725603511326 - } - }, - "STS": { - "jsick": { - "spearman": 0.8283659349049672 - }, - "jsts": { - "spearman": 0.8126484380435667 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5511252826598367 - }, - "mewsc16": { - "v_measure_score": 0.5339141639252604 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.49207894013578146 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json deleted file mode 100644 index c2b5a3e..0000000 --- a/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7260568612881779 - }, - "amazon_review_classification": { - "macro_f1": 0.5455832826466495 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8942024454984163 - }, - "massive_intent_classification": { - "macro_f1": 0.792273118014186 - }, - "massive_scenario_classification": { - "macro_f1": 0.8770657195206764 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8042709569831964 - }, - "wrime_classification": { - "macro_f1": 0.4525777476393026 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9149640515619839 - }, - 
"jacwir_reranking": { - "ndcg@10": 0.5614550878114778 - }, - "jqara": { - "ndcg@10": 0.38302855218604437 - }, - "miracl_reranking": { - "ndcg@10": 0.7126433285790728 - }, - "mldr_reranking": { - "ndcg@10": 0.8659821811381412 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.4370774500135088 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.47421467281855384 - }, - "jaqket": { - "ndcg@10": 0.4004385277719307 - }, - "mintaka_retrieval": { - "ndcg@10": 0.376774984849213 - }, - "miracl_retrieval": { - "ndcg@10": 0.18125969161337505 - }, - "mldr_retrieval": { - "ndcg@10": 0.23480755788261093 - }, - "mrtydi": { - "ndcg@10": 0.1188048690188868 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.6407825080386719 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.6295135121177772 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.36949537039923136 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.2490316613470849 - } - }, - "STS": { - "jsick": { - "spearman": 0.8377753687267541 - }, - "jsts": { - "spearman": 0.8256006176068381 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5337915256082275 - }, - "mewsc16": { - "v_measure_score": 0.5125821768154618 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.45736658859438273 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json deleted file mode 100644 index 2cc2181..0000000 --- a/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7364790582283407 - }, - "amazon_review_classification": { - "macro_f1": 0.5413541626836352 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8986588956343088 - }, - "massive_intent_classification": { - "macro_f1": 0.7767897385750657 - }, - "massive_scenario_classification": { - "macro_f1": 0.8610390686035142 - }, - 
"sib200_japanese_classification": { - "macro_f1": 0.8413013579577491 - }, - "wrime_classification": { - "macro_f1": 0.41309966752995253 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9117818311636607 - }, - "jacwir_reranking": { - "ndcg@10": 0.5154239181007129 - }, - "jqara": { - "ndcg@10": 0.3218696921394324 - }, - "miracl_reranking": { - "ndcg@10": 0.6995597032253587 - }, - "mldr_reranking": { - "ndcg@10": 0.8612256071032377 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.35106925427500363 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.4673719618749888 - }, - "jaqket": { - "ndcg@10": 0.3951670829019162 - }, - "mintaka_retrieval": { - "ndcg@10": 0.299231152726057 - }, - "miracl_retrieval": { - "ndcg@10": 0.10934136213023636 - }, - "mldr_retrieval": { - "ndcg@10": 0.15981611825721914 - }, - "mrtydi": { - "ndcg@10": 0.055133639963568334 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.582165240647806 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.5841104498413489 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.55577879846708 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.3284050897756761 - } - }, - "STS": { - "jsick": { - "spearman": 0.7852600594448598 - }, - "jsts": { - "spearman": 0.7894496424482047 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5065452260003059 - }, - "mewsc16": { - "v_measure_score": 0.39578933501406055 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3362930091678794 - } - } -} \ No newline at end of file diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json b/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json deleted file mode 100644 index 09525c9..0000000 --- a/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7640316468319925 - }, - "amazon_review_classification": { - "macro_f1": 0.5504736753163985 - }, - 
"japanese_sentiment_classification": { - "macro_f1": 0.9057099704855596 - }, - "massive_intent_classification": { - "macro_f1": 0.792495956569193 - }, - "massive_scenario_classification": { - "macro_f1": 0.8749858164207054 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8288719236604842 - }, - "wrime_classification": { - "macro_f1": 0.44326523397693174 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9094836571513687 - }, - "jacwir_reranking": { - "ndcg@10": 0.5417192948613557 - }, - "jqara": { - "ndcg@10": 0.3877939946491903 - }, - "miracl_reranking": { - "ndcg@10": 0.7001887861606321 - }, - "mldr_reranking": { - "ndcg@10": 0.8303617273610736 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.37613574135010835 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.46564010373437337 - }, - "jaqket": { - "ndcg@10": 0.3452888488420233 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3058130510308383 - }, - "miracl_retrieval": { - "ndcg@10": 0.10326154138228141 - }, - "mldr_retrieval": { - "ndcg@10": 0.12550430031143336 - }, - "mrtydi": { - "ndcg@10": 0.057502989435967655 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.504469050615059 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.5069650402920987 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.6043158227609278 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.34323430832579677 - } - }, - "STS": { - "jsick": { - "spearman": 0.8013849170804103 - }, - "jsts": { - "spearman": 0.809789575264219 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5147732775967515 - }, - "mewsc16": { - "v_measure_score": 0.44443267597570074 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.34646662604886447 - } - } -} \ No newline at end of file diff --git a/docs/results/colorfulscoop/sbert-base-ja/summary.json b/docs/results/colorfulscoop/sbert-base-ja/summary.json deleted file mode 100644 index 91ef6aa..0000000 --- a/docs/results/colorfulscoop/sbert-base-ja/summary.json +++ /dev/null @@ 
-1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7080315613053877 - }, - "amazon_review_classification": { - "macro_f1": 0.4779713813897666 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8350239953633378 - }, - "massive_intent_classification": { - "macro_f1": 0.7288673932703351 - }, - "massive_scenario_classification": { - "macro_f1": 0.8370655127879382 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8262660922438109 - }, - "wrime_classification": { - "macro_f1": 0.35057897749310646 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.8996866702578056 - }, - "jacwir_reranking": { - "ndcg@10": 0.37147215136686634 - }, - "jqara": { - "ndcg@10": 0.2220517076242275 - }, - "miracl_reranking": { - "ndcg@10": 0.6502702968219343 - }, - "mldr_reranking": { - "ndcg@10": 0.8255483571039144 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.192984468642645 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.21704292684612675 - }, - "jaqket": { - "ndcg@10": 0.13139887002144995 - }, - "mintaka_retrieval": { - "ndcg@10": 0.19067862146114167 - }, - "miracl_retrieval": { - "ndcg@10": 0.018598782450328283 - }, - "mldr_retrieval": { - "ndcg@10": 0.06972936265190934 - }, - "mrtydi": { - "ndcg@10": 0.004126228941345733 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.29023294982669573 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.2580237968832312 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.21071404885072903 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.11573741610386916 - } - }, - "STS": { - "jsick": { - "spearman": 0.6656074999372202 - }, - "jsts": { - "spearman": 0.7425444938991701 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.4059869097583984 - }, - "mewsc16": { - "v_measure_score": 0.4617625340860209 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.3035702180528845 - } - } -} \ No newline at end of file diff --git 
a/docs/results/google/embeddinggemma-300m/summary.json b/docs/results/google/embeddinggemma-300m/summary.json deleted file mode 100644 index 1cbe1cd..0000000 --- a/docs/results/google/embeddinggemma-300m/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7473788045121156 - }, - "amazon_review_classification": { - "macro_f1": 0.5803989931720487 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9598578035045773 - }, - "massive_intent_classification": { - "macro_f1": 0.8007123314267398 - }, - "massive_scenario_classification": { - "macro_f1": 0.9058457580997293 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8691524520966505 - }, - "wrime_classification": { - "macro_f1": 0.46617181157351545 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9325852428034396 - }, - "jacwir_reranking": { - "ndcg@10": 0.8672290139012463 - }, - "jqara": { - "ndcg@10": 0.5208735587352208 - }, - "miracl_reranking": { - "ndcg@10": 0.8237547981136122 - }, - "mldr_reranking": { - "ndcg@10": 0.9019285986799139 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8107178459954021 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.6942509653422283 - }, - "jaqket": { - "ndcg@10": 0.6326539731698172 - }, - "mintaka_retrieval": { - "ndcg@10": 0.38634126517980316 - }, - "miracl_retrieval": { - "ndcg@10": 0.3527982534428366 - }, - "mldr_retrieval": { - "ndcg@10": 0.34664273718176375 - }, - "mrtydi": { - "ndcg@10": 0.13863867175417482 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9934404877801122 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9902425863025213 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9611708983967426 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9435055100669566 - } - }, - "STS": { - "jsick": { - "spearman": 0.8167115014804869 - }, - "jsts": { - "spearman": 0.8381005453815682 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.553278169293011 - 
}, - "mewsc16": { - "v_measure_score": 0.5055377268682895 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4254674919395097 - } - } -} \ No newline at end of file diff --git a/docs/results/hotchpotch/static-embedding-japanese/summary.json b/docs/results/hotchpotch/static-embedding-japanese/summary.json deleted file mode 100644 index dea2123..0000000 --- a/docs/results/hotchpotch/static-embedding-japanese/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.6806231003039513 - }, - "amazon_review_classification": { - "macro_f1": 0.46807443888459704 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.7982203591912549 - }, - "massive_intent_classification": { - "macro_f1": 0.7479207001300227 - }, - "massive_scenario_classification": { - "macro_f1": 0.8218342894775092 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8333478541030553 - }, - "wrime_classification": { - "macro_f1": 0.32116037890073806 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.918697023137389 - }, - "jacwir_reranking": { - "ndcg@10": 0.8096474845962077 - }, - "jqara": { - "ndcg@10": 0.470607034824141 - }, - "miracl_reranking": { - "ndcg@10": 0.7201497903350694 - }, - "mldr_reranking": { - "ndcg@10": 0.9355298111228094 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.7227068099625594 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.5555106276533467 - }, - "jaqket": { - "ndcg@10": 0.6403798293637829 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3893399585539267 - }, - "miracl_retrieval": { - "ndcg@10": 0.3261108514005591 - }, - "mldr_retrieval": { - "ndcg@10": 0.4251322740050699 - }, - "mrtydi": { - "ndcg@10": 0.1118466505474389 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.7618517724714088 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9573914637080742 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9036776565067465 - }, - "nlp_journal_title_intro": { - "ndcg@10": 
0.862455457223212 - } - }, - "STS": { - "jsick": { - "spearman": 0.8251124620732032 - }, - "jsts": { - "spearman": 0.7781260135980573 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5143752588371998 - }, - "mewsc16": { - "v_measure_score": 0.34814733829489664 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.21465115117004985 - } - } -} \ No newline at end of file diff --git a/docs/results/intfloat/multilingual-e5-base/summary.json b/docs/results/intfloat/multilingual-e5-base/summary.json deleted file mode 100644 index 4d84be2..0000000 --- a/docs/results/intfloat/multilingual-e5-base/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.6428957534047911 - }, - "amazon_review_classification": { - "macro_f1": 0.5417258327796466 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9231910434886872 - }, - "massive_intent_classification": { - "macro_f1": 0.7318717264077053 - }, - "massive_scenario_classification": { - "macro_f1": 0.8677940980663801 - }, - "sib200_japanese_classification": { - "macro_f1": 0.785022714268383 - }, - "wrime_classification": { - "macro_f1": 0.3865061394465788 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9290148108090969 - }, - "jacwir_reranking": { - "ndcg@10": 0.8865491934939191 - }, - "jqara": { - "ndcg@10": 0.4761308479065645 - }, - "miracl_reranking": { - "ndcg@10": 0.8196779545649944 - }, - "mldr_reranking": { - "ndcg@10": 0.8614612823139557 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8431602298737804 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.687214041967885 - }, - "jaqket": { - "ndcg@10": 0.5169392915456349 - }, - "mintaka_retrieval": { - "ndcg@10": 0.34676383987252357 - }, - "miracl_retrieval": { - "ndcg@10": 0.6449511893902589 - }, - "mldr_retrieval": { - "ndcg@10": 0.2573147838464383 - }, - "mrtydi": { - "ndcg@10": 0.42298287793585587 - }, - "nlp_journal_abs_article": { - "ndcg@10": 
0.8355946539433561 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.8447862631398672 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9461907998491789 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.7469571396756213 - } - }, - "STS": { - "jsick": { - "spearman": 0.8125544166626103 - }, - "jsts": { - "spearman": 0.7965480195299134 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5379041349111564 - }, - "mewsc16": { - "v_measure_score": 0.4943772106331262 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4713134178805946 - } - } -} \ No newline at end of file diff --git a/docs/results/intfloat/multilingual-e5-large/summary.json b/docs/results/intfloat/multilingual-e5-large/summary.json deleted file mode 100644 index 40752a5..0000000 --- a/docs/results/intfloat/multilingual-e5-large/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.6969861236021963 - }, - "amazon_review_classification": { - "macro_f1": 0.5763612743026115 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9554866923455646 - }, - "massive_intent_classification": { - "macro_f1": 0.7401244088033258 - }, - "massive_scenario_classification": { - "macro_f1": 0.887053685338159 - }, - "sib200_japanese_classification": { - "macro_f1": 0.7811476853348774 - }, - "wrime_classification": { - "macro_f1": 0.42377599926222737 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9330712866652149 - }, - "jacwir_reranking": { - "ndcg@10": 0.9036816685131848 - }, - "jqara": { - "ndcg@10": 0.561374764136422 - }, - "miracl_reranking": { - "ndcg@10": 0.8631195198401651 - }, - "mldr_reranking": { - "ndcg@10": 0.8891328806594833 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8641271530674604 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7297746711291291 - }, - "jaqket": { - "ndcg@10": 0.5967326588135612 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3958992445664435 - }, - 
"miracl_retrieval": { - "ndcg@10": 0.7095604570396511 - }, - "mldr_retrieval": { - "ndcg@10": 0.2984972238105224 - }, - "mrtydi": { - "ndcg@10": 0.4781603349494696 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.8326468852967057 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.8571088737195884 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.952870249874937 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.7257268520360993 - } - }, - "STS": { - "jsick": { - "spearman": 0.7985423882395024 - }, - "jsts": { - "spearman": 0.8186303902222064 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5157643001398088 - }, - "mewsc16": { - "v_measure_score": 0.46806674695304834 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.5334765362912619 - } - } -} \ No newline at end of file diff --git a/docs/results/intfloat/multilingual-e5-small/summary.json b/docs/results/intfloat/multilingual-e5-small/summary.json deleted file mode 100644 index 5a3add1..0000000 --- a/docs/results/intfloat/multilingual-e5-small/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.5866005078388893 - }, - "amazon_review_classification": { - "macro_f1": 0.5120598395740691 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8773239262941632 - }, - "massive_intent_classification": { - "macro_f1": 0.7134377059258787 - }, - "massive_scenario_classification": { - "macro_f1": 0.8676947906742417 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8177503141758454 - }, - "wrime_classification": { - "macro_f1": 0.36913347435432137 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9298402731760124 - }, - "jacwir_reranking": { - "ndcg@10": 0.8998812594907971 - }, - "jqara": { - "ndcg@10": 0.49280220404951935 - }, - "miracl_reranking": { - "ndcg@10": 0.8178461260193638 - }, - "mldr_reranking": { - "ndcg@10": 0.864145360860429 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 
0.8558160940470637 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.6568760244912849 - }, - "jaqket": { - "ndcg@10": 0.5157123960708363 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3153737960263929 - }, - "miracl_retrieval": { - "ndcg@10": 0.6323300168472976 - }, - "mldr_retrieval": { - "ndcg@10": 0.2590832302769219 - }, - "mrtydi": { - "ndcg@10": 0.4236692119753354 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.8396508926780583 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.8409842458346825 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9447219194706624 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.7455737280382885 - } - }, - "STS": { - "jsick": { - "spearman": 0.8199946308873799 - }, - "jsts": { - "spearman": 0.7892106647109823 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5194355229712517 - }, - "mewsc16": { - "v_measure_score": 0.5233814767010047 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.43592128019411325 - } - } -} \ No newline at end of file diff --git a/docs/results/jinaai/jina-embeddings-v3/summary.json b/docs/results/jinaai/jina-embeddings-v3/summary.json deleted file mode 100644 index 8524862..0000000 --- a/docs/results/jinaai/jina-embeddings-v3/summary.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7882733929438857 - }, - "amazon_review_classification": { - "macro_f1": 0.5933239824757218 - }, - "massive_intent_classification": { - "macro_f1": 0.7765343277120157 - }, - "massive_scenario_classification": { - "macro_f1": 0.8974174944345525 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9271464336251287 - } - }, - "Retrieval": { - "jagovfaqs_22k": { - "ndcg@10": 0.7149884473155108 - }, - "jaqket": { - "ndcg@10": 0.46484206025698144 - }, - "mrtydi": { - "ndcg@10": 0.4544765083850943 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9843205562446103 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9561509620323349 - }, - 
"nlp_journal_title_intro": { - "ndcg@10": 0.9385000684351988 - } - }, - "STS": { - "jsick": { - "spearman": 0.781637470000662 - }, - "jsts": { - "spearman": 0.8193234425217734 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5472248713636514 - }, - "mewsc16": { - "v_measure_score": 0.4818974386694296 - } - }, - "PairClassification": { - "paws_x_ja": { - "binary_f1": 0.623716814159292 - } - } -} diff --git a/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json b/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json deleted file mode 100644 index 38e78b4..0000000 --- a/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7972419438068292 - }, - "amazon_review_classification": { - "macro_f1": 0.5802127224160758 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9199098092136551 - }, - "massive_intent_classification": { - "macro_f1": 0.8015558847211773 - }, - "massive_scenario_classification": { - "macro_f1": 0.8878291337617034 - }, - "sib200_japanese_classification": { - "macro_f1": 0.7731122315942124 - }, - "wrime_classification": { - "macro_f1": 0.4573111522822367 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9151322326635167 - }, - "jacwir_reranking": { - "ndcg@10": 0.6745048816141938 - }, - "jqara": { - "ndcg@10": 0.36039102371287524 - }, - "miracl_reranking": { - "ndcg@10": 0.6867643099800397 - }, - "mldr_reranking": { - "ndcg@10": 0.8538476294446257 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.5964999187333498 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.5407367959715127 - }, - "jaqket": { - "ndcg@10": 0.4021523812335328 - }, - "mintaka_retrieval": { - "ndcg@10": 0.2482827887837841 - }, - "miracl_retrieval": { - "ndcg@10": 0.17190013577864438 - }, - "mldr_retrieval": { - "ndcg@10": 0.19084474235068657 - }, - "mrtydi": { - "ndcg@10": 
0.10090455185771262 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.44067635335327865 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.44837143094362086 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.7368252250653567 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.3115238718909808 - } - }, - "STS": { - "jsick": { - "spearman": 0.7203759702575281 - }, - "jsts": { - "spearman": 0.8107670759374308 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5170361974340975 - }, - "mewsc16": { - "v_measure_score": 0.5152481901891431 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.43034104597999767 - } - } -} \ No newline at end of file diff --git a/docs/results/pfnet/plamo-embedding-1b/summary.json b/docs/results/pfnet/plamo-embedding-1b/summary.json deleted file mode 100644 index bbd1ebe..0000000 --- a/docs/results/pfnet/plamo-embedding-1b/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7758538459902731 - }, - "amazon_review_classification": { - "macro_f1": 0.5947995518406083 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9172503242542154 - }, - "massive_intent_classification": { - "macro_f1": 0.8278794713377423 - }, - "massive_scenario_classification": { - "macro_f1": 0.8994521566290758 - }, - "sib200_japanese_classification": { - "macro_f1": 0.9031045220702235 - }, - "wrime_classification": { - "macro_f1": 0.4920234056704329 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9358806147164782 - }, - "jacwir_reranking": { - "ndcg@10": 0.9174123687849153 - }, - "jqara": { - "ndcg@10": 0.6614745715723234 - }, - "miracl_reranking": { - "ndcg@10": 0.8191089804461983 - }, - "mldr_reranking": { - "ndcg@10": 0.9187107530127357 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8891350347274469 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7902563114751548 - }, - "jaqket": { - "ndcg@10": 0.543879907336617 - }, - "mintaka_retrieval": { - 
"ndcg@10": 0.5455917771478032 - }, - "miracl_retrieval": { - "ndcg@10": 0.5991430810654191 - }, - "mldr_retrieval": { - "ndcg@10": 0.3668286739593277 - }, - "mrtydi": { - "ndcg@10": 0.4186565845821445 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9765055597743824 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.990219021795052 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9862781050998647 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9510769472900551 - } - }, - "STS": { - "jsick": { - "spearman": 0.81830804755845 - }, - "jsts": { - "spearman": 0.8446183418196836 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.6173644704637056 - }, - "mewsc16": { - "v_measure_score": 0.4802637594283387 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4773483587781526 - } - } -} \ No newline at end of file diff --git a/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json b/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json deleted file mode 100644 index 6d1041e..0000000 --- a/docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7528271196943096 - }, - "amazon_review_classification": { - "macro_f1": 0.5518771080100612 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.892368025976312 - }, - "massive_intent_classification": { - "macro_f1": 0.7872725195473699 - }, - "massive_scenario_classification": { - "macro_f1": 0.8713846348082936 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8583089323083904 - }, - "wrime_classification": { - "macro_f1": 0.4323129039345514 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9301525338489429 - }, - "jacwir_reranking": { - "ndcg@10": 0.8827390816541736 - }, - "jqara": { - "ndcg@10": 0.6070225247152883 - }, - "miracl_reranking": { - "ndcg@10": 0.8243623644224994 - }, - "mldr_reranking": { - "ndcg@10": 0.887121388271364 - } - }, - "Retrieval": { - 
"jacwir_retrieval": { - "ndcg@10": 0.8385011452405416 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.6984652569482365 - }, - "jaqket": { - "ndcg@10": 0.6751948574643762 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3957491894384977 - }, - "miracl_retrieval": { - "ndcg@10": 0.652881832622734 - }, - "mldr_retrieval": { - "ndcg@10": 0.3374776122444277 - }, - "mrtydi": { - "ndcg@10": 0.4167021902708705 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.899055473429718 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9008045583912581 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9566816164352073 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.757906107708436 - } - }, - "STS": { - "jsick": { - "spearman": 0.8494858386977019 - }, - "jsts": { - "spearman": 0.8095670694135243 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5446091559116468 - }, - "mewsc16": { - "v_measure_score": 0.4611859858929692 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.43979504978761347 - } - } -} \ No newline at end of file diff --git a/docs/results/pkshatech/GLuCoSE-base-ja/summary.json b/docs/results/pkshatech/GLuCoSE-base-ja/summary.json deleted file mode 100644 index 5a50ab4..0000000 --- a/docs/results/pkshatech/GLuCoSE-base-ja/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.8203088346974938 - }, - "amazon_review_classification": { - "macro_f1": 0.5793470941382456 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9289309593569228 - }, - "massive_intent_classification": { - "macro_f1": 0.7852003872158392 - }, - "massive_scenario_classification": { - "macro_f1": 0.8771105186592234 - }, - "sib200_japanese_classification": { - "macro_f1": 0.7723533533184818 - }, - "wrime_classification": { - "macro_f1": 0.48820317778534994 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9182072351783757 - }, - "jacwir_reranking": { - "ndcg@10": 0.7453523153562407 - }, - "jqara": { 
- "ndcg@10": 0.30235678517238046 - }, - "miracl_reranking": { - "ndcg@10": 0.7782487998017047 - }, - "mldr_reranking": { - "ndcg@10": 0.8742431547482784 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.6929937892822252 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.6414300605061649 - }, - "jaqket": { - "ndcg@10": 0.39775627519142726 - }, - "mintaka_retrieval": { - "ndcg@10": 0.2981097485323552 - }, - "miracl_retrieval": { - "ndcg@10": 0.4826861479972318 - }, - "mldr_retrieval": { - "ndcg@10": 0.2507030467719784 - }, - "mrtydi": { - "ndcg@10": 0.3013997193651328 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.7677861541704494 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.7720777474520221 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.8139955508348415 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.5843440022515908 - } - }, - "STS": { - "jsick": { - "spearman": 0.7489963692364312 - }, - "jsts": { - "spearman": 0.8246470658338377 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5040813114960272 - }, - "mewsc16": { - "v_measure_score": 0.4952409837584659 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.41426282292221306 - } - } -} \ No newline at end of file diff --git a/docs/results/pkshatech/RoSEtta-base-ja/summary.json b/docs/results/pkshatech/RoSEtta-base-ja/summary.json deleted file mode 100644 index 7951ed1..0000000 --- a/docs/results/pkshatech/RoSEtta-base-ja/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7021400751808275 - }, - "amazon_review_classification": { - "macro_f1": 0.5261693704750353 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8728387064627037 - }, - "massive_intent_classification": { - "macro_f1": 0.7958661089844552 - }, - "massive_scenario_classification": { - "macro_f1": 0.869642477269303 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8400507949086808 - }, - "wrime_classification": { 
- "macro_f1": 0.41243251223612126 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9267709447988313 - }, - "jacwir_reranking": { - "ndcg@10": 0.8682926176464301 - }, - "jqara": { - "ndcg@10": 0.5792158527364997 - }, - "miracl_reranking": { - "ndcg@10": 0.8038275156892214 - }, - "mldr_reranking": { - "ndcg@10": 0.8844542290758788 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8201713015308671 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.6627940635852495 - }, - "jaqket": { - "ndcg@10": 0.642772517951208 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3404237377925581 - }, - "miracl_retrieval": { - "ndcg@10": 0.6016261958696313 - }, - "mldr_retrieval": { - "ndcg@10": 0.3236631225997826 - }, - "mrtydi": { - "ndcg@10": 0.36773428568023436 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9604317247356383 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9541194598644321 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.931681815900694 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.821937205258955 - } - }, - "STS": { - "jsick": { - "spearman": 0.8383423614590403 - }, - "jsts": { - "spearman": 0.7894639448529204 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.4888541691163841 - }, - "mewsc16": { - "v_measure_score": 0.4515710456360326 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4060764834036522 - } - } -} \ No newline at end of file diff --git a/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json b/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json deleted file mode 100644 index 5bbd9f7..0000000 --- a/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.6827876647194675 - }, - "amazon_review_classification": { - "macro_f1": 0.5175208911836656 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8821403624230039 - }, - "massive_intent_classification": { - 
"macro_f1": 0.7964832948145142 - }, - "massive_scenario_classification": { - "macro_f1": 0.8722583552883876 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8118131918956941 - }, - "wrime_classification": { - "macro_f1": 0.38393198133793865 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9127205853729194 - }, - "jacwir_reranking": { - "ndcg@10": 0.5745412347869042 - }, - "jqara": { - "ndcg@10": 0.31740297589991745 - }, - "miracl_reranking": { - "ndcg@10": 0.7212459481239325 - }, - "mldr_reranking": { - "ndcg@10": 0.8749859006713937 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.45027356866159485 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.4100248722670852 - }, - "jaqket": { - "ndcg@10": 0.37009937036200197 - }, - "mintaka_retrieval": { - "ndcg@10": 0.3129516236109114 - }, - "miracl_retrieval": { - "ndcg@10": 0.16066205698392905 - }, - "mldr_retrieval": { - "ndcg@10": 0.20077263817507693 - }, - "mrtydi": { - "ndcg@10": 0.10152904724472846 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.3813451499418741 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.3760245554186644 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.5918422105100428 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.25260061985270044 - } - }, - "STS": { - "jsick": { - "spearman": 0.7310527928257868 - }, - "jsts": { - "spearman": 0.8050903530724467 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.491058629988371 - }, - "mewsc16": { - "v_measure_score": 0.4702243143778868 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.5220924001787737 - } - } -} \ No newline at end of file diff --git a/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json b/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json deleted file mode 100644 index d1a1183..0000000 --- a/docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - 
"macro_f1": 0.7966249319542733 - }, - "amazon_review_classification": { - "macro_f1": 0.6202158443035662 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9503418215782169 - }, - "massive_intent_classification": { - "macro_f1": 0.8121127783146885 - }, - "massive_scenario_classification": { - "macro_f1": 0.9015618520645106 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8262549610016919 - }, - "wrime_classification": { - "macro_f1": 0.496952794347916 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9359864365331227 - }, - "jacwir_reranking": { - "ndcg@10": 0.8684667204236405 - }, - "jqara": { - "ndcg@10": 0.6592446626934351 - }, - "miracl_reranking": { - "ndcg@10": 0.8516895656188278 - }, - "mldr_reranking": { - "ndcg@10": 0.9024168764200886 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8242898079860301 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.7176236149918197 - }, - "jaqket": { - "ndcg@10": 0.729199960117355 - }, - "mintaka_retrieval": { - "ndcg@10": 0.6260117718497401 - }, - "miracl_retrieval": { - "ndcg@10": 0.6323109932464099 - }, - "mldr_retrieval": { - "ndcg@10": 0.3458953565848906 - }, - "mrtydi": { - "ndcg@10": 0.4075091710258615 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9919931534803926 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9916030162169888 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.968506421217649 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9629377323425067 - } - }, - "STS": { - "jsick": { - "spearman": 0.7979403746663343 - }, - "jsts": { - "spearman": 0.8362521198880197 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5603187837880047 - }, - "mewsc16": { - "v_measure_score": 0.5068875864473731 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.4418928761777483 - } - } -} \ No newline at end of file diff --git a/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json b/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json 
deleted file mode 100644 index 86137b4..0000000 --- a/docs/results/sbintuitions/sarashina-embedding-v2-1b/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7981260149778604 - }, - "amazon_review_classification": { - "macro_f1": 0.613904230518876 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.9350720201784032 - }, - "massive_intent_classification": { - "macro_f1": 0.8368870408710274 - }, - "massive_scenario_classification": { - "macro_f1": 0.9023393778180459 - }, - "sib200_japanese_classification": { - "macro_f1": 0.814822834466633 - }, - "wrime_classification": { - "macro_f1": 0.49874416955622525 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9357698212029779 - }, - "jacwir_reranking": { - "ndcg@10": 0.8879290064759172 - }, - "jqara": { - "ndcg@10": 0.7055458565694387 - }, - "miracl_reranking": { - "ndcg@10": 0.8593120098725527 - }, - "mldr_reranking": { - "ndcg@10": 0.9252857993806471 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.8553812052293157 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.748733390366879 - }, - "jaqket": { - "ndcg@10": 0.7351759183476264 - }, - "mintaka_retrieval": { - "ndcg@10": 0.6610711832074698 - }, - "miracl_retrieval": { - "ndcg@10": 0.6825626228833273 - }, - "mldr_retrieval": { - "ndcg@10": 0.403522262945172 - }, - "mrtydi": { - "ndcg@10": 0.4956554219902846 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.9684244331815967 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.9627838420424424 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.9810825575187433 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.9178887982974248 - } - }, - "STS": { - "jsick": { - "spearman": 0.8257994437715604 - }, - "jsts": { - "spearman": 0.8586626198858301 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.5741299477926689 - }, - "mewsc16": { - "v_measure_score": 0.5167004748357505 - }, - "sib200_japanese_clustering": { - 
"v_measure_score": 0.48585227521060775 - } - } -} \ No newline at end of file diff --git a/docs/results/sentence-transformers/LaBSE/summary.json b/docs/results/sentence-transformers/LaBSE/summary.json deleted file mode 100644 index d4575ba..0000000 --- a/docs/results/sentence-transformers/LaBSE/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7473900578785092 - }, - "amazon_review_classification": { - "macro_f1": 0.5163381922398036 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8952055768957177 - }, - "massive_intent_classification": { - "macro_f1": 0.7708783013419095 - }, - "massive_scenario_classification": { - "macro_f1": 0.883882574111003 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8147469939175009 - }, - "wrime_classification": { - "macro_f1": 0.4010561963802254 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.9147393987384248 - }, - "jacwir_reranking": { - "ndcg@10": 0.6785244283016075 - }, - "jqara": { - "ndcg@10": 0.24624584903493016 - }, - "miracl_reranking": { - "ndcg@10": 0.692780512325045 - }, - "mldr_reranking": { - "ndcg@10": 0.818396899799895 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.49122610922285737 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.4243154817699682 - }, - "jaqket": { - "ndcg@10": 0.24919695742546066 - }, - "mintaka_retrieval": { - "ndcg@10": 0.20021150938693902 - }, - "miracl_retrieval": { - "ndcg@10": 0.09357313571231995 - }, - "mldr_retrieval": { - "ndcg@10": 0.07525879379433965 - }, - "mrtydi": { - "ndcg@10": 0.04221321214455149 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.48063138821949475 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 0.48202233374429526 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.7559363652226313 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.3553481928114969 - } - }, - "STS": { - "jsick": { - "spearman": 0.770087314840748 - }, - "jsts": { - "spearman": 
0.7611615118281959 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.4908336523752348 - }, - "mewsc16": { - "v_measure_score": 0.41781835844551085 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.2859403214333406 - } - } -} \ No newline at end of file diff --git a/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json b/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json deleted file mode 100644 index 4a59ed9..0000000 --- a/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "Classification": { - "amazon_counterfactual_classification": { - "macro_f1": 0.7514299930187799 - }, - "amazon_review_classification": { - "macro_f1": 0.516712003417941 - }, - "japanese_sentiment_classification": { - "macro_f1": 0.8714537157100772 - }, - "massive_intent_classification": { - "macro_f1": 0.7433839585058197 - }, - "massive_scenario_classification": { - "macro_f1": 0.8606582397219589 - }, - "sib200_japanese_classification": { - "macro_f1": 0.8372998969612304 - }, - "wrime_classification": { - "macro_f1": 0.4167776597670575 - } - }, - "Reranking": { - "esci": { - "ndcg@10": 0.8971639400421929 - }, - "jacwir_reranking": { - "ndcg@10": 0.3920595575511347 - }, - "jqara": { - "ndcg@10": 0.18511169246774806 - }, - "miracl_reranking": { - "ndcg@10": 0.6535500060613615 - }, - "mldr_reranking": { - "ndcg@10": 0.768787823495723 - } - }, - "Retrieval": { - "jacwir_retrieval": { - "ndcg@10": 0.21075313614845367 - }, - "jagovfaqs_22k": { - "ndcg@10": 0.2248606553485316 - }, - "jaqket": { - "ndcg@10": 0.06494577519372931 - }, - "mintaka_retrieval": { - "ndcg@10": 0.22312923127278733 - }, - "miracl_retrieval": { - "ndcg@10": 0.022833015048992402 - }, - "mldr_retrieval": { - "ndcg@10": 0.06529330431356167 - }, - "mrtydi": { - "ndcg@10": 0.027849411947159904 - }, - "nlp_journal_abs_article": { - "ndcg@10": 0.24914118502751986 - }, - "nlp_journal_abs_intro": { - "ndcg@10": 
0.2554860092306942 - }, - "nlp_journal_title_abs": { - "ndcg@10": 0.35835508156998896 - }, - "nlp_journal_title_intro": { - "ndcg@10": 0.12133118349638791 - } - }, - "STS": { - "jsick": { - "spearman": 0.7238085290735078 - }, - "jsts": { - "spearman": 0.784483411606707 - } - }, - "Clustering": { - "livedoor_news": { - "v_measure_score": 0.26615937330682315 - }, - "mewsc16": { - "v_measure_score": 0.32048277963560623 - }, - "sib200_japanese_clustering": { - "v_measure_score": 0.2434250739162938 - } - } -} \ No newline at end of file diff --git a/leaderboard.md b/leaderboard.md deleted file mode 100644 index 1b83092..0000000 --- a/leaderboard.md +++ /dev/null @@ -1,270 +0,0 @@ -# Leaderboard -This leaderboard shows the results stored under `docs/results`. The scores are all multiplied by 100. - -## Summary - -The summary shows the average scores within each task. The average score is the average of scores by dataset. - -| Model | Avg. | Retrieval | STS | Classification | Reranking | Clustering | -|:----------------------------------------------|:---------:|:-----------:|:---------:|:----------------:|:-----------:|:------------:| -| sbintuitions/sarashina-embedding-v2-1b | **76.38** | **76.48** | **84.22** | 77.14 | **86.28** | 52.56 | -| cl-nagoya/ruri-v3-310m | 75.85 | 76.03 | 81.59 | **77.65** | 85.84 | 50.52 | -| cl-nagoya/ruri-v3-130m | 75.52 | 76.45 | 81.05 | 75.65 | 85.71 | 51.13 | -| sbintuitions/sarashina-embedding-v1-1b | 74.87 | 74.53 | 81.71 | 77.20 | 84.36 | 50.30 | -| pfnet/plamo-embedding-1b | 74.85 | 73.25 | 83.15 | 77.29 | 85.05 | 52.50 | -| cl-nagoya/ruri-v3-70m | 73.95 | 74.23 | 80.96 | 74.45 | 84.21 | 49.95 | -| OpenAI/text-embedding-3-large | 73.86 | 71.95 | 82.52 | 77.27 | 83.06 | 51.82 | -| cl-nagoya/ruri-large-v2 | 73.63 | 71.87 | 83.18 | 76.10 | 83.89 | 50.88 | -| cl-nagoya/ruri-v3-30m | 72.95 | 72.84 | 81.78 | 73.35 | 82.93 | 49.90 | -| BAAI/bge-m3 | 72.46 | 72.15 | 79.74 | 74.10 | 84.10 | 45.56 | -| cl-nagoya/ruri-large | 71.69 | 68.30 | 
83.13 | 76.25 | 81.26 | 49.93 | -| cl-nagoya/ruri-base-v2 | 71.66 | 68.96 | 83.03 | 75.59 | 82.46 | 46.84 | -| cl-nagoya/ruri-small-v2 | 71.40 | 68.46 | 82.91 | 74.12 | 82.30 | 49.97 | -| pkshatech/GLuCoSE-base-ja-v2 | 71.11 | 68.45 | 82.95 | 73.52 | 82.63 | 48.19 | -| intfloat/multilingual-e5-large | 70.67 | 67.65 | 80.86 | 72.30 | 83.01 | 50.58 | -| google/embeddinggemma-300m | 70.59 | 65.91 | 82.74 | 76.14 | 80.93 | 49.48 | -| cl-nagoya/ruri-base | 70.25 | 65.90 | 82.88 | 75.34 | 80.31 | 49.10 | -| pkshatech/RoSEtta-base-ja | 69.58 | 67.52 | 81.39 | 71.70 | 81.25 | 44.88 | -| cl-nagoya/ruri-small | 69.34 | 63.95 | 82.79 | 74.83 | 79.98 | 49.59 | -| intfloat/multilingual-e5-base | 68.06 | 64.48 | 80.46 | 69.70 | 79.46 | 50.12 | -| intfloat/multilingual-e5-small | 67.38 | 63.91 | 80.46 | 67.77 | 80.09 | 49.29 | -| OpenAI/text-embedding-3-small | 67.10 | 61.79 | 79.46 | 72.43 | 77.29 | 48.91 | -| OpenAI/text-embedding-ada-002 | 65.13 | 59.58 | 79.02 | 69.39 | 75.63 | 48.78 | -| hotchpotch/static-embedding-japanese | 63.80 | 60.51 | 80.16 | 66.73 | 77.09 | 35.91 | -| pkshatech/GLuCoSE-base-ja | 63.79 | 54.58 | 78.68 | 75.02 | 72.37 | 47.12 | -| cl-nagoya/sup-simcse-ja-base | 59.91 | 45.00 | 82.05 | 72.72 | 70.36 | **52.57** | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 57.60 | 42.41 | 79.00 | 71.83 | 71.88 | 42.02 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 56.75 | 38.08 | 76.56 | 74.53 | 69.81 | 48.75 | -| cl-nagoya/sup-simcse-ja-large | 56.46 | 37.38 | 83.17 | 72.74 | 68.76 | 50.12 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 55.78 | 39.85 | 77.96 | 71.46 | 69.92 | 39.27 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 55.35 | 36.23 | 78.29 | 72.59 | 70.59 | 44.54 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 54.65 | 36.24 | 77.75 | 71.81 | 68.58 | 43.45 | -| cl-nagoya/unsup-simcse-ja-large | 54.23 | 33.98 | 80.56 | 73.71 | 67.39 | 43.52 | -| cl-nagoya/unsup-simcse-ja-base | 53.86 | 35.34 | 78.74 | 72.41 | 66.20 | 41.29 | -| 
MU-Kindai/Japanese-SimCSE-BERT-base-sup | 53.82 | 35.22 | 74.96 | 71.48 | 68.15 | 42.86 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 53.59 | 34.93 | 76.70 | 72.06 | 67.73 | 39.93 | -| pkshatech/simcse-ja-bert-base-clcmlp | 53.48 | 32.80 | 76.81 | 70.67 | 68.02 | 49.45 | -| sentence-transformers/LaBSE | 52.70 | 33.18 | 76.56 | 71.85 | 67.01 | 39.82 | -| sentence-transformers/stsb-xlm-r-multilingual | 43.06 | 16.58 | 75.41 | 71.40 | 57.93 | 27.67 | -| colorfulscoop/sbert-base-ja | 42.90 | 15.45 | 70.41 | 68.05 | 59.38 | 39.04 | - -## Retrieval -| Model | Avg. | jacwir_retrieval
(ndcg@10) | jagovfaqs_22k
(ndcg@10) | jaqket
(ndcg@10) | mintaka_retrieval
(ndcg@10) | miracl_retrieval
(ndcg@10) | mldr_retrieval
(ndcg@10) | mrtydi
(ndcg@10) | nlp_abs_article
(ndcg@10) | nlp_abs_intro
(ndcg@10) | nlp_title_abs
(ndcg@10) | nlp_title_intro
(ndcg@10) | -|:----------------------------------------------|:---------:|:-------------------------------:|:----------------------------:|:---------------------:|:--------------------------------:|:-------------------------------:|:-----------------------------:|:---------------------:|:------------------------------:|:----------------------------:|:----------------------------:|:------------------------------:| -| sbintuitions/sarashina-embedding-v2-1b | **76.48** | 85.54 | 74.87 | **73.52** | **66.11** | 68.26 | 40.35 | **49.57** | 96.84 | 96.28 | 98.11 | 91.79 | -| cl-nagoya/ruri-v3-130m | 76.45 | 84.21 | 75.32 | 73.10 | 51.77 | 71.01 | 45.16 | 47.80 | 99.51 | 98.88 | 97.95 | 96.28 | -| cl-nagoya/ruri-v3-310m | 76.03 | 84.06 | 76.49 | 71.87 | 52.25 | 67.71 | 43.43 | 47.06 | **99.59** | **99.35** | 97.91 | **96.58** | -| sbintuitions/sarashina-embedding-v1-1b | 74.53 | 82.43 | 71.76 | 72.92 | 62.60 | 63.23 | 34.59 | 40.75 | 99.20 | 99.16 | 96.85 | 96.29 | -| cl-nagoya/ruri-v3-70m | 74.23 | 82.76 | 73.27 | 67.68 | 46.26 | 67.98 | 43.55 | 45.00 | 98.50 | 98.68 | 97.07 | 95.73 | -| pfnet/plamo-embedding-1b | 73.25 | **88.91** | **79.03** | 54.39 | 54.56 | 59.91 | 36.68 | 41.87 | 97.65 | 99.02 | **98.63** | 95.11 | -| cl-nagoya/ruri-v3-30m | 72.84 | 82.70 | 70.21 | 62.45 | 43.05 | 64.99 | 45.77 | 41.78 | 98.76 | 99.16 | 96.99 | 95.34 | -| BAAI/bge-m3 | 72.15 | 85.13 | 69.07 | 56.59 | 32.18 | **73.48** | **51.26** | 45.18 | 95.22 | 97.53 | 96.02 | 91.98 | -| OpenAI/text-embedding-3-large | 71.95 | 82.90 | 72.41 | 48.21 | 63.52 | 60.57 | 45.26 | 34.88 | 92.37 | 99.33 | 96.55 | 95.47 | -| cl-nagoya/ruri-large-v2 | 71.87 | 80.49 | 78.23 | 65.61 | 50.41 | 70.46 | 36.97 | 46.37 | 90.85 | 91.15 | 97.74 | 82.32 | -| cl-nagoya/ruri-base-v2 | 68.96 | 81.01 | 75.90 | 57.01 | 44.18 | 68.22 | 37.73 | 40.89 | 88.05 | 89.73 | 96.96 | 78.93 | -| cl-nagoya/ruri-small-v2 | 68.46 | 83.04 | 74.02 | 62.25 | 35.31 | 66.90 | 32.58 | 42.40 | 90.65 | 90.42 | 97.30 | 78.21 | -| 
pkshatech/GLuCoSE-base-ja-v2 | 68.45 | 83.85 | 69.85 | 67.52 | 39.57 | 65.29 | 33.75 | 41.67 | 89.91 | 90.08 | 95.67 | 75.79 | -| cl-nagoya/ruri-large | 68.30 | 81.69 | 77.64 | 61.73 | 51.06 | 55.47 | 34.77 | 38.12 | 86.53 | 88.91 | 96.17 | 79.22 | -| intfloat/multilingual-e5-large | 67.65 | 86.41 | 72.98 | 59.67 | 39.59 | 70.96 | 29.85 | 47.82 | 83.26 | 85.71 | 95.29 | 72.57 | -| pkshatech/RoSEtta-base-ja | 67.52 | 82.02 | 66.28 | 64.28 | 34.04 | 60.16 | 32.37 | 36.77 | 96.04 | 95.41 | 93.17 | 82.19 | -| google/embeddinggemma-300m | 65.91 | 81.07 | 69.43 | 63.27 | 38.63 | 35.28 | 34.66 | 13.86 | 99.34 | 99.02 | 96.12 | 94.35 | -| cl-nagoya/ruri-base | 65.90 | 82.48 | 75.50 | 50.23 | 45.37 | 54.88 | 35.42 | 35.59 | 86.65 | 87.23 | 95.27 | 76.25 | -| intfloat/multilingual-e5-base | 64.48 | 84.32 | 68.72 | 51.69 | 34.68 | 64.50 | 25.73 | 42.30 | 83.56 | 84.48 | 94.62 | 74.70 | -| cl-nagoya/ruri-small | 63.95 | 82.58 | 74.01 | 48.44 | 37.23 | 52.22 | 28.99 | 33.51 | 86.89 | 87.23 | 96.20 | 76.09 | -| intfloat/multilingual-e5-small | 63.91 | 85.58 | 65.69 | 51.57 | 31.54 | 63.23 | 25.91 | 42.37 | 83.97 | 84.10 | 94.47 | 74.56 | -| OpenAI/text-embedding-3-small | 61.79 | 79.58 | 64.02 | 33.94 | 32.44 | 48.45 | 35.07 | 20.03 | 85.83 | 98.47 | 91.70 | 90.17 | -| hotchpotch/static-embedding-japanese | 60.51 | 72.27 | 55.55 | 64.04 | 38.93 | 32.61 | 42.51 | 11.18 | 76.19 | 95.74 | 90.37 | 86.25 | -| OpenAI/text-embedding-ada-002 | 59.58 | 78.08 | 61.02 | 42.56 | 27.09 | 34.54 | 31.90 | 14.51 | 97.51 | 94.99 | 91.23 | 81.98 | -| pkshatech/GLuCoSE-base-ja | 54.58 | 69.30 | 64.14 | 39.78 | 29.81 | 48.27 | 25.07 | 30.14 | 76.78 | 77.21 | 81.40 | 58.43 | -| cl-nagoya/sup-simcse-ja-base | 45.00 | 53.32 | 52.02 | 50.13 | 32.88 | 20.68 | 24.70 | 14.14 | 69.09 | 66.19 | 64.84 | 46.97 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 42.41 | 53.16 | 51.20 | 45.81 | 30.42 | 26.08 | 23.65 | 13.06 | 54.65 | 52.13 | 74.13 | 42.21 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 39.85 | 
48.95 | 48.41 | 38.73 | 25.72 | 21.60 | 18.11 | 11.02 | 58.91 | 60.05 | 69.15 | 37.72 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 38.08 | 59.65 | 54.07 | 40.22 | 24.83 | 17.19 | 19.08 | 10.09 | 44.07 | 44.84 | 73.68 | 31.15 | -| cl-nagoya/sup-simcse-ja-large | 37.38 | 43.71 | 47.42 | 40.04 | 37.68 | 18.13 | 23.48 | 11.88 | 64.08 | 62.95 | 36.95 | 24.90 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 36.24 | 42.43 | 43.60 | 37.35 | 25.18 | 14.76 | 16.86 | 7.77 | 56.89 | 59.11 | 61.81 | 32.88 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 36.23 | 46.28 | 48.25 | 44.17 | 28.89 | 19.52 | 18.66 | 11.44 | 43.97 | 40.33 | 60.49 | 36.51 | -| cl-nagoya/unsup-simcse-ja-base | 35.34 | 35.11 | 46.74 | 39.52 | 29.92 | 10.93 | 15.98 | 5.51 | 58.22 | 58.41 | 55.58 | 32.84 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 35.22 | 39.92 | 44.60 | 38.45 | 22.39 | 13.94 | 13.91 | 7.30 | 58.35 | 58.63 | 57.43 | 32.47 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 34.93 | 40.86 | 43.88 | 35.56 | 19.98 | 16.52 | 12.06 | 7.11 | 54.30 | 55.86 | 62.96 | 35.17 | -| cl-nagoya/unsup-simcse-ja-large | 33.98 | 37.61 | 46.56 | 34.53 | 30.58 | 10.33 | 12.55 | 5.75 | 50.45 | 50.70 | 60.43 | 34.32 | -| sentence-transformers/LaBSE | 33.18 | 49.12 | 42.43 | 24.92 | 20.02 | 9.36 | 7.53 | 4.22 | 48.06 | 48.20 | 75.59 | 35.53 | -| pkshatech/simcse-ja-bert-base-clcmlp | 32.80 | 45.03 | 41.00 | 37.01 | 31.30 | 16.07 | 20.08 | 10.15 | 38.13 | 37.60 | 59.18 | 25.26 | -| sentence-transformers/stsb-xlm-r-multilingual | 16.58 | 21.08 | 22.49 | 6.49 | 22.31 | 2.28 | 6.53 | 2.78 | 24.91 | 25.55 | 35.84 | 12.13 | -| colorfulscoop/sbert-base-ja | 15.45 | 19.30 | 21.70 | 13.14 | 19.07 | 1.86 | 6.97 | 0.41 | 29.02 | 25.80 | 21.07 | 11.57 | - -## STS -| Model | Avg. | jsick
(spearman) | jsts
(spearman) | -|:----------------------------------------------|:---------:|:---------------------:|:--------------------:| -| sbintuitions/sarashina-embedding-v2-1b | **84.22** | 82.58 | **85.87** | -| cl-nagoya/ruri-large-v2 | 83.18 | 82.12 | 84.24 | -| cl-nagoya/sup-simcse-ja-large | 83.17 | 83.78 | 82.56 | -| pfnet/plamo-embedding-1b | 83.15 | 81.83 | 84.46 | -| cl-nagoya/ruri-large | 83.13 | 82.00 | 84.26 | -| cl-nagoya/ruri-base-v2 | 83.03 | 82.63 | 83.43 | -| pkshatech/GLuCoSE-base-ja-v2 | 82.95 | **84.95** | 80.96 | -| cl-nagoya/ruri-small-v2 | 82.91 | 83.88 | 81.93 | -| cl-nagoya/ruri-base | 82.88 | 82.32 | 83.43 | -| cl-nagoya/ruri-small | 82.79 | 83.45 | 82.13 | -| google/embeddinggemma-300m | 82.74 | 81.67 | 83.81 | -| OpenAI/text-embedding-3-large | 82.52 | 81.27 | 83.77 | -| cl-nagoya/sup-simcse-ja-base | 82.05 | 82.84 | 81.26 | -| cl-nagoya/ruri-v3-30m | 81.78 | 81.62 | 81.95 | -| sbintuitions/sarashina-embedding-v1-1b | 81.71 | 79.79 | 83.63 | -| cl-nagoya/ruri-v3-310m | 81.59 | 78.86 | 84.31 | -| pkshatech/RoSEtta-base-ja | 81.39 | 83.83 | 78.95 | -| cl-nagoya/ruri-v3-130m | 81.05 | 78.86 | 83.24 | -| cl-nagoya/ruri-v3-70m | 80.96 | 79.10 | 82.82 | -| intfloat/multilingual-e5-large | 80.86 | 79.85 | 81.86 | -| cl-nagoya/unsup-simcse-ja-large | 80.56 | 80.14 | 80.98 | -| intfloat/multilingual-e5-small | 80.46 | 82.00 | 78.92 | -| intfloat/multilingual-e5-base | 80.46 | 81.26 | 79.65 | -| hotchpotch/static-embedding-japanese | 80.16 | 82.51 | 77.81 | -| BAAI/bge-m3 | 79.74 | 79.27 | 80.21 | -| OpenAI/text-embedding-3-small | 79.46 | 80.83 | 78.08 | -| OpenAI/text-embedding-ada-002 | 79.02 | 79.09 | 78.94 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 79.00 | 79.86 | 78.14 | -| cl-nagoya/unsup-simcse-ja-base | 78.74 | 78.53 | 78.94 | -| pkshatech/GLuCoSE-base-ja | 78.68 | 74.90 | 82.46 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 78.29 | 78.76 | 77.82 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 77.96 | 79.14 | 76.77 | -| 
MU-Kindai/Japanese-MixCSE-BERT-base | 77.75 | 78.93 | 76.57 | -| pkshatech/simcse-ja-bert-base-clcmlp | 76.81 | 73.11 | 80.51 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 76.70 | 77.76 | 75.63 | -| sentence-transformers/LaBSE | 76.56 | 77.01 | 76.12 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.56 | 72.04 | 81.08 | -| sentence-transformers/stsb-xlm-r-multilingual | 75.41 | 72.38 | 78.45 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 74.96 | 75.25 | 74.66 | -| colorfulscoop/sbert-base-ja | 70.41 | 66.56 | 74.25 | - -## Classification -| Model | Avg. | amazon_counterfactual
(macro_f1) | amazon_review
(macro_f1) | jpn_sentiment
(macro_f1) | massive_intent
(macro_f1) | massive_scenario
(macro_f1) | sib200_jpn_cls
(macro_f1) | wrime_classification
(macro_f1) | -|:----------------------------------------------|:---------:|:-------------------------------------:|:-----------------------------:|:-----------------------------:|:------------------------------:|:--------------------------------:|:------------------------------:|:------------------------------------:| -| cl-nagoya/ruri-v3-310m | **77.65** | 80.09 | 60.72 | 95.31 | 81.76 | 89.01 | 88.13 | 48.53 | -| pfnet/plamo-embedding-1b | 77.29 | 77.59 | 59.48 | 91.73 | 82.79 | 89.95 | **90.31** | 49.20 | -| OpenAI/text-embedding-3-large | 77.27 | 77.90 | 60.44 | **96.89** | 80.91 | **91.08** | 87.85 | 45.84 | -| sbintuitions/sarashina-embedding-v1-1b | 77.20 | 79.66 | **62.02** | 95.03 | 81.21 | 90.16 | 82.63 | 49.70 | -| sbintuitions/sarashina-embedding-v2-1b | 77.14 | 79.81 | 61.39 | 93.51 | **83.69** | 90.23 | 81.48 | **49.87** | -| cl-nagoya/ruri-large | 76.25 | 79.50 | 56.85 | 93.56 | 82.10 | 90.03 | 85.26 | 46.45 | -| google/embeddinggemma-300m | 76.14 | 74.74 | 58.04 | 95.99 | 80.07 | 90.58 | 86.92 | 46.62 | -| cl-nagoya/ruri-large-v2 | 76.10 | 79.51 | 57.09 | 93.57 | 80.87 | 89.71 | 84.72 | 47.23 | -| cl-nagoya/ruri-v3-130m | 75.65 | 76.75 | 59.56 | 95.00 | 80.79 | 87.90 | 82.88 | 46.63 | -| cl-nagoya/ruri-base-v2 | 75.59 | 75.97 | 55.55 | 92.36 | 80.93 | 88.87 | 89.26 | 46.17 | -| cl-nagoya/ruri-base | 75.34 | 76.66 | 56.02 | 91.69 | 81.22 | 88.61 | 87.73 | 45.47 | -| pkshatech/GLuCoSE-base-ja | 75.02 | **82.03** | 57.93 | 92.89 | 78.52 | 87.71 | 77.24 | 48.82 | -| cl-nagoya/ruri-small | 74.83 | 80.55 | 55.41 | 88.86 | 81.08 | 88.00 | 83.97 | 45.95 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 74.53 | 79.72 | 58.02 | 91.99 | 80.16 | 88.78 | 77.31 | 45.73 | -| cl-nagoya/ruri-v3-70m | 74.45 | 81.81 | 57.98 | 93.39 | 78.92 | 87.83 | 76.87 | 44.38 | -| cl-nagoya/ruri-small-v2 | 74.12 | 77.67 | 55.60 | 88.64 | 82.00 | 88.16 | 81.57 | 45.23 | -| BAAI/bge-m3 | 74.10 | 71.86 | 56.65 | 94.41 | 78.68 | 89.70 | 84.25 | 43.17 | -| 
cl-nagoya/unsup-simcse-ja-large | 73.71 | 76.40 | 55.05 | 90.57 | 79.25 | 87.50 | 82.89 | 44.33 | -| pkshatech/GLuCoSE-base-ja-v2 | 73.52 | 75.28 | 55.19 | 89.24 | 78.73 | 87.14 | 85.83 | 43.23 | -| cl-nagoya/ruri-v3-30m | 73.35 | 75.60 | 55.71 | 92.63 | 78.31 | 86.72 | 81.40 | 43.11 | -| cl-nagoya/sup-simcse-ja-large | 72.74 | 72.61 | 54.56 | 89.42 | 79.23 | 87.71 | 80.43 | 45.26 | -| cl-nagoya/sup-simcse-ja-base | 72.72 | 71.93 | 54.54 | 91.01 | 80.11 | 87.63 | 81.92 | 41.88 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 72.59 | 77.67 | 53.48 | 89.28 | 76.79 | 86.24 | 83.77 | 40.89 | -| OpenAI/text-embedding-3-small | 72.43 | 70.01 | 55.92 | 89.97 | 77.66 | 88.67 | 84.72 | 40.05 | -| cl-nagoya/unsup-simcse-ja-base | 72.41 | 73.65 | 54.14 | 89.87 | 77.68 | 86.10 | 84.13 | 41.31 | -| intfloat/multilingual-e5-large | 72.30 | 69.70 | 57.64 | 95.55 | 74.01 | 88.71 | 78.11 | 42.38 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 72.06 | 77.70 | 51.46 | 88.45 | 78.72 | 86.40 | 83.50 | 38.15 | -| sentence-transformers/LaBSE | 71.85 | 74.74 | 51.63 | 89.52 | 77.09 | 88.39 | 81.47 | 40.11 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 71.83 | 76.55 | 52.73 | 88.22 | 77.22 | 86.25 | 81.45 | 40.38 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 71.81 | 77.79 | 51.11 | 87.82 | 77.97 | 86.34 | 85.06 | 36.56 | -| pkshatech/RoSEtta-base-ja | 71.70 | 70.21 | 52.62 | 87.28 | 79.59 | 86.96 | 84.01 | 41.24 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 71.48 | 74.30 | 51.97 | 89.69 | 77.83 | 84.60 | 83.82 | 38.15 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 71.46 | 76.40 | 51.65 | 87.86 | 78.15 | 86.44 | 81.80 | 37.93 | -| sentence-transformers/stsb-xlm-r-multilingual | 71.40 | 75.14 | 51.67 | 87.15 | 74.34 | 86.07 | 83.73 | 41.68 | -| pkshatech/simcse-ja-bert-base-clcmlp | 70.67 | 68.28 | 51.75 | 88.21 | 79.65 | 87.23 | 81.18 | 38.39 | -| intfloat/multilingual-e5-base | 69.70 | 64.29 | 54.17 | 92.32 | 73.19 | 86.78 | 78.50 | 38.65 | -| OpenAI/text-embedding-ada-002 | 69.39 | 64.42 | 
53.13 | 88.76 | 74.57 | 86.89 | 80.39 | 37.57 | -| colorfulscoop/sbert-base-ja | 68.05 | 70.80 | 47.80 | 83.50 | 72.89 | 83.71 | 82.63 | 35.06 | -| intfloat/multilingual-e5-small | 67.77 | 58.66 | 51.21 | 87.73 | 71.34 | 86.77 | 81.78 | 36.91 | -| hotchpotch/static-embedding-japanese | 66.73 | 68.06 | 46.81 | 79.82 | 74.79 | 82.18 | 83.33 | 32.12 | - -## Reranking -| Model | Avg. | esci
(ndcg@10) | jacwir_reranking
(ndcg@10) | jqara
(ndcg@10) | miracl_reranking
(ndcg@10) | mldr_reranking
(ndcg@10) | -|:----------------------------------------------|:---------:|:-------------------:|:-------------------------------:|:--------------------:|:-------------------------------:|:-----------------------------:| -| sbintuitions/sarashina-embedding-v2-1b | **86.28** | 93.58 | 88.79 | **70.55** | 85.93 | 92.53 | -| cl-nagoya/ruri-v3-310m | 85.84 | 93.43 | 88.46 | 68.93 | 85.01 | 93.36 | -| cl-nagoya/ruri-v3-130m | 85.71 | 93.37 | 88.65 | 66.30 | **86.59** | 93.62 | -| pfnet/plamo-embedding-1b | 85.05 | 93.59 | **91.74** | 66.15 | 81.91 | 91.87 | -| sbintuitions/sarashina-embedding-v1-1b | 84.36 | **93.60** | 86.85 | 65.92 | 85.17 | 90.24 | -| cl-nagoya/ruri-v3-70m | 84.21 | 93.20 | 87.48 | 63.09 | 85.03 | 92.26 | -| BAAI/bge-m3 | 84.10 | 93.27 | 89.55 | 53.92 | 85.96 | **97.78** | -| cl-nagoya/ruri-large-v2 | 83.89 | 93.21 | 85.29 | 64.47 | 85.78 | 90.68 | -| OpenAI/text-embedding-3-large | 83.06 | 93.58 | 86.78 | 56.89 | 83.80 | 94.24 | -| intfloat/multilingual-e5-large | 83.01 | 93.31 | 90.37 | 56.14 | 86.31 | 88.91 | -| cl-nagoya/ruri-v3-30m | 82.93 | 93.06 | 87.61 | 57.47 | 83.52 | 92.97 | -| pkshatech/GLuCoSE-base-ja-v2 | 82.63 | 93.02 | 88.27 | 60.70 | 82.44 | 88.71 | -| cl-nagoya/ruri-base-v2 | 82.46 | 93.17 | 85.76 | 60.66 | 84.26 | 88.47 | -| cl-nagoya/ruri-small-v2 | 82.30 | 93.20 | 88.18 | 56.70 | 83.33 | 90.09 | -| cl-nagoya/ruri-large | 81.26 | 92.99 | 86.61 | 59.59 | 80.23 | 86.91 | -| pkshatech/RoSEtta-base-ja | 81.25 | 92.68 | 86.83 | 57.92 | 80.38 | 88.45 | -| google/embeddinggemma-300m | 80.93 | 93.26 | 86.72 | 52.09 | 82.38 | 90.19 | -| cl-nagoya/ruri-base | 80.31 | 92.92 | 87.24 | 54.15 | 79.22 | 88.01 | -| intfloat/multilingual-e5-small | 80.09 | 92.98 | 89.99 | 49.28 | 81.78 | 86.41 | -| cl-nagoya/ruri-small | 79.98 | 93.01 | 87.67 | 53.26 | 77.84 | 88.14 | -| intfloat/multilingual-e5-base | 79.46 | 92.90 | 88.65 | 47.61 | 81.97 | 86.15 | -| OpenAI/text-embedding-3-small | 77.29 | 92.92 | 84.72 | 38.58 | 77.61 | 92.61 | -| 
hotchpotch/static-embedding-japanese | 77.09 | 91.87 | 80.96 | 47.06 | 72.01 | 93.55 | -| OpenAI/text-embedding-ada-002 | 75.63 | 93.04 | 83.91 | 37.54 | 72.83 | 90.83 | -| pkshatech/GLuCoSE-base-ja | 72.37 | 91.82 | 74.54 | 30.24 | 77.82 | 87.42 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 71.88 | 91.30 | 65.14 | 44.96 | 71.21 | 86.79 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 70.59 | 90.93 | 61.45 | 42.47 | 70.65 | 87.42 | -| cl-nagoya/sup-simcse-ja-base | 70.36 | 91.84 | 64.27 | 37.48 | 70.88 | 87.34 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 69.92 | 91.17 | 65.41 | 38.39 | 70.19 | 84.42 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 69.81 | 91.51 | 67.45 | 36.04 | 68.68 | 85.38 | -| cl-nagoya/sup-simcse-ja-large | 68.76 | 91.50 | 56.15 | 38.30 | 71.26 | 86.60 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 68.58 | 90.92 | 60.51 | 36.84 | 69.31 | 85.31 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 68.15 | 90.67 | 58.16 | 36.66 | 69.09 | 86.15 | -| pkshatech/simcse-ja-bert-base-clcmlp | 68.02 | 91.27 | 57.45 | 31.74 | 72.12 | 87.50 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 67.73 | 90.95 | 59.81 | 37.20 | 67.90 | 82.81 | -| cl-nagoya/unsup-simcse-ja-large | 67.39 | 90.95 | 54.17 | 38.78 | 70.02 | 83.04 | -| sentence-transformers/LaBSE | 67.01 | 91.47 | 67.85 | 24.62 | 69.28 | 81.84 | -| cl-nagoya/unsup-simcse-ja-base | 66.20 | 91.18 | 51.54 | 32.19 | 69.96 | 86.12 | -| colorfulscoop/sbert-base-ja | 59.38 | 89.97 | 37.15 | 22.21 | 65.03 | 82.55 | -| sentence-transformers/stsb-xlm-r-multilingual | 57.93 | 89.72 | 39.21 | 18.51 | 65.36 | 76.88 | - -## Clustering -| Model | Avg. | livedoor_news
(v_measure_score) | mewsc16
(v_measure_score) | sib200_jpn_clust
(v_measure_score) | -|:----------------------------------------------|:---------:|:------------------------------------:|:------------------------------:|:---------------------------------------:| -| cl-nagoya/sup-simcse-ja-base | **52.57** | 55.11 | **53.39** | 49.21 | -| sbintuitions/sarashina-embedding-v2-1b | 52.56 | 57.41 | 51.67 | 48.59 | -| pfnet/plamo-embedding-1b | 52.50 | **61.74** | 48.03 | 47.73 | -| OpenAI/text-embedding-3-large | 51.82 | 57.09 | 49.55 | 48.83 | -| cl-nagoya/ruri-v3-130m | 51.13 | 54.36 | 48.84 | 50.20 | -| cl-nagoya/ruri-large-v2 | 50.88 | 55.62 | 50.97 | 46.06 | -| intfloat/multilingual-e5-large | 50.58 | 51.58 | 46.81 | **53.35** | -| cl-nagoya/ruri-v3-310m | 50.52 | 58.56 | 48.60 | 44.41 | -| sbintuitions/sarashina-embedding-v1-1b | 50.30 | 56.03 | 50.69 | 44.19 | -| cl-nagoya/sup-simcse-ja-large | 50.12 | 53.38 | 51.26 | 45.74 | -| intfloat/multilingual-e5-base | 50.12 | 53.79 | 49.44 | 47.13 | -| cl-nagoya/ruri-small-v2 | 49.97 | 52.61 | 49.47 | 47.82 | -| cl-nagoya/ruri-v3-70m | 49.95 | 54.92 | 47.74 | 47.20 | -| cl-nagoya/ruri-large | 49.93 | 54.44 | 50.59 | 44.76 | -| cl-nagoya/ruri-v3-30m | 49.90 | 53.69 | 47.96 | 48.04 | -| cl-nagoya/ruri-small | 49.59 | 52.90 | 49.37 | 46.51 | -| google/embeddinggemma-300m | 49.48 | 55.33 | 50.55 | 42.55 | -| pkshatech/simcse-ja-bert-base-clcmlp | 49.45 | 49.11 | 47.02 | 52.21 | -| intfloat/multilingual-e5-small | 49.29 | 51.94 | 52.34 | 43.59 | -| cl-nagoya/ruri-base | 49.10 | 56.69 | 52.05 | 38.55 | -| OpenAI/text-embedding-3-small | 48.91 | 54.57 | 47.55 | 44.59 | -| OpenAI/text-embedding-ada-002 | 48.78 | 49.67 | 46.92 | 49.74 | -| oshizo/sbert-jsnli-luke-japanese-base-lite | 48.75 | 51.70 | 51.52 | 43.03 | -| pkshatech/GLuCoSE-base-ja-v2 | 48.19 | 54.46 | 46.12 | 43.98 | -| pkshatech/GLuCoSE-base-ja | 47.12 | 50.41 | 49.52 | 41.43 | -| cl-nagoya/ruri-base-v2 | 46.84 | 54.38 | 50.61 | 35.53 | -| BAAI/bge-m3 | 45.56 | 54.76 | 42.00 | 39.91 | -| pkshatech/RoSEtta-base-ja | 44.88 | 48.89 | 
45.16 | 40.61 | -| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 44.54 | 51.30 | 46.27 | 36.04 | -| cl-nagoya/unsup-simcse-ja-large | 43.52 | 51.48 | 44.44 | 34.65 | -| MU-Kindai/Japanese-MixCSE-BERT-base | 43.45 | 48.56 | 43.20 | 38.60 | -| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 42.86 | 45.84 | 44.08 | 38.67 | -| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 42.02 | 51.59 | 42.68 | 31.78 | -| cl-nagoya/unsup-simcse-ja-base | 41.29 | 50.65 | 39.58 | 33.63 | -| MU-Kindai/Japanese-DiffCSE-BERT-base | 39.93 | 46.01 | 39.22 | 34.56 | -| sentence-transformers/LaBSE | 39.82 | 49.08 | 41.78 | 28.59 | -| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 39.27 | 48.79 | 42.61 | 26.42 | -| colorfulscoop/sbert-base-ja | 39.04 | 40.60 | 46.18 | 30.36 | -| hotchpotch/static-embedding-japanese | 35.91 | 51.44 | 34.81 | 21.47 | -| sentence-transformers/stsb-xlm-r-multilingual | 27.67 | 26.62 | 32.05 | 24.34 | - diff --git a/make_leaderboard.py b/make_leaderboard.py deleted file mode 100644 index 5d472eb..0000000 --- a/make_leaderboard.py +++ /dev/null @@ -1,161 +0,0 @@ -import json -from collections import defaultdict -from pathlib import Path - -from tabulate import tabulate - -dataset_name_aliases = { - "amazon_counterfactual_classification": "amazon_counterfactual", - "amazon_review_classification": "amazon_review", - "massive_intent_classification": "massive_intent", - "massive_scenario_classification": "massive_scenario", - "japanese_sentiment_classification": "jpn_sentiment", - "sib200_japanese_classification": "sib200_jpn_cls", - "sib200_japanese_clustering": "sib200_jpn_clust", - "nlp_journal_abs_article": "nlp_abs_article", - "nlp_journal_abs_intro": "nlp_abs_intro", - "nlp_journal_title_abs": "nlp_title_abs", - "nlp_journal_title_intro": "nlp_title_intro", -} - -TASK_ORDER = ["Retrieval", "STS", "Classification", "Reranking", "Clustering"] -SUMMARY_KEY = "Summary" - -""" -Collects the results from the results folder. 
-""" -# Load reference structure from sbintuitions/sarashina-embedding-v1-1b/summary.json -reference_file = Path("docs/results/sbintuitions/sarashina-embedding-v1-1b/summary.json") -with open(reference_file) as f: - reference_structure = json.load(f) - -# Extract the expected structure -expected_structure = {} -for task_name, task_results in reference_structure.items(): - expected_structure[task_name] = set(task_results.keys()) - - -def has_same_structure(summary: dict, expected: dict) -> bool: - """Check if summary has exactly the same structure as expected.""" - if set(summary.keys()) != set(expected.keys()): - return False - - for task_name, datasets in expected.items(): - if set(summary[task_name].keys()) != datasets: - return False - - return True - - -# {task_name: {model_signature: {(dataset_name, metric_name): score}}} -all_results: dict[str, dict[str, dict[str, float]]] = defaultdict(lambda: defaultdict(dict)) -for summary_file in Path("docs/results").rglob("summary.json"): - if not summary_file.exists(): - continue - - with open(summary_file) as f: - summary = json.load(f) - - # Skip models that don't have the same structure as reference - if not has_same_structure(summary, expected_structure): - org_name = summary_file.parent.parent.name - model_name = summary_file.parent.name - print(f"Skipping {org_name}/{model_name}: different structure") - continue - - org_name = summary_file.parent.parent.name - model_name = summary_file.parent.name - model_signature = f"{org_name}/{model_name}" - - for task_name, task_results in summary.items(): - task_results_formatted: dict[str, float] = {} - task_scores: list[float] = [] - for dataset_name, metric_dict in task_results.items(): - metric_name, score = next(iter(metric_dict.items())) - dataset_name = dataset_name_aliases.get(dataset_name, dataset_name) - task_results_formatted[f"{dataset_name}
({metric_name})"] = score - task_scores.append(score) - all_results[task_name][model_signature] = task_results_formatted - all_results[SUMMARY_KEY][model_signature][task_name] = sum(task_scores) / len(task_scores) - -""" -Creates markdown tables for each task. -""" - - -def format_score(score: float) -> str: - return f"{score * 100:.2f}" - - -AVG_COLUMN_NAME = "Avg." -markdown_tables: dict[str, str] = {} -for task_name, task_results in all_results.items(): - # format to markdown table - dataset_keys = list(task_results[next(iter(task_results))].keys()) - if task_name == SUMMARY_KEY: - # Only include existing tasks in the summary - dataset_keys = [task for task in TASK_ORDER if task in all_results] - - header = ["Model", AVG_COLUMN_NAME, *dataset_keys] - table_list: list[list[str | float]] = [] - for model_signature, dataset_scores in task_results.items(): - # Skip models that don't have all required datasets - if not all(k in dataset_scores for k in dataset_keys): - continue - - model_scores = [dataset_scores[k] for k in dataset_keys] - if task_name == SUMMARY_KEY: - scores_by_dataset = [] - for _task_name, _task_results in all_results.items(): - if _task_name != SUMMARY_KEY and model_signature in _task_results: - scores_by_dataset.extend(list(_task_results[model_signature].values())) - if not scores_by_dataset: # Skip if no scores available - continue - average_score = sum(scores_by_dataset) / len(scores_by_dataset) - else: - average_score = sum(model_scores) / len(model_scores) - table_list.append([model_signature, average_score, *model_scores]) - - # sort by the average score - avg_idx = header.index(AVG_COLUMN_NAME) - table_list.sort(key=lambda x: x[avg_idx], reverse=True) - - # make the highest score in each dataset bold - for dataset_name in [AVG_COLUMN_NAME, *dataset_keys]: - task_idx = header.index(dataset_name) - max_score = max(row[task_idx] for row in table_list) - for row in table_list: - if row[task_idx] == max_score: - row[task_idx] = 
f"**{format_score(row[task_idx])}**" - else: - row[task_idx] = format_score(row[task_idx]) - - # add header - table_list.insert(0, ["Model", AVG_COLUMN_NAME, *dataset_keys]) - # Set alignment: left for model names, center for all numeric columns - col_alignment = ["left"] + ["center"] * (len(dataset_keys) + 1) - markdown_table = tabulate(table_list, headers="firstrow", tablefmt="pipe", colalign=col_alignment) - markdown_tables[task_name] = markdown_table - -""" -Dump the markdown tables to a file. -""" -with open("leaderboard.md", "w") as f: - f.write("# Leaderboard\n") - f.write( - "This leaderboard shows the results stored under `docs/results`. The scores are all multiplied by 100.\n\n" - ) - for task_name in [SUMMARY_KEY, *TASK_ORDER]: - if task_name not in markdown_tables: - continue - markdown_table = markdown_tables[task_name] - f.write(f"## {task_name}\n") - - if task_name == SUMMARY_KEY: - f.write( - "\nThe summary shows the average scores within each task. " - "The average score is the average of scores by dataset.\n\n" - ) - - f.write(markdown_table) - f.write("\n\n") diff --git a/poetry.lock b/poetry.lock index 5766c9f..1f74006 100644 --- a/poetry.lock +++ b/poetry.lock @@ -198,7 +198,7 @@ version = "23.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, @@ -393,11 +393,11 @@ description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["main", "dev"] +markers = "platform_system == \"Windows\" or sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "platform_system == \"Windows\""} [[package]] name = "datasets" @@ -476,9 +476,10 @@ files = [ name = "eval-type-backport" version = "0.1.3" description = "Like `typing._eval_type`, but lets older Python versions use newer typing features." -optional = false +optional = true python-versions = ">=3.7" groups = ["main"] +markers = "extra == \"v1\" or extra == \"all\"" files = [ {file = "eval_type_backport-0.1.3-py3-none-any.whl", hash = "sha256:519d2a993b3da286df9f90e17f503f66435106ad870cf26620c5720e2158ddf2"}, {file = "eval_type_backport-0.1.3.tar.gz", hash = "sha256:d83ee225331dfa009493cec1f3608a71550b515ee4749abe78da14e3c5e314f5"}, @@ -869,7 +870,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -919,6 +920,118 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jiter" +version = "0.12.0" +description = "Fast iterable JSON parser." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "jiter-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e7acbaba9703d5de82a2c98ae6a0f59ab9770ab5af5fa35e43a303aee962cf65"}, + {file = "jiter-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:364f1a7294c91281260364222f535bc427f56d4de1d8ffd718162d21fbbd602e"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85ee4d25805d4fb23f0a5167a962ef8e002dbfb29c0989378488e32cf2744b62"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:796f466b7942107eb889c08433b6e31b9a7ed31daceaecf8af1be26fb26c0ca8"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:35506cb71f47dba416694e67af996bbdefb8e3608f1f78799c2e1f9058b01ceb"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:726c764a90c9218ec9e4f99a33d6bf5ec169163f2ca0fc21b654e88c2abc0abc"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa47810c5565274810b726b0dc86d18dce5fd17b190ebdc3890851d7b2a0e74"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ec0259d3f26c62aed4d73b198c53e316ae11f0f69c8fbe6682c6dcfa0fcce2"}, + {file = "jiter-0.12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:79307d74ea83465b0152fa23e5e297149506435535282f979f18b9033c0bb025"}, + {file = "jiter-0.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cf6e6dd18927121fec86739f1a8906944703941d000f0639f3eb6281cc601dca"}, + {file = "jiter-0.12.0-cp310-cp310-win32.whl", hash = "sha256:b6ae2aec8217327d872cbfb2c1694489057b9433afce447955763e6ab015b4c4"}, + {file = "jiter-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:c7f49ce90a71e44f7e1aa9e7ec415b9686bbc6a5961e57eab511015e6759bc11"}, + {file = "jiter-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:d8f8a7e317190b2c2d60eb2e8aa835270b008139562d70fe732e1c0020ec53c9"}, + {file = "jiter-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2218228a077e784c6c8f1a8e5d6b8cb1dea62ce25811c356364848554b2056cd"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9354ccaa2982bf2188fd5f57f79f800ef622ec67beb8329903abf6b10da7d423"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8f2607185ea89b4af9a604d4c7ec40e45d3ad03ee66998b031134bc510232bb7"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a585a5e42d25f2e71db5f10b171f5e5ea641d3aa44f7df745aa965606111cc2"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd9e21d34edff5a663c631f850edcb786719c960ce887a5661e9c828a53a95d9"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a612534770470686cd5431478dc5a1b660eceb410abade6b1b74e320ca98de6"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3985aea37d40a908f887b34d05111e0aae822943796ebf8338877fee2ab67725"}, + {file = "jiter-0.12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b1207af186495f48f72529f8d86671903c8c10127cac6381b11dddc4aaa52df6"}, + {file = "jiter-0.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef2fb241de583934c9915a33120ecc06d94aa3381a134570f59eed784e87001e"}, + {file = "jiter-0.12.0-cp311-cp311-win32.whl", hash = "sha256:453b6035672fecce8007465896a25b28a6b59cfe8fbc974b2563a92f5a92a67c"}, + {file = "jiter-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:ca264b9603973c2ad9435c71a8ec8b49f8f715ab5ba421c85a51cde9887e421f"}, + {file = "jiter-0.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:cb00ef392e7d684f2754598c02c409f376ddcef857aae796d559e6cacc2d78a5"}, + {file = "jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37"}, + {file = "jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126"}, + {file = "jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9"}, + {file = "jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86"}, + {file = "jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44"}, + {file = "jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb"}, + {file = "jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789"}, + {file = "jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = 
"sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e"}, + {file = "jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9"}, + {file = "jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626"}, + {file = "jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c"}, + {file = "jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de"}, + {file = "jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a"}, + {file = "jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60"}, + {file = "jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6"}, + {file = "jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4"}, + {file = "jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb"}, + {file = "jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7"}, + {file = "jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3"}, + {file = "jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525"}, + {file = "jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a"}, + {file = 
"jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67"}, + {file = "jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b"}, + {file = "jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42"}, + {file = "jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf"}, + {file = "jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451"}, + {file = "jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783"}, + {file = "jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b"}, + {file = 
"jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6"}, + {file = "jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183"}, + {file = "jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873"}, + {file = "jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473"}, + {file = "jiter-0.12.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c9d28b218d5f9e5f69a0787a196322a5056540cb378cac8ff542b4fa7219966c"}, + {file = "jiter-0.12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0ee12028daf8cfcf880dd492349a122a64f42c059b6c62a2b0c96a83a8da820"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b135ebe757a82d67ed2821526e72d0acf87dd61f6013e20d3c45b8048af927b"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15d7fafb81af8a9e3039fc305529a61cd933eecee33b4251878a1c89859552a3"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92d1f41211d8a8fe412faad962d424d334764c01dac6691c44691c2e4d3eedaf"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a64a48d7c917b8f32f25c176df8749ecf08cec17c466114727efe7441e17f6d"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:122046f3b3710b85de99d9aa2f3f0492a8233a2f54a64902b096efc27ea747b5"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:27ec39225e03c32c6b863ba879deb427882f243ae46f0d82d68b695fa5b48b40"}, + {file = "jiter-0.12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26b9e155ddc132225a39b1995b3b9f0fe0f79a6d5cbbeacf103271e7d309b404"}, + {file = 
"jiter-0.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9ab05b7c58e29bb9e60b70c2e0094c98df79a1e42e397b9bb6eaa989b7a66dd0"}, + {file = "jiter-0.12.0-cp39-cp39-win32.whl", hash = "sha256:59f9f9df87ed499136db1c2b6c9efb902f964bed42a582ab7af413b6a293e7b0"}, + {file = "jiter-0.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:d3719596a1ebe7a48a498e8d5d0c4bf7553321d4c3eee1d620628d51351a3928"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:4739a4657179ebf08f85914ce50332495811004cc1747852e8b2041ed2aab9b8"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:41da8def934bf7bec16cb24bd33c0ca62126d2d45d81d17b864bd5ad721393c3"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c44ee814f499c082e69872d426b624987dbc5943ab06e9bbaa4f81989fdb79e"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd2097de91cf03eaa27b3cbdb969addf83f0179c6afc41bbc4513705e013c65d"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c"}, + {file = "jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b"}, +] + [[package]] name = "joblib" version = "1.4.2" @@ -933,113 
+1046,120 @@ files = [ [[package]] name = "jsonargparse" -version = "4.29.0" -description = "Implement minimal boilerplate CLIs derived from type hints and parse from command line, config files and environment variables." +version = "4.44.0" +description = "Minimal effort CLIs derived from type hints and parse from command line, config files and environment variables." optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "jsonargparse-4.29.0-py3-none-any.whl", hash = "sha256:e093d9509996b031d156fe8d4a087e2d91adbfc654b9e2c783878d45ad0dfefe"}, - {file = "jsonargparse-4.29.0.tar.gz", hash = "sha256:03d407122c856095c48b07c58107002c9d3eaeb2795d8040efad831db5817494"}, + {file = "jsonargparse-4.44.0-py3-none-any.whl", hash = "sha256:7243fd177c29d46a56b4fdf677e3b83acc71fac1ca3710d2954882f220d09741"}, + {file = "jsonargparse-4.44.0.tar.gz", hash = "sha256:54836d2bbb37483dc93fbd21d67241bb3ff0fad6ec513176776c659b2133b287"}, ] [package.dependencies] -jsonnet = {version = ">=0.13.0", optional = true, markers = "os_name == \"posix\" and extra == \"jsonnet\""} -jsonnet-binary = {version = ">=0.17.0", optional = true, markers = "os_name != \"posix\" and extra == \"jsonnet\""} +jsonnet = {version = ">=0.21.0", optional = true, markers = "extra == \"jsonnet\""} PyYAML = ">=3.13" [package.extras] -all = ["jsonargparse[argcomplete]", "jsonargparse[fsspec]", "jsonargparse[jsonnet]", "jsonargparse[jsonschema]", "jsonargparse[omegaconf]", "jsonargparse[reconplogger]", "jsonargparse[ruyaml]", "jsonargparse[signatures]", "jsonargparse[typing-extensions]", "jsonargparse[urls]"] -argcomplete = ["argcomplete (>=2.0.0) ; python_version < \"3.8\"", "argcomplete (>=3.3.0) ; python_version >= \"3.8\""] +all = ["jsonargparse[fsspec]", "jsonargparse[jsonnet]", "jsonargparse[jsonschema]", "jsonargparse[omegaconf]", "jsonargparse[reconplogger]", "jsonargparse[ruamel]", "jsonargparse[signatures]", "jsonargparse[toml]", 
"jsonargparse[typing-extensions]", "jsonargparse[urls]"] +argcomplete = ["argcomplete (>=3.5.1)"] coverage = ["jsonargparse[test-no-urls]", "pytest-cov (>=4.0.0)"] -dev = ["build (>=0.10.0)", "jsonargparse[coverage]", "jsonargparse[doc]", "jsonargparse[mypy]", "jsonargparse[test]", "pre-commit (>=2.19.0)", "tox (>=3.25.0)"] +dev = ["build (>=0.10.0)", "jsonargparse[coverage]", "jsonargparse[doc]", "jsonargparse[test]", "pre-commit (>=2.19.0)", "tox (>=3.25.0)"] doc = ["Sphinx (>=1.7.9)", "autodocsumm (>=0.1.10)", "sphinx-autodoc-typehints (>=1.19.5)", "sphinx-rtd-theme (>=1.2.2)"] fsspec = ["fsspec (>=0.8.4)"] -jsonnet = ["jsonnet (>=0.13.0) ; os_name == \"posix\"", "jsonnet-binary (>=0.17.0) ; os_name != \"posix\""] +jsonnet = ["jsonnet (>=0.21.0)"] jsonschema = ["jsonschema (>=3.2.0)"] maintainer = ["bump2version (>=0.5.11)", "twine (>=4.0.2)"] omegaconf = ["omegaconf (>=2.1.1)"] reconplogger = ["reconplogger (>=4.4.0)"] -ruyaml = ["ruyaml (>=0.20.0)"] -signatures = ["docstring-parser (>=0.15)", "jsonargparse[typing-extensions]", "typeshed-client (>=2.1.0)"] -test = ["attrs (>=22.2.0)", "jsonargparse[test-no-urls]", "pydantic (>=2.3.0)", "responses (>=0.12.0)", "types-PyYAML (>=6.0.11)", "types-requests (>=2.28.9)"] +ruamel = ["ruamel.yaml (>=0.18.15)"] +ruyaml = ["jsonargparse[ruamel]"] +shtab = ["shtab (>=1.7.1)"] +signatures = ["docstring-parser (>=0.17)", "jsonargparse[typing-extensions]", "typeshed-client (>=2.8.2)"] +test = ["attrs (>=22.2.0)", "jsonargparse[argcomplete]", "jsonargparse[shtab]", "jsonargparse[test-no-urls]", "pydantic (>=2.3.0)", "responses (>=0.12.0)", "types-PyYAML (>=6.0.11)", "types-requests (>=2.28.9)"] test-no-urls = ["pytest (>=6.2.5)", "pytest-subtests (>=0.8.0)"] +toml = ["toml (>=0.10.2)"] typing-extensions = ["typing-extensions (>=3.10.0.0) ; python_version < \"3.10\""] urls = ["requests (>=2.18.4)"] [[package]] name = "jsonnet" -version = "0.20.0" -description = "Python bindings for Jsonnet - The data templating language" 
+version = "0.21.0" +description = "Python bindings for Jsonnet - The data templating language " optional = false python-versions = "*" groups = ["main"] -markers = "os_name == \"posix\"" files = [ - {file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"}, + {file = "jsonnet-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4717d83a15144adc9ae7d3d0a0d0ff54d7fe18349346130bd9b9bb7f8c9b0db"}, + {file = "jsonnet-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:121a24583fe6980705b8f775f2b66e2b01c4006dbd258d047d54f60b76b98681"}, + {file = "jsonnet-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c87bbf37e2f118e75de30ec4d3d1d2a5eedd7fe213f00042e3a2fe0e7026bbc"}, + {file = "jsonnet-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:902cb1a9bb7916f3e8041a2936e6ba4deea7312843927360c698d1092144d49c"}, + {file = "jsonnet-0.21.0-cp310-cp310-win_amd64.whl", hash = "sha256:ad896e2d70bc6ea4c5503b9587703e75a233506a57c33fa3192922e49b97a90a"}, + {file = "jsonnet-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bc2c8b35122884dcb63431a831e81d6ab494e37148704a781ef88bb7e12fb36b"}, + {file = "jsonnet-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f837389c6b384070b870c98f12c05847fdd801bb7752ab7893beaeac662f4b54"}, + {file = "jsonnet-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85a2089fb77d6db86ef84d9403654d710ba3e41dcf4ad21d0cba2635497ba852"}, + {file = "jsonnet-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:559d59e8984b804f60a97d72e7aeaa2a2572fc0a5bf7ef1109eb21b91dbc166c"}, + {file = "jsonnet-0.21.0-cp311-cp311-win_amd64.whl", hash = "sha256:6018365037491e91b5d3f0eccfdf78812d84e25aa9ccbba097bd3ba6ce70709a"}, + {file = "jsonnet-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ba35051103bed81ddcb446db52c31bba00391c52069107498eb44952feac8a30"}, + {file = 
"jsonnet-0.21.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71afa464a74dcbec30b39d8f28cad091ce27497a8620c0ef7859814e173ce454"}, + {file = "jsonnet-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fccebb019917004cf860490a80d17189bad01c9d425b7a1cb138a14745488cf0"}, + {file = "jsonnet-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ba913bb650b2b5dac29e65fd6963dff7cad960580523c0ccdd66e23e22e3b772"}, + {file = "jsonnet-0.21.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a39b5a3195bb6ec16050d14f8aa9378cf862ff2dd54ca0973cbbfbc9cec6e89"}, + {file = "jsonnet-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:95d0e0e59ed29f7e424066c05c4585fd255e288fd6050686e1d5bb54bd719896"}, + {file = "jsonnet-0.21.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb926cae6ea157e2e0851e6ec8f6a2949e926f67754a87980bbcb2698a211dc5"}, + {file = "jsonnet-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb642fe864e41a432957f71bfa57ae4eaab904886f06dec183c9e40d6ce4e24b"}, + {file = "jsonnet-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22a87070c1c50ecf6c0c8df252a4984a89275ceb18fe059dfa99eeaf548be71f"}, + {file = "jsonnet-0.21.0-cp313-cp313-win_amd64.whl", hash = "sha256:6e23e55e0a0811b899398aaa03a5b46eea01ffcafc697a705fe7b07eb8cd0ce7"}, + {file = "jsonnet-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9da10d31f4a540d57d31be70fc89c2d1c1124b1fffdee251cc2a5e3b2c56e4ab"}, + {file = "jsonnet-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:27da1b36735ac2c6f277c235f62a9524f769989e52314f06894d070d52ae1225"}, + {file = "jsonnet-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:874e0ed8838b1f4a019c77324d7219173cb971e7f4193a18c4757b643f0f2bfc"}, + {file = "jsonnet-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:874714350cd89e41cb95509490147865696ab9420255ff21e01f11e02d0033d8"}, + {file = "jsonnet-0.21.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:8dddc2e5394a27b6ee12e49253121fea87284cac6042cdb4983b3427d590e1b8"}, + {file = "jsonnet-0.21.0.tar.gz", hash = "sha256:7fe2865e6e1dc2b9791d880fea3eba7e72334b256d85f027da3ae1f56a55b1da"}, ] [[package]] -name = "jsonnet-binary" -version = "0.17.0" -description = "An UNOFFICIAL Python interface to Jsonnet, available as whl packages for Mac, Linux and Windows." +name = "loguru" +version = "0.7.3" +description = "Python logging made (stupidly) simple" optional = false -python-versions = "*" +python-versions = "<4.0,>=3.5" groups = ["main"] -markers = "os_name != \"posix\"" -files = [ - {file = "jsonnet-binary-0.17.0.tar.gz", hash = "sha256:fbadf25f28161b0ccf29e0b72ef689790d14a9b23a681ab6846bd7cb12e17f1d"}, - {file = "jsonnet_binary-0.17.0-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:5db15ed838b6e4d1373d5d772a8283cf3a62282056cc5a3643c65bf257efeda4"}, - {file = "jsonnet_binary-0.17.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:d868f063ea80be0c6b431faf2967e7440260b840374dbe50bae9876e6e36a162"}, - {file = "jsonnet_binary-0.17.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:103c0e58fe79dec930bc5f013832da96cbd41cf0a15cf538027d74cbc22f7414"}, - {file = "jsonnet_binary-0.17.0-cp35-cp35m-win32.whl", hash = "sha256:af2d6eecb7bcfe0e0e2431b2744361d0cdee3363e634f48960bac79e770a2174"}, - {file = "jsonnet_binary-0.17.0-cp35-cp35m-win_amd64.whl", hash = "sha256:4cd4ea866afba1937b706eda4821b17a215976b5fdb56939c1bb2f5d4a67189c"}, - {file = "jsonnet_binary-0.17.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3738ee3992535eaa45d243f95a735c506a3661a910f3e679d0c4393a76823048"}, - {file = "jsonnet_binary-0.17.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:286592ae705ddee12c44c1ed65efc3b5a238e6c659049cacd82ee84dc379d145"}, - {file = "jsonnet_binary-0.17.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:5a0e737ff3aeae8639ff004e5e57d29f0665621234e43576745a143cd69b8944"}, - {file = "jsonnet_binary-0.17.0-cp36-cp36m-win32.whl", hash = 
"sha256:1c2a27c20b34c66d9bb07a8ba019fd4063e46a65d46a652e52feed73b0dad7d3"}, - {file = "jsonnet_binary-0.17.0-cp36-cp36m-win_amd64.whl", hash = "sha256:4b6479c00e10b15d905364005b9a3a2e87e0f7691cb9e88287cf7b702b278139"}, - {file = "jsonnet_binary-0.17.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:60f8b892e25040a677663a77134b2d8251c965fd1a2007c9baf13f1c0b97523f"}, - {file = "jsonnet_binary-0.17.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:926fa7a59777d525f3075179d118550ee196f492529a2d10cba65cbacb88de17"}, - {file = "jsonnet_binary-0.17.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:8710353f6018d09edcd90bfa61fbddafa853b2caac51b97d0e760f2c7e65e2de"}, - {file = "jsonnet_binary-0.17.0-cp37-cp37m-win32.whl", hash = "sha256:0e8f6c03172d84ec4807836f707265d067c4c798a13dbd4818449c37176f38eb"}, - {file = "jsonnet_binary-0.17.0-cp37-cp37m-win_amd64.whl", hash = "sha256:37462cc8602eb64cffae082c2ed379afc8921a5762261226aeae337f51266ea5"}, - {file = "jsonnet_binary-0.17.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d086992e6880900ac33501b7d9744df42a0302ba4c7196a495e812929364e599"}, - {file = "jsonnet_binary-0.17.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:f34fad6492196c5e31f2aee5d6069ec05c335a53f0dbf3a3466722f8c89c4ec1"}, - {file = "jsonnet_binary-0.17.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:19c2bc5509d1e188fd640e96bba28015b5777e7518120b1b772bc2c4b9360274"}, - {file = "jsonnet_binary-0.17.0-cp38-cp38-win32.whl", hash = "sha256:21e00cfb9c18ccd8fc80739f0429472a6113772e76c0880b49bb8a256fa7af6b"}, - {file = "jsonnet_binary-0.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:3ad99ce128bbc58fd6b6bbd059224056a72d007ff7997cc9fe856868a7ae8c33"}, - {file = "jsonnet_binary-0.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:783fdb0ebfa307a981385e140c96b932573388018cff5db7658797b17f453ba8"}, - {file = "jsonnet_binary-0.17.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:21bf65ade6fe923ba4c0a6f5dbc1ffc26594be13c3852b5974913c0d57f5ce68"}, - 
{file = "jsonnet_binary-0.17.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:f26b81994ec796d7479e29b8d83fa74a53e5ad4e4c763a77c56e5365ced1e3af"}, - {file = "jsonnet_binary-0.17.0-cp39-cp39-win32.whl", hash = "sha256:d7003c38ff1478c09cff95990bb2abd3f8473866e99af9c49ac1cc894922bde5"}, - {file = "jsonnet_binary-0.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:7882e48bcc68424c97721011ef0c41e81bae71a61a7f56187f5c31e6b929381f"}, - {file = "jsonnet_binary-0.17.0-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e15cd069124c05c3d09b1528a7382a3b52457bcc3499e11129ae71db1a7b3e2e"}, - {file = "jsonnet_binary-0.17.0-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:9bd64b4123a9d25a78150344a8b06eeca7139311fa73fe98b7dbfcb2b438ab5b"}, - {file = "jsonnet_binary-0.17.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:d356ebc6d2ba04f2f97629e8cc1dbe933c27a7da75cfe10098c97d465ae9c700"}, - {file = "jsonnet_binary-0.17.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7a735e8bc0a23f2e6175262670b3fe993a90b3299fdaaffb1599c1146c5702ab"}, - {file = "jsonnet_binary-0.17.0-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:febd59e04ea4c2dc93f2c29dbfd03b49c092e6a275188f29639ea009f83696e6"}, - {file = "jsonnet_binary-0.17.0-pp37-pypy37_pp73-win32.whl", hash = "sha256:846735c55cf704acb071932dd2c4a22afc7cc77b0a90884080e97f58c7df75a0"}, +files = [ + {file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"}, + {file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"}, ] +[package.dependencies] +colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} +win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} + +[package.extras] +dev = ["Sphinx (==8.1.3) ; python_version >= \"3.11\"", "build (==1.2.2) ; python_version >= \"3.11\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version 
>= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.5.0) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.13.0) ; python_version >= \"3.8\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "myst-parser (==4.0.0) ; python_version >= \"3.11\"", "pre-commit (==4.0.1) ; python_version >= \"3.9\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==8.3.2) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==5.0.0) ; python_version == \"3.8\"", "pytest-cov (==6.0.0) ; python_version >= \"3.9\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.1.0) ; python_version >= \"3.8\"", "sphinx-rtd-theme (==3.0.2) ; python_version >= \"3.11\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.23.2) ; python_version >= \"3.8\"", "twine (==6.0.1) ; python_version >= \"3.11\""] + [[package]] -name = "loguru" -version = "0.7.2" -description = "Python logging made (stupidly) simple" +name = "markdown-it-py" +version = "4.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
optional = false -python-versions = ">=3.5" +python-versions = ">=3.10" groups = ["main"] files = [ - {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, - {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, + {file = "markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147"}, + {file = "markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3"}, ] [package.dependencies] -colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} -win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} +mdurl = ">=0.1,<1.0" [package.extras] -dev = ["Sphinx (==7.2.5) ; python_version >= \"3.9\"", "colorama (==0.4.5) ; python_version < \"3.8\"", "colorama (==0.4.6) ; python_version >= \"3.8\"", "exceptiongroup (==1.1.3) ; python_version >= \"3.7\" and python_version < \"3.11\"", "freezegun (==1.1.0) ; python_version < \"3.8\"", "freezegun (==1.2.2) ; python_version >= \"3.8\"", "mypy (==v0.910) ; python_version < \"3.6\"", "mypy (==v0.971) ; python_version == \"3.6\"", "mypy (==v1.4.1) ; python_version == \"3.7\"", "mypy (==v1.5.1) ; python_version >= \"3.8\"", "pre-commit (==3.4.0) ; python_version >= \"3.8\"", "pytest (==6.1.2) ; python_version < \"3.8\"", "pytest (==7.4.0) ; python_version >= \"3.8\"", "pytest-cov (==2.12.1) ; python_version < \"3.8\"", "pytest-cov (==4.1.0) ; python_version >= \"3.8\"", "pytest-mypy-plugins (==1.9.3) ; python_version >= \"3.6\" and python_version < \"3.8\"", "pytest-mypy-plugins (==3.0.0) ; python_version >= \"3.8\"", "sphinx-autobuild (==2021.3.14) ; python_version >= \"3.9\"", "sphinx-rtd-theme (==1.3.0) ; python_version >= \"3.9\"", "tox (==3.27.1) ; python_version < \"3.8\"", "tox (==4.11.0) ; python_version >= \"3.8\""] +benchmarking = ["psutil", "pytest", 
"pytest-benchmark"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "markdown-it-pyrs", "mistletoe (>=1.0,<2.0)", "mistune (>=3.0,<4.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins (>=0.5.0)"] +profiling = ["gprof2dot"] +rtd = ["ipykernel", "jupyter_sphinx", "mdit-py-plugins (>=0.5.0)", "myst-parser", "pyyaml", "sphinx", "sphinx-book-theme (>=1.0,<2.0)", "sphinx-copybutton", "sphinx-design"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions", "requests"] [[package]] name = "markupsafe" @@ -1123,6 +1243,18 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -1141,6 +1273,66 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] +[[package]] +name = "mteb" +version = "2.4.2" +description = "Massive Text Embedding Benchmark" +optional = false +python-versions = "<3.15,>=3.10" +groups = ["main"] +files = [ + {file = "mteb-2.4.2-py3-none-any.whl", hash = "sha256:c64951835bb9ddf2cb296ee1bb6a2021b7a22738ba50b4de5e6a03ed8e72901a"}, + {file = "mteb-2.4.2.tar.gz", hash = "sha256:64b512db5424e1450065c3b5935ac6a2a3a04662e058f2cab5456ee3a883f2d7"}, +] + +[package.dependencies] +datasets = ">=2.19.0" +numpy = ">=1.0.0,<3.0.0" +polars = ">=0.20.22" +pydantic = ">=2.0.0" +pytrec-eval-terrier = ">=0.5.6" +requests = ">=2.26.0" +rich = ">=0.0.0" +scikit-learn = ">=1.4.0" +scipy = ">=0.0.0" +sentence_transformers 
= ">=3.0.0" +torch = ">1.0.0" +tqdm = ">1.0.0" +typing-extensions = ">=4.5.0" + +[package.extras] +ark = ["tiktoken (>=0.8.0)", "volcengine-python-sdk[ark] (==3.0.2)"] +blip2 = ["salesforce-lavis (>=1.0.2)"] +bm25s = ["PyStemmer (>=2.2.0.3)", "bm25s (>=0.2.6)"] +codecarbon = ["codecarbon (>=2.0.0,<3.0.0)"] +cohere = ["cohere (==5.14.0)"] +colpali-engine = ["colpali_engine (>=0.3.12)"] +colqwen3 = ["torchvision (>=0.22.1)", "transformers (>=4.57)"] +eager-embed = ["qwen_vl_utils (>=0.0.14)"] +faiss-cpu = ["faiss-cpu (>=1.12.0)"] +flagembedding = ["FlagEmbedding (==1.3.4)"] +flash-attention = ["flash-attn (>=2.6.3)"] +gritlm = ["gritlm (>=1.0.2)"] +image = ["torchvision (>0.2.1)", "transformers[torch-vision,vision]"] +jina = ["einops (>=0.8.0)"] +jina-v4 = ["peft (>=0.15.2)", "torchvision (>=0.22.1)", "transformers (>=4.52.0)"] +leaderboard = ["cachetools (>=5.2.0)", "gradio (==6.0.1)", "matplotlib (>=3.9.4)", "plotly (>=5.24.0,<6.0.0)"] +llama-embed-nemotron = ["transformers (==4.51.0)"] +llm2vec = ["llm2vec (>=0.2.3,<0.3.0)"] +model2vec = ["model2vec (>=0.3.0)"] +nomic = ["einops (>=0.8.1)"] +open-clip-torch = ["open_clip_torch (==2.31.0)"] +openai = ["openai (>=1.41.0)", "tiktoken (>=0.8.0)"] +peft = ["peft (>=0.11.0)"] +pylate = ["pylate (>=1.3.1) ; python_version < \"3.13\""] +timm = ["timm (>=1.0.15,<1.1.0)"] +vertexai = ["vertexai (==1.71.1)"] +voyage-v = ["tenacity (>9.0.0)", "voyageai (>0.3.0,<2.0.0)"] +voyageai = ["voyageai (>0.3.0,<2.0.0)"] +xet = ["huggingface_hub (>=0.32.0)"] +xformers = ["xformers (>=0.0.29)"] +youtu = ["tencentcloud-sdk-python-common (>=3.0.1454)", "tencentcloud-sdk-python-lkeap (>=3.0.1451)"] + [[package]] name = "multidict" version = "6.0.5" @@ -1391,20 +1583,6 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] -[[package]] -name = "nvidia-cublas-cu12" -version = "12.6.4.1" -description = "CUBLAS native runtime libraries" -optional = false -python-versions 
= ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb"}, - {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, - {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-win_amd64.whl", hash = "sha256:9e4fa264f4d8a4eb0cdbd34beadc029f453b3bafae02401e999cf3d5a5af75f8"}, -] - [[package]] name = "nvidia-cublas-cu12" version = "12.8.4.1" @@ -1412,29 +1590,13 @@ description = "CUBLAS native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0"}, {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142"}, {file = "nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af"}, ] -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.6.80" -description = "CUDA profiling tools runtime libs." 
-optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:166ee35a3ff1587f2490364f90eeeb8da06cd867bd5b701bf7f9a02b78bc63fc"}, - {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_aarch64.whl", hash = "sha256:358b4a1d35370353d52e12f0a7d1769fc01ff74a191689d3870b2123156184c4"}, - {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132"}, - {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73"}, - {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-win_amd64.whl", hash = "sha256:bbe6ae76e83ce5251b56e8c8e61a964f757175682bbad058b170b136266ab00a"}, -] - [[package]] name = "nvidia-cuda-cupti-cu12" version = "12.8.90" @@ -1442,27 +1604,13 @@ description = "CUDA profiling tools runtime libs." 
optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed"}, {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182"}, {file = "nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e"}, ] -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.6.77" -description = "NVRTC native runtime libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5847f1d6e5b757f1d2b3991a01082a44aad6f10ab3c5c0213fa3e25bddc25a13"}, - {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, - {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:f7007dbd914c56bd80ea31bc43e8e149da38f68158f423ba845fc3292684e45a"}, -] - [[package]] name = "nvidia-cuda-nvrtc-cu12" version = "12.8.93" @@ -1470,29 +1618,13 @@ description = "NVRTC native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = 
"nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994"}, {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8"}, {file = "nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909"}, ] -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.6.77" -description = "CUDA Runtime native Libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6116fad3e049e04791c0256a9778c16237837c08b27ed8c8401e2e45de8d60cd"}, - {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d461264ecb429c84c8879a7153499ddc7b19b5f8d84c204307491989a365588e"}, - {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7"}, - {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8"}, - {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:86c58044c824bf3c173c49a2dbc7a6c8b53cb4e4dca50068be0bf64e9dab3f7f"}, -] - [[package]] name = "nvidia-cuda-runtime-cu12" version = "12.8.90" @@ -1500,30 +1632,13 @@ description = "CUDA Runtime native Libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == 
\"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d"}, {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90"}, {file = "nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8"}, ] -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.5.1.17" -description = "cuDNN runtime libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9fd4584468533c61873e5fda8ca41bac3a38bcb2d12350830c69b0a96a7e4def"}, - {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, - {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-win_amd64.whl", hash = "sha256:d7af0f8a4f3b4b9dbb3122f2ef553b45694ed9c384d5a75bab197b8eefb79ab8"}, -] - -[package.dependencies] -nvidia-cublas-cu12 = "*" - [[package]] name = "nvidia-cudnn-cu12" version = "9.10.2.21" @@ -1531,7 +1646,7 @@ description = "cuDNN runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8"}, {file = "nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = 
"sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8"}, @@ -1541,25 +1656,6 @@ files = [ [package.dependencies] nvidia-cublas-cu12 = "*" -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.0.4" -description = "CUFFT native runtime libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d16079550df460376455cba121db6564089176d9bac9e4f360493ca4741b22a6"}, - {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8510990de9f96c803a051822618d42bf6cb8f069ff3f48d93a8486efdacb48fb"}, - {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5"}, - {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca"}, - {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-win_amd64.whl", hash = "sha256:6048ebddfb90d09d2707efb1fd78d4e3a77cb3ae4dc60e19aab6be0ece2ae464"}, -] - -[package.dependencies] -nvidia-nvjitlink-cu12 = "*" - [[package]] name = "nvidia-cufft-cu12" version = "11.3.3.83" @@ -1567,7 +1663,7 @@ description = "CUFFT native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a"}, {file = "nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74"}, @@ -1577,19 +1673,6 @@ files = [ [package.dependencies] nvidia-nvjitlink-cu12 = "*" -[[package]] -name = "nvidia-cufile-cu12" -version = "1.11.1.6" -description = "cuFile GPUDirect libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, - {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, -] - [[package]] name = "nvidia-cufile-cu12" version = "1.13.1.3" @@ -1597,28 +1680,12 @@ description = "cuFile GPUDirect libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc"}, {file = "nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a"}, ] -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.7.77" -description = "CURAND native runtime libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:6e82df077060ea28e37f48a3ec442a8f47690c7499bff392a5938614b56c98d8"}, - {file = 
"nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf"}, - {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117"}, - {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7b2ed8e95595c3591d984ea3603dd66fe6ce6812b886d59049988a712ed06b6e"}, - {file = "nvidia_curand_cu12-10.3.7.77-py3-none-win_amd64.whl", hash = "sha256:6d6d935ffba0f3d439b7cd968192ff068fafd9018dbf1b85b37261b13cfc9905"}, -] - [[package]] name = "nvidia-curand-cu12" version = "10.3.9.90" @@ -1626,34 +1693,13 @@ description = "CURAND native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd"}, {file = "nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9"}, {file = "nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec"}, ] -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.1.2" -description = "CUDA solver native runtime libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0ce237ef60acde1efc457335a2ddadfd7610b892d94efee7b776c64bb1cac9e0"}, - {file = 
"nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c"}, - {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6"}, - {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dbbe4fc38ec1289c7e5230e16248365e375c3673c9c8bac5796e2e20db07f56e"}, - {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-win_amd64.whl", hash = "sha256:6813f9d8073f555444a8705f3ab0296d3e1cb37a16d694c5fc8b862a0d8706d7"}, -] - -[package.dependencies] -nvidia-cublas-cu12 = "*" -nvidia-cusparse-cu12 = "*" -nvidia-nvjitlink-cu12 = "*" - [[package]] name = "nvidia-cusolver-cu12" version = "11.7.3.90" @@ -1661,7 +1707,7 @@ description = "CUDA solver native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0"}, {file = "nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450"}, @@ -1673,25 +1719,6 @@ nvidia-cublas-cu12 = "*" nvidia-cusparse-cu12 = "*" nvidia-nvjitlink-cu12 = "*" -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.4.2" -description = "CUSPARSE native runtime libraries" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:d25b62fb18751758fe3c93a4a08eff08effedfe4edf1c6bb5afd0890fe88f887"}, - {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7aa32fa5470cf754f72d1116c7cbc300b4e638d3ae5304cfa4a638a5b87161b1"}, - {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73"}, - {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f"}, - {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-win_amd64.whl", hash = "sha256:4acb8c08855a26d737398cba8fb6f8f5045d93f82612b4cfd84645a2332ccf20"}, -] - -[package.dependencies] -nvidia-nvjitlink-cu12 = "*" - [[package]] name = "nvidia-cusparse-cu12" version = "12.5.8.93" @@ -1699,7 +1726,7 @@ description = "CUSPARSE native runtime libraries" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc"}, {file = "nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b"}, @@ -1709,20 +1736,6 @@ files = [ [package.dependencies] nvidia-nvjitlink-cu12 = "*" -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.6.3" -description = "NVIDIA cuSPARSELt" -optional = false -python-versions = "*" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:8371549623ba601a06322af2133c4a44350575f5a3108fb75f3ef20b822ad5f1"}, - {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, - {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-win_amd64.whl", hash = "sha256:3b325bcbd9b754ba43df5a311488fca11a6b5dc3d11df4d190c000cf1a0765c7"}, -] - [[package]] name = "nvidia-cusparselt-cu12" version = "0.7.1" @@ -1730,26 +1743,13 @@ description = "NVIDIA cuSPARSELt" optional = false python-versions = "*" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5"}, {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623"}, {file = "nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075"}, ] -[[package]] -name = "nvidia-nccl-cu12" -version = "2.26.2" -description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, - {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, -] - [[package]] name = "nvidia-nccl-cu12" version = "2.27.5" @@ -1757,26 +1757,12 @@ description = "NVIDIA 
Collective Communication Library (NCCL) Runtime" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a"}, {file = "nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457"}, ] -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.6.85" -description = "Nvidia JIT LTO Library" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a"}, - {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, - {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:e61120e52ed675747825cdd16febc6a0730537451d867ee58bee3853b1b13d1c"}, -] - [[package]] name = "nvidia-nvjitlink-cu12" version = "12.8.93" @@ -1784,7 +1770,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = 
"sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88"}, {file = "nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7"}, @@ -1798,28 +1784,12 @@ description = "NVSHMEM creates a global address space that provides efficient an optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0"}, {file = "nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5"}, ] -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.6.77" -description = "NVIDIA Tools Extension" -optional = false -python-versions = ">=3" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f44f8d86bb7d5629988d61c8d3ae61dddb2015dee142740536bc7481b022fe4b"}, - {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:adcaabb9d436c9761fca2b13959a2d237c5f9fd406c8e4b723c695409ff88059"}, - {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2"}, - {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1"}, - {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = 
"sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"}, -] - [[package]] name = "nvidia-nvtx-cu12" version = "12.8.90" @@ -1827,7 +1797,7 @@ description = "NVIDIA Tools Extension" optional = false python-versions = ">=3" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615"}, {file = "nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f"}, @@ -1836,27 +1806,31 @@ files = [ [[package]] name = "openai" -version = "1.32.0" +version = "2.14.0" description = "The official Python library for the openai API" optional = false -python-versions = ">=3.7.1" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "openai-1.32.0-py3-none-any.whl", hash = "sha256:953d57669f309002044fd2f678aba9f07a43256d74b3b00cd04afb5b185568ea"}, - {file = "openai-1.32.0.tar.gz", hash = "sha256:a6df15a7ab9344b1bc2bc8d83639f68b7a7e2453c0f5e50c1666547eee86f0bd"}, + {file = "openai-2.14.0-py3-none-any.whl", hash = "sha256:7ea40aca4ffc4c4a776e77679021b47eec1160e341f42ae086ba949c9dcc9183"}, + {file = "openai-2.14.0.tar.gz", hash = "sha256:419357bedde9402d23bf8f2ee372fca1985a73348debba94bddff06f19459952"}, ] [package.dependencies] anyio = ">=3.5.0,<5" distro = ">=1.7.0,<2" httpx = ">=0.23.0,<1" +jiter = ">=0.10.0,<1" pydantic = ">=1.9.0,<3" sniffio = "*" tqdm = ">4" -typing-extensions = ">=4.7,<5" +typing-extensions = ">=4.11,<5" [package.extras] +aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.9)"] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +realtime = ["websockets (>=13,<16)"] +voice-helpers = ["numpy 
(>=2.0.2)", "sounddevice (>=0.5.1)"] [[package]] name = "packaging" @@ -2078,7 +2052,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -2088,6 +2062,67 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "polars" +version = "1.36.1" +description = "Blazingly fast DataFrame library" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "polars-1.36.1-py3-none-any.whl", hash = "sha256:853c1bbb237add6a5f6d133c15094a9b727d66dd6a4eb91dbb07cdb056b2b8ef"}, + {file = "polars-1.36.1.tar.gz", hash = "sha256:12c7616a2305559144711ab73eaa18814f7aa898c522e7645014b68f1432d54c"}, +] + +[package.dependencies] +polars-runtime-32 = "1.36.1" + +[package.extras] +adbc = ["adbc-driver-manager[dbapi]", "adbc-driver-sqlite[dbapi]"] +all = ["polars[async,cloudpickle,database,deltalake,excel,fsspec,graph,iceberg,numpy,pandas,plot,pyarrow,pydantic,style,timezone]"] +async = ["gevent"] +calamine = ["fastexcel (>=0.9)"] +cloudpickle = ["cloudpickle"] +connectorx = ["connectorx (>=0.3.2)"] +database = ["polars[adbc,connectorx,sqlalchemy]"] +deltalake = ["deltalake (>=1.0.0)"] +excel = ["polars[calamine,openpyxl,xlsx2csv,xlsxwriter]"] +fsspec = ["fsspec"] +gpu = ["cudf-polars-cu12"] +graph = ["matplotlib"] +iceberg = ["pyiceberg (>=0.7.1)"] +numpy = ["numpy (>=1.16.0)"] +openpyxl = ["openpyxl (>=3.0.0)"] +pandas = ["pandas", "polars[pyarrow]"] +plot = ["altair (>=5.4.0)"] +polars-cloud = ["polars_cloud (>=0.4.0)"] +pyarrow = ["pyarrow (>=7.0.0)"] +pydantic = ["pydantic"] +rt64 = ["polars-runtime-64 (==1.36.1)"] +rtcompat = ["polars-runtime-compat 
(==1.36.1)"] +sqlalchemy = ["polars[pandas]", "sqlalchemy"] +style = ["great-tables (>=0.8.0)"] +timezone = ["tzdata ; platform_system == \"Windows\""] +xlsx2csv = ["xlsx2csv (>=0.8.0)"] +xlsxwriter = ["xlsxwriter"] + +[[package]] +name = "polars-runtime-32" +version = "1.36.1" +description = "Blazingly fast DataFrame library" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "polars_runtime_32-1.36.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:327b621ca82594f277751f7e23d4b939ebd1be18d54b4cdf7a2f8406cecc18b2"}, + {file = "polars_runtime_32-1.36.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ab0d1f23084afee2b97de8c37aa3e02ec3569749ae39571bd89e7a8b11ae9e83"}, + {file = "polars_runtime_32-1.36.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:899b9ad2e47ceb31eb157f27a09dbc2047efbf4969a923a6b1ba7f0412c3e64c"}, + {file = "polars_runtime_32-1.36.1-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:d9d077bb9df711bc635a86540df48242bb91975b353e53ef261c6fae6cb0948f"}, + {file = "polars_runtime_32-1.36.1-cp39-abi3-win_amd64.whl", hash = "sha256:cc17101f28c9a169ff8b5b8d4977a3683cd403621841623825525f440b564cf0"}, + {file = "polars_runtime_32-1.36.1-cp39-abi3-win_arm64.whl", hash = "sha256:809e73857be71250141225ddd5d2b30c97e6340aeaa0d445f930e01bef6888dc"}, + {file = "polars_runtime_32-1.36.1.tar.gz", hash = "sha256:201c2cfd80ceb5d5cd7b63085b5fd08d6ae6554f922bcb941035e39638528a09"}, +] + [[package]] name = "protobuf" version = "6.33.0" @@ -2144,7 +2179,7 @@ version = "1.11.0" description = "library with cross-python path, ini-parsing, io, code, log facilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -groups = ["main"] +groups = ["dev"] files = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = 
"sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, @@ -2347,13 +2382,28 @@ files = [ {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, ] +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pytest" version = "7.1.3" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["dev"] files = [ {file = "pytest-7.1.3-py3-none-any.whl", hash = "sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"}, {file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"}, @@ -2377,7 +2427,7 @@ version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["dev"] files = [ {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, @@ -2404,6 +2454,52 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "pytrec-eval-terrier" +version = "0.5.10" +description = "Provides Python bindings for popular Information Retrieval measures implemented within trec_eval." 
+optional = false +python-versions = ">=3" +groups = ["main"] +files = [ + {file = "pytrec_eval_terrier-0.5.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5e574b2d4285d42e3bdc7ca0d9724d46c3bce06d3ee5d6c20e90fdea19761a2f"}, + {file = "pytrec_eval_terrier-0.5.10-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e69c78878379e3e5e280ecf91e9c3bd882f637763d2378655bb0f121e62efbd4"}, + {file = "pytrec_eval_terrier-0.5.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:04266dd7869276ae025399df69bf050bba26043b37426cd482fb9bcaa2b78ffa"}, + {file = "pytrec_eval_terrier-0.5.10-cp310-cp310-win_amd64.whl", hash = "sha256:bb0bb4495f10a0bff95f97a8c17df67c967d611c9fc1a5db13e143e7888b102e"}, + {file = "pytrec_eval_terrier-0.5.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1036735d4a12d1c92eea38a14a071168a292f8696099e90742c2c701479f010b"}, + {file = "pytrec_eval_terrier-0.5.10-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b36a2fbdccc7669c4b8aba1f6de2a661e6f2f77c10f05855eda55dda60fc88f5"}, + {file = "pytrec_eval_terrier-0.5.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e4ca19110f24922d7435cf9ef9951a61f0b575488b6a1db86081d82b88dd621"}, + {file = "pytrec_eval_terrier-0.5.10-cp311-cp311-win_amd64.whl", hash = "sha256:d36e9a8966560ed10bc5aeb30c5c29a53d3fe8e4ccb6ff6bb026bffb21be3fe3"}, + {file = "pytrec_eval_terrier-0.5.10-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28c3c14728713cdbad165964e2d1aba96b0fc7445a5a13168b398e9bd3bbd08"}, + {file = "pytrec_eval_terrier-0.5.10-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:689ee541d72c27d14ae15cd1f11d2cb86cf9bdc880f5e8af9c5dbbdd47663d4d"}, + {file = "pytrec_eval_terrier-0.5.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3f02118dadd3c09b71462bb26e405e49bd10fe0c60bcc169fcd31454a4256dc2"}, + {file = "pytrec_eval_terrier-0.5.10-cp312-cp312-win_amd64.whl", hash = 
"sha256:202e48fe24948453fe45dcd73261f9865f99cb2ff4c8a3255ac2ab4c993a64ba"}, + {file = "pytrec_eval_terrier-0.5.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:fcf96c33446c16de8db78e829c5279f7404ceaaf6b502bb5a6a3669b06051601"}, + {file = "pytrec_eval_terrier-0.5.10-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8455485f1faf6759f1be11b12c904d1c749ba5db7e2b6f414aa56e19533ce069"}, + {file = "pytrec_eval_terrier-0.5.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e7cc9666305281b0ca1873761dc71cd3f0863e6d759f00a12fd363aa2d558d6f"}, + {file = "pytrec_eval_terrier-0.5.10-cp313-cp313-win_amd64.whl", hash = "sha256:9440bd4a78ee0bc5db6821d7483e962a6c494303fd26598f84f00d54cc64cdd7"}, + {file = "pytrec_eval_terrier-0.5.10-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:70bc61b8d02e61a37ed97c088282bb0a124b58e7141cc52756512750efabacbb"}, + {file = "pytrec_eval_terrier-0.5.10-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d52d94803c32cadbff7fe5195b0d0d68d27393092f64207fe8250a4485d1f8d7"}, + {file = "pytrec_eval_terrier-0.5.10-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:77950d0ce9bd960af40efede6850e7b6519400e7fda3f9313e0d0d02c247e4e2"}, + {file = "pytrec_eval_terrier-0.5.10-cp314-cp314-win_amd64.whl", hash = "sha256:c69681fec350fa94af45dd7ef8f53f605e89f752583c814f713d7d2329435cfc"}, + {file = "pytrec_eval_terrier-0.5.10-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:876740f3d58625058d34aaa1939be31bf253ecacd85d0d8b1089db5dd57ab127"}, + {file = "pytrec_eval_terrier-0.5.10-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2ca4e624e5f2589ae75c1034ff1f38e9fc81de86314193508ac423e7ca56769c"}, + {file = "pytrec_eval_terrier-0.5.10-cp37-cp37m-win_amd64.whl", hash = "sha256:9e019263f266675c1d2350e57d7d4180e1c8de7cad6cc5b3104ecebe6b04879d"}, + {file = "pytrec_eval_terrier-0.5.10-cp38-cp38-macosx_11_0_universal2.whl", hash = 
"sha256:df2bc7da44ede067b9a8404c39f0ed839bea873d25b4b3d839df0062bea45c66"}, + {file = "pytrec_eval_terrier-0.5.10-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c227e7722a0b1c49224488b26fa364383f263bb7cc9abae92bb065838b23f77c"}, + {file = "pytrec_eval_terrier-0.5.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:270d86ecba2fe717d17cd201bacc187f9b190658b7d1e238e7c6152031eee3e1"}, + {file = "pytrec_eval_terrier-0.5.10-cp38-cp38-win_amd64.whl", hash = "sha256:9efeb4fd9194528b7965919a58e9f1c4eddad8c7042c980949557a640dd01cd0"}, + {file = "pytrec_eval_terrier-0.5.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2afaff86a8eb66a766c996fb8955f6c0c601bc2b78ff678412de394f60a965d7"}, + {file = "pytrec_eval_terrier-0.5.10-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af71efdd4b9628c088a1faf4c7777f73acda057961df2318b6a74b9711516d15"}, + {file = "pytrec_eval_terrier-0.5.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:46fe13f625b6214288b4a0d3df60ac086cf511efc0383fd3adb3e772fc1d54f8"}, + {file = "pytrec_eval_terrier-0.5.10-cp39-cp39-win_amd64.whl", hash = "sha256:a6e36d9fd2a9d4080201850d002790df12308b948de5e2c32defb7605eb97c64"}, + {file = "pytrec_eval_terrier-0.5.10.tar.gz", hash = "sha256:eaaf20580d17b5575a233e04dab8a4cbcc01a7e45be8cf547c07f0a2bb3e7eb9"}, +] + +[package.dependencies] +numpy = ">=1.15.1" +scipy = ">=1.1.0" + [[package]] name = "pytz" version = "2024.1" @@ -2602,6 +2698,25 @@ files = [ [package.extras] cli = ["PyYAML (>=6.0,<7.0)", "fastapi (>=0.92.0,<1)", "jinja2 (>=3.1.2,<4.0.0)", "rich (>=12.6)", "typer (>=0.6.1,<0.8)", "uvicorn (>=0.21.0,<1)"] +[[package]] +name = "rich" +version = "14.2.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.8.0" +groups = ["main"] +files = [ + {file = "rich-14.2.0-py3-none-any.whl", hash = 
"sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd"}, + {file = "rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "safetensors" version = "0.4.3" @@ -2960,9 +3075,10 @@ files = [ name = "smart-open" version = "7.0.4" description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" -optional = false +optional = true python-versions = "<4.0,>=3.7" groups = ["main"] +markers = "extra == \"v1\" or extra == \"all\"" files = [ {file = "smart_open-7.0.4-py3-none-any.whl", hash = "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47"}, {file = "smart_open-7.0.4.tar.gz", hash = "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524"}, @@ -3062,9 +3178,10 @@ dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] name = "tabulate" version = "0.9.0" description = "Pretty-print tabular data" -optional = false +optional = true python-versions = ">=3.7" -groups = ["main", "dev"] +groups = ["main"] +markers = "extra == \"v1\" or extra == \"all\"" files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -3087,48 +3204,69 @@ files = [ [[package]] name = "tiktoken" -version = "0.6.0" +version = "0.12.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "tiktoken-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:277de84ccd8fa12730a6b4067456e5cf72fef6300bea61d506c09e45658d41ac"}, - {file = "tiktoken-0.6.0-cp310-cp310-macosx_11_0_arm64.whl", 
hash = "sha256:9c44433f658064463650d61387623735641dcc4b6c999ca30bc0f8ba3fccaf5c"}, - {file = "tiktoken-0.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afb9a2a866ae6eef1995ab656744287a5ac95acc7e0491c33fad54d053288ad3"}, - {file = "tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c62c05b3109fefca26fedb2820452a050074ad8e5ad9803f4652977778177d9f"}, - {file = "tiktoken-0.6.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0ef917fad0bccda07bfbad835525bbed5f3ab97a8a3e66526e48cdc3e7beacf7"}, - {file = "tiktoken-0.6.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e095131ab6092d0769a2fda85aa260c7c383072daec599ba9d8b149d2a3f4d8b"}, - {file = "tiktoken-0.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:05b344c61779f815038292a19a0c6eb7098b63c8f865ff205abb9ea1b656030e"}, - {file = "tiktoken-0.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cefb9870fb55dca9e450e54dbf61f904aab9180ff6fe568b61f4db9564e78871"}, - {file = "tiktoken-0.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:702950d33d8cabc039845674107d2e6dcabbbb0990ef350f640661368df481bb"}, - {file = "tiktoken-0.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8d49d076058f23254f2aff9af603863c5c5f9ab095bc896bceed04f8f0b013a"}, - {file = "tiktoken-0.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:430bc4e650a2d23a789dc2cdca3b9e5e7eb3cd3935168d97d43518cbb1f9a911"}, - {file = "tiktoken-0.6.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:293cb8669757301a3019a12d6770bd55bec38a4d3ee9978ddbe599d68976aca7"}, - {file = "tiktoken-0.6.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7bd1a288b7903aadc054b0e16ea78e3171f70b670e7372432298c686ebf9dd47"}, - {file = "tiktoken-0.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac76e000183e3b749634968a45c7169b351e99936ef46f0d2353cd0d46c3118d"}, - {file = "tiktoken-0.6.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:17cc8a4a3245ab7d935c83a2db6bb71619099d7284b884f4b2aea4c74f2f83e3"}, - {file = "tiktoken-0.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:284aebcccffe1bba0d6571651317df6a5b376ff6cfed5aeb800c55df44c78177"}, - {file = "tiktoken-0.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c1a3a5d33846f8cd9dd3b7897c1d45722f48625a587f8e6f3d3e85080559be8"}, - {file = "tiktoken-0.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6318b2bb2337f38ee954fd5efa82632c6e5ced1d52a671370fa4b2eff1355e91"}, - {file = "tiktoken-0.6.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f5f0f2ed67ba16373f9a6013b68da298096b27cd4e1cf276d2d3868b5c7efd1"}, - {file = "tiktoken-0.6.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:75af4c0b16609c2ad02581f3cdcd1fb698c7565091370bf6c0cf8624ffaba6dc"}, - {file = "tiktoken-0.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:45577faf9a9d383b8fd683e313cf6df88b6076c034f0a16da243bb1c139340c3"}, - {file = "tiktoken-0.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7c1492ab90c21ca4d11cef3a236ee31a3e279bb21b3fc5b0e2210588c4209e68"}, - {file = "tiktoken-0.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e2b380c5b7751272015400b26144a2bab4066ebb8daae9c3cd2a92c3b508fe5a"}, - {file = "tiktoken-0.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f497598b9f58c99cbc0eb764b4a92272c14d5203fc713dd650b896a03a50ad"}, - {file = "tiktoken-0.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e65e8bd6f3f279d80f1e1fbd5f588f036b9a5fa27690b7f0cc07021f1dfa0839"}, - {file = "tiktoken-0.6.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5f1495450a54e564d236769d25bfefbf77727e232d7a8a378f97acddee08c1ae"}, - {file = "tiktoken-0.6.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6c4e4857d99f6fb4670e928250835b21b68c59250520a1941618b5b4194e20c3"}, - {file = "tiktoken-0.6.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:168d718f07a39b013032741867e789971346df8e89983fe3c0ef3fbd5a0b1cb9"}, - {file = "tiktoken-0.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:47fdcfe11bd55376785a6aea8ad1db967db7f66ea81aed5c43fad497521819a4"}, - {file = "tiktoken-0.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fb7d2ccbf1a7784810aff6b80b4012fb42c6fc37eaa68cb3b553801a5cc2d1fc"}, - {file = "tiktoken-0.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ccb7a111ee76af5d876a729a347f8747d5ad548e1487eeea90eaf58894b3138"}, - {file = "tiktoken-0.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2048e1086b48e3c8c6e2ceeac866561374cd57a84622fa49a6b245ffecb7744"}, - {file = "tiktoken-0.6.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:07f229a5eb250b6403a61200199cecf0aac4aa23c3ecc1c11c1ca002cbb8f159"}, - {file = "tiktoken-0.6.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:432aa3be8436177b0db5a2b3e7cc28fd6c693f783b2f8722539ba16a867d0c6a"}, - {file = "tiktoken-0.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:8bfe8a19c8b5c40d121ee7938cd9c6a278e5b97dc035fd61714b4f0399d2f7a1"}, - {file = "tiktoken-0.6.0.tar.gz", hash = "sha256:ace62a4ede83c75b0374a2ddfa4b76903cf483e9cb06247f566be3bf14e6beed"}, + {file = "tiktoken-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3de02f5a491cfd179aec916eddb70331814bd6bf764075d39e21d5862e533970"}, + {file = "tiktoken-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6cfb6d9b7b54d20af21a912bfe63a2727d9cfa8fbda642fd8322c70340aad16"}, + {file = "tiktoken-0.12.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:cde24cdb1b8a08368f709124f15b36ab5524aac5fa830cc3fdce9c03d4fb8030"}, + {file = "tiktoken-0.12.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6de0da39f605992649b9cfa6f84071e3f9ef2cec458d08c5feb1b6f0ff62e134"}, + {file = "tiktoken-0.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6faa0534e0eefbcafaccb75927a4a380463a2eaa7e26000f0173b920e98b720a"}, + {file 
= "tiktoken-0.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:82991e04fc860afb933efb63957affc7ad54f83e2216fe7d319007dab1ba5892"}, + {file = "tiktoken-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:6fb2995b487c2e31acf0a9e17647e3b242235a20832642bb7a9d1a181c0c1bb1"}, + {file = "tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb"}, + {file = "tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa"}, + {file = "tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc"}, + {file = "tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded"}, + {file = "tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd"}, + {file = "tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967"}, + {file = "tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def"}, + {file = "tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8"}, + {file = "tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b"}, + {file = "tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37"}, + {file = "tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad"}, + {file = "tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5"}, + {file = "tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3"}, + {file = "tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd"}, + {file = "tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3"}, + {file = "tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160"}, + {file = "tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa"}, + {file = "tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be"}, + {file = "tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a"}, + {file = "tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3"}, + {file = "tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697"}, + {file = "tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16"}, + {file = "tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a"}, + {file = "tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27"}, + {file = "tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb"}, + 
{file = "tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e"}, + {file = "tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25"}, + {file = "tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f"}, + {file = "tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646"}, + {file = "tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88"}, + {file = "tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff"}, + {file = "tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830"}, + {file = "tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b"}, + {file = "tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b"}, + {file = "tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3"}, + {file = "tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365"}, + {file = "tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e"}, + {file = "tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63"}, + {file = "tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = 
"sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0"}, + {file = "tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a"}, + {file = "tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0"}, + {file = "tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71"}, + {file = "tiktoken-0.12.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:d51d75a5bffbf26f86554d28e78bfb921eae998edc2675650fd04c7e1f0cdc1e"}, + {file = "tiktoken-0.12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:09eb4eae62ae7e4c62364d9ec3a57c62eea707ac9a2b2c5d6bd05de6724ea179"}, + {file = "tiktoken-0.12.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:df37684ace87d10895acb44b7f447d4700349b12197a526da0d4a4149fde074c"}, + {file = "tiktoken-0.12.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:4c9614597ac94bb294544345ad8cf30dac2129c05e2db8dc53e082f355857af7"}, + {file = "tiktoken-0.12.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:20cf97135c9a50de0b157879c3c4accbb29116bcf001283d26e073ff3b345946"}, + {file = "tiktoken-0.12.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:15d875454bbaa3728be39880ddd11a5a2a9e548c29418b41e8fd8a767172b5ec"}, + {file = "tiktoken-0.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cff3688ba3c639ebe816f8d58ffbbb0aa7433e23e08ab1cade5d175fc973fb3"}, + {file = "tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931"}, ] [package.dependencies] @@ -3177,75 +3315,11 @@ version = "2.0.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.7" -groups = ["main", "dev"] +groups = ["dev"] files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = 
"tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] -markers = {dev = "python_version < \"3.11\""} - -[[package]] -name = "torch" -version = "2.7.1" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false -python-versions = ">=3.9.0" -groups = ["main"] -markers = "python_version >= \"3.12\"" -files = [ - {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a103b5d782af5bd119b81dbcc7ffc6fa09904c423ff8db397a1e6ea8fd71508f"}, - {file = "torch-2.7.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:fe955951bdf32d182ee8ead6c3186ad54781492bf03d547d31771a01b3d6fb7d"}, - {file = "torch-2.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:885453d6fba67d9991132143bf7fa06b79b24352f4506fd4d10b309f53454162"}, - {file = "torch-2.7.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d72acfdb86cee2a32c0ce0101606f3758f0d8bb5f8f31e7920dc2809e963aa7c"}, - {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:236f501f2e383f1cb861337bdf057712182f910f10aeaf509065d54d339e49b2"}, - {file = "torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:06eea61f859436622e78dd0cdd51dbc8f8c6d76917a9cf0555a333f9eac31ec1"}, - {file = "torch-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:8273145a2e0a3c6f9fd2ac36762d6ee89c26d430e612b95a99885df083b04e52"}, - {file = "torch-2.7.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aea4fc1bf433d12843eb2c6b2204861f43d8364597697074c8d38ae2507f8730"}, - {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:27ea1e518df4c9de73af7e8a720770f3628e7f667280bce2be7a16292697e3fa"}, - {file = "torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c33360cfc2edd976c2633b3b66c769bdcbbf0e0b6550606d188431c81e7dd1fc"}, - {file = "torch-2.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:d8bf6e1856ddd1807e79dc57e54d3335f2b62e6f316ed13ed3ecfe1fc1df3d8b"}, - {file = 
"torch-2.7.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:787687087412c4bd68d315e39bc1223f08aae1d16a9e9771d95eabbb04ae98fb"}, - {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:03563603d931e70722dce0e11999d53aa80a375a3d78e6b39b9f6805ea0a8d28"}, - {file = "torch-2.7.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d632f5417b6980f61404a125b999ca6ebd0b8b4bbdbb5fbbba44374ab619a412"}, - {file = "torch-2.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:23660443e13995ee93e3d844786701ea4ca69f337027b05182f5ba053ce43b38"}, - {file = "torch-2.7.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0da4f4dba9f65d0d203794e619fe7ca3247a55ffdcbd17ae8fb83c8b2dc9b585"}, - {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e08d7e6f21a617fe38eeb46dd2213ded43f27c072e9165dc27300c9ef9570934"}, - {file = "torch-2.7.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:30207f672328a42df4f2174b8f426f354b2baa0b7cca3a0adb3d6ab5daf00dc8"}, - {file = "torch-2.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:79042feca1c634aaf6603fe6feea8c6b30dfa140a6bbc0b973e2260c7e79a22e"}, - {file = "torch-2.7.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:988b0cbc4333618a1056d2ebad9eb10089637b659eb645434d0809d8d937b946"}, - {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:e0d81e9a12764b6f3879a866607c8ae93113cbcad57ce01ebde63eb48a576369"}, - {file = "torch-2.7.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8394833c44484547ed4a47162318337b88c97acdb3273d85ea06e03ffff44998"}, - {file = "torch-2.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:df41989d9300e6e3c19ec9f56f856187a6ef060c3662fe54f4b6baf1fc90bd19"}, - {file = "torch-2.7.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a737b5edd1c44a5c1ece2e9f3d00df9d1b3fb9541138bee56d83d38293fb6c9d"}, -] - -[package.dependencies] -filelock = "*" -fsspec = "*" -jinja2 = "*" -networkx = "*" -nvidia-cublas-cu12 = {version = "12.6.4.1", markers = "platform_system == 
\"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-cupti-cu12 = {version = "12.6.80", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-nvrtc-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cuda-runtime-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cudnn-cu12 = {version = "9.5.1.17", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cufft-cu12 = {version = "11.3.0.4", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cufile-cu12 = {version = "1.11.1.6", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-curand-cu12 = {version = "10.3.7.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusolver-cu12 = {version = "11.7.1.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusparse-cu12 = {version = "12.5.4.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-cusparselt-cu12 = {version = "0.6.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nccl-cu12 = {version = "2.26.2", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nvjitlink-cu12 = {version = "12.6.85", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nvtx-cu12 = {version = "12.6.77", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -setuptools = {version = "*", markers = "python_version >= \"3.12\""} -sympy = ">=1.13.3" -triton = {version = "3.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -typing-extensions = ">=4.10.0" - -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] -optree = ["optree (>=0.13.0)"] [[package]] name = "torch" @@ -3254,7 
+3328,6 @@ description = "Tensors and Dynamic neural networks in Python with strong GPU acc optional = false python-versions = ">=3.10" groups = ["main"] -markers = "python_version <= \"3.11\"" files = [ {file = "torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd"}, {file = "torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c"}, @@ -3306,6 +3379,7 @@ nvidia-nccl-cu12 = {version = "2.27.5", markers = "platform_system == \"Linux\" nvidia-nvjitlink-cu12 = {version = "12.8.93", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvshmem-cu12 = {version = "3.3.20", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = "12.8.90", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = {version = "*", markers = "python_version >= \"3.12\""} sympy = ">=1.13.3" triton = {version = "3.5.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} typing-extensions = ">=4.10.0" @@ -3420,31 +3494,6 @@ torchhub = ["filelock", "huggingface-hub (>=0.34.0,<1.0)", "importlib_metadata", video = ["av"] vision = ["Pillow (>=10.0.1,<=15.0)"] -[[package]] -name = "triton" -version = "3.3.1" -description = "A language and compiler for custom Deep Learning operations" -optional = false -python-versions = "*" -groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version >= \"3.12\"" -files = [ - {file = "triton-3.3.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b74db445b1c562844d3cfad6e9679c72e93fdfb1a90a24052b03bb5c49d1242e"}, - {file = "triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b31e3aa26f8cb3cc5bf4e187bf737cbacf17311e1112b781d4a059353dfd731b"}, - {file = 
"triton-3.3.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9999e83aba21e1a78c1f36f21bce621b77bcaa530277a50484a7cb4a822f6e43"}, - {file = "triton-3.3.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b89d846b5a4198317fec27a5d3a609ea96b6d557ff44b56c23176546023c4240"}, - {file = "triton-3.3.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3198adb9d78b77818a5388bff89fa72ff36f9da0bc689db2f0a651a67ce6a42"}, - {file = "triton-3.3.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6139aeb04a146b0b8e0fbbd89ad1e65861c57cfed881f21d62d3cb94a36bab7"}, -] - -[package.dependencies] -setuptools = ">=40.8.0" - -[package.extras] -build = ["cmake (>=3.20)", "lit"] -tests = ["autopep8", "isort", "llnl-hatchet", "numpy", "pytest", "pytest-forked", "pytest-xdist", "scipy (>=1.7.1)"] -tutorials = ["matplotlib", "pandas", "tabulate"] - [[package]] name = "triton" version = "3.5.0" @@ -3452,7 +3501,7 @@ description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "<3.15,>=3.10" groups = ["main"] -markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version <= \"3.11\"" +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\"" files = [ {file = "triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3"}, {file = "triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0"}, @@ -3577,9 +3626,10 @@ dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] name = "wrapt" version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." 
-optional = false +optional = true python-versions = ">=3.6" groups = ["main"] +markers = "extra == \"v1\" or extra == \"all\"" files = [ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, @@ -3875,7 +3925,11 @@ files = [ idna = ">=2.0" multidict = ">=4.0" +[extras] +all = ["eval-type-backport", "pydantic", "smart-open", "tabulate"] +v1 = ["eval-type-backport", "pydantic", "smart-open", "tabulate"] + [metadata] lock-version = "2.1" -python-versions = ">=3.10,<4.0" -content-hash = "f4ea38369c3560805eaf80e8b74de4909777dc51cba333401221fa6787c391bc" +python-versions = ">=3.10,<3.15" +content-hash = "65e5c60304ef04f9e4971d5ed27da61da0dcc459fd8df0b885b9dd4dd89502f4" diff --git a/pyproject.toml b/pyproject.toml index 8702dd0..b34da12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,40 +6,60 @@ requires = ["poetry-core"] authors = [ "shengzhe.li ", "ryokan.ri ", - "masaya.ohagi " + "masaya.ohagi ", + "akihiko.fukuchi " ] description = "The evaluation scripts for JMTEB (Japanese Massive Text Embedding Benchmark)" name = "JMTEB" packages = [{from = "src", include = "jmteb"}] readme = "README.md" -version = "1.4.0" +version = "2.0.0rc" [tool.poetry.dependencies] -python = ">=3.10,<4.0" -jsonargparse = {extras = ["jsonnet"], version = "^4.27.5"} -loguru = "^0.7.2" -scikit-learn = "^1.3.2" -transformers = {version = "^4.57.1", extras = ["ja", "sentencepiece"]} -datasets = ">=2.17" -sentence-transformers = "5.1.1" -pytest = "7.1.3" -torch = "^2.6" -pydantic = "^2.6.3" -eval-type-backport = "^0.1.3" -smart-open = "^7.0.1" -openai = "^1.16.2" -pytest-mock = "^3.14.0" -tiktoken = "^0.6.0" -numpy = "^1.26" -accelerate = "^0.31.0" -tabulate = "^0.9.0" +python = ">=3.10,<3.15" +# Core dependencies (v2) +mteb = ">=2.4.2" +sentence-transformers = 
">=5.1.1" +torch = ">=2.6.0" +numpy = ">=1.26,<3.0.0" +pyyaml = "^6.0" +loguru = ">=0.7.3" +scikit-learn = ">=1.4.0" +transformers = {version = ">=4.57.1", extras = ["ja", "sentencepiece"]} +accelerate = ">=0.31.0" +tqdm = ">=4.0.0" +rich = ">=13.0.0" +openai = ">=1.41.0" +tiktoken = ">=0.8.0" +datasets = ">=2.19.0" +jsonargparse = {extras = ["jsonnet"], version = ">=4.36.0"} +# Optional v1 dependencies +pydantic = {version = ">=2.6.0", optional = true} +eval-type-backport = {version = "^0.1.3", optional = true} +smart-open = {version = ">=7.0.0", optional = true} +tabulate = {version = ">=0.9.0", optional = true} + +[tool.poetry.extras] +v1 = [ + "pydantic", + "eval-type-backport", + "smart-open", + "tabulate", +] +all = [ + "pydantic", + "eval-type-backport", + "smart-open", + "tabulate", +] [tool.poetry.group.dev.dependencies] black = "^23.11.0" isort = "^5.12.0" mypy = "^1.7.1" flake8 = "^7.0.0" -tabulate = "^0.9.0" +pytest = "7.1.3" +pytest-mock = "^3.14.0" [tool.black] line-length = 119 diff --git a/src/jmteb/configs/prompts/e5.yaml b/src/jmteb/configs/prompts/e5.yaml new file mode 100644 index 0000000..def29a2 --- /dev/null +++ b/src/jmteb/configs/prompts/e5.yaml @@ -0,0 +1,14 @@ +# E5 Model Prompt Configuration +# Prompts are specified by task type rather than individual datasets + +### Single-text tasks (Clustering, Classification, STS) +Clustering: "query: " +Classification: "query: " +STS: "query: " + +### Dual-text tasks (Retrieval, Reranking) +Retrieval-query: "query: " +Retrieval-document: "passage: " + +Reranking-query: "query: " +Reranking-document: "passage: " diff --git a/src/jmteb/configs/prompts/gemma.yaml b/src/jmteb/configs/prompts/gemma.yaml new file mode 100644 index 0000000..2fe76fd --- /dev/null +++ b/src/jmteb/configs/prompts/gemma.yaml @@ -0,0 +1,14 @@ +query: "task: search result | query: " +document: "title: none | text: " +BitextMining: "task: search result | query: " +Clustering: "task: clustering | query: " +Classification: "task: 
classification | query: " +InstructionRetrieval: "task: code retrieval | query: " +MultilabelClassification: "task: classification | query: " +PairClassification: "task: sentence similarity | query: " +Reranking: "task: search result | query: " +Retrieval: "task: search result | query: " +Retrieval-query: "task: search result | query: " +Retrieval-document: "title: none | text: " +STS: "task: sentence similarity | query: " +Summarization: "task: summarization | query: " \ No newline at end of file diff --git a/src/jmteb/configs/prompts/ruri-v3.yaml b/src/jmteb/configs/prompts/ruri-v3.yaml new file mode 100644 index 0000000..2bf5f32 --- /dev/null +++ b/src/jmteb/configs/prompts/ruri-v3.yaml @@ -0,0 +1,79 @@ +### Clustering (3) +LivedoorNewsClustering.v2: "トピック: " +MewsC16JaClustering: "トピック: " +SIB200ClusteringS2S: "トピック: " + +### Classification (7) +AmazonReviewsClassification: "トピック: " +AmazonCounterfactualClassification: "トピック: " +MassiveIntentClassification: "トピック: " +MassiveScenarioClassification: "トピック: " +JapaneseSentimentClassification: "トピック: " +SIB200Classification: "トピック: " +WRIMEClassification: "トピック: " + +### STS (2) +JSTS: "" +JSICK: "" + +### Retrieval (11 + 4 lite variants) +JaqketRetrieval-query: "検索クエリ: " +JaqketRetrieval-document: "検索文書: " +JaqketRetrievalLite-query: "検索クエリ: " +JaqketRetrievalLite-document: "検索文書: " + +MrTidyRetrieval-query: "検索クエリ: " +MrTidyRetrieval-document: "検索文書: " +MrTyDiJaRetrievalLite-query: "検索クエリ: " +MrTyDiJaRetrievalLite-document: "検索文書: " + +JaGovFaqsRetrieval-query: "検索クエリ: " +JaGovFaqsRetrieval-document: "検索文書: " + +NLPJournalTitleAbsRetrieval.V2-query: "検索クエリ: " +NLPJournalTitleAbsRetrieval.V2-document: "検索文書: " + +NLPJournalTitleIntroRetrieval.V2-query: "検索クエリ: " +NLPJournalTitleIntroRetrieval.V2-document: "検索文書: " + +NLPJournalAbsIntroRetrieval.V2-query: "検索クエリ: " +NLPJournalAbsIntroRetrieval.V2-document: "検索文書: " + +NLPJournalAbsArticleRetrieval.V2-query: "検索クエリ: " +NLPJournalAbsArticleRetrieval.V2-document: 
"検索文書: " + +JaCWIRRetrieval-query: "検索クエリ: " +JaCWIRRetrieval-document: "検索文書: " +JaCWIRRetrievalLite-query: "検索クエリ: " +JaCWIRRetrievalLite-document: "検索文書: " + +MIRACLRetrieval-query: "検索クエリ: " +MIRACLRetrieval-document: "検索文書: " +MIRACLJaRetrievalLite-query: "検索クエリ: " +MIRACLJaRetrievalLite-document: "検索文書: " + +MintakaRetrieval-query: "検索クエリ: " +MintakaRetrieval-document: "検索文書: " + +MultiLongDocRetrieval-query: "検索クエリ: " +MultiLongDocRetrieval-document: "検索文書: " + +### Reranking (5 + 2 lite variants) +ESCIReranking-query: "検索クエリ: " +ESCIReranking-document: "検索文書: " + +JQaRAReranking-query: "検索クエリ: " +JQaRAReranking-document: "検索文書: " +JQaRARerankingLite-query: "検索クエリ: " +JQaRARerankingLite-document: "検索文書: " + +JaCWIRReranking-query: "検索クエリ: " +JaCWIRReranking-document: "検索文書: " +JaCWIRRerankingLite-query: "検索クエリ: " +JaCWIRRerankingLite-document: "検索文書: " + +MIRACLReranking-query: "検索クエリ: " +MIRACLReranking-document: "検索文書: " + +MultiLongDocReranking-query: "検索クエリ: " +MultiLongDocReranking-document: "検索文書: " diff --git a/src/jmteb/configs/prompts/ruri.yaml b/src/jmteb/configs/prompts/ruri.yaml new file mode 100644 index 0000000..1de7cb6 --- /dev/null +++ b/src/jmteb/configs/prompts/ruri.yaml @@ -0,0 +1,14 @@ +# Ruri Model Prompt Configuration +# Prompts are specified by task type rather than individual datasets + +### Single-text tasks (Clustering, Classification, STS) +Clustering: "query: " +Classification: "query: " +STS: "query: " + +### Dual-text tasks (Retrieval, Reranking) +Retrieval-query: "query: " +Retrieval-document: "文章: " + +Reranking-query: "query: " +Reranking-document: "文章: " diff --git a/src/jmteb/configs/prompts/sarashina-v2.yaml b/src/jmteb/configs/prompts/sarashina-v2.yaml new file mode 100644 index 0000000..ed59e97 --- /dev/null +++ b/src/jmteb/configs/prompts/sarashina-v2.yaml @@ -0,0 +1,79 @@ +### Clustering (3) +LivedoorNewsClustering.v2: "task: 与えられたニュース記事のトピックを特定してください。\nquery: " +MewsC16JaClustering: "task: 
与えられたニュース記事のトピックを特定してください。\nquery: " +SIB200ClusteringS2S: "task: 与えられたテキストのトピックを特定してください。\nquery: " + +### Classification (7) +AmazonReviewsClassification: "task: 与えられたAmazonレビューを適切な評価カテゴリに分類してください。\nquery: " +AmazonCounterfactualClassification: "task: 与えられたAmazonのカスタマーレビューのテキストを反事実か反事実でないかに分類してください。\nquery: " +MassiveIntentClassification: "task: ユーザーの発話をクエリとして与えるので、ユーザーの意図を見つけてください。\nquery: " +MassiveScenarioClassification: "task: ユーザーの発話をクエリとして与えるので、ユーザーシナリオを見つけてください。\nquery: " +JapaneseSentimentClassification: "task: 与えられたテキストの感情極性をポジティブ(1)かネガティブか(0)に分類してください。\nquery: " +SIB200Classification: "task: 与えられたテキストのトピックを特定してください。\nquery: " +WRIMEClassification: "task: 与えられたテキストの感情極性(-2:強いネガティブ、-1:ネガティブ、0:ニュートラル、1:ポジティブ、2:強いポジティブ)を分類してください。\nquery: " + +### STS (2) +JSTS: "task: クエリを与えるので,もっともクエリに意味が似ている一節を探してください。\nquery: " +JSICK: "task: クエリを与えるので,もっともクエリに意味が似ている一節を探してください。\nquery: " + +### Retrieval (11 + 4 lite variants) +JaqketRetrieval-query: "task: 質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。\nquery: " +JaqketRetrieval-document: "text: " +JaqketRetrievalLite-query: "task: 質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。\nquery: " +JaqketRetrievalLite-document: "text: " + +MrTidyRetrieval-query: "task: 質問を与えるので、その質問に答えるWikipediaの文章を検索するしてください。\nquery: " +MrTidyRetrieval-document: "text: " +MrTyDiJaRetrievalLite-query: "task: 質問を与えるので、その質問に答えるWikipediaの文章を検索するしてください。\nquery: " +MrTyDiJaRetrievalLite-document: "text: " + +JaGovFaqsRetrieval-query: "task: 質問を与えるので、その質問に答えるのに役立つ関連文書を検索してください。\nquery: " +JaGovFaqsRetrieval-document: "text: " + +NLPJournalTitleAbsRetrieval.V2-query: "task: 論文のタイトルを与えるので、タイトルに対応する要約を検索してください。\nquery: " +NLPJournalTitleAbsRetrieval.V2-document: "text: " + +NLPJournalTitleIntroRetrieval.V2-query: "task: 論文のタイトルを与えるので、タイトルに対応する要約を検索してください。\nquery: " +NLPJournalTitleIntroRetrieval.V2-document: "text: " + +NLPJournalAbsIntroRetrieval.V2-query: "task: 論文の序論を与えるので、序論に対応する全文を検索してください。\nquery: " +NLPJournalAbsIntroRetrieval.V2-document: "text: " + 
+NLPJournalAbsArticleRetrieval.V2-query: "task: 論文の序論を与えるので、序論に対応する全文を検索してください。\nquery: " +NLPJournalAbsArticleRetrieval.V2-document: "text: " + +JaCWIRRetrieval-query: "task: 記事のタイトルを与えるので、そのタイトルと合っている記事の中身を検索してください。\nquery: " +JaCWIRRetrieval-document: "text: " +JaCWIRRetrievalLite-query: "task: 記事のタイトルを与えるので、そのタイトルと合っている記事の中身を検索してください。\nquery: " +JaCWIRRetrievalLite-document: "text: " + +MIRACLRetrieval-query: "task: 質問を与えるので、その質問に答えるのに役立つ関連文書を検索してください。\nquery: " +MIRACLRetrieval-document: "text: " +MIRACLJaRetrievalLite-query: "task: 質問を与えるので、その質問に答えるのに役立つ関連文書を検索してください。\nquery: " +MIRACLJaRetrievalLite-document: "text: " + +MintakaRetrieval-query: "task: 質問を与えるので、その質問に答えられるテキストを検索してください。\nquery: " +MintakaRetrieval-document: "text: " + +MultiLongDocRetrieval-query: "task: 質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。\nquery: " +MultiLongDocRetrieval-document: "text: " + +### Reranking (5 + 2 lite variants) +ESCIReranking-query: "task: クエリを与えるので、与えられたWeb検索クエリに答える関連文章を検索してください。\nquery: " +ESCIReranking-document: "text: " + +JQaRAReranking-query: "task: 質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。\nquery: " +JQaRAReranking-document: "text: " +JQaRARerankingLite-query: "task: 質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。\nquery: " +JQaRARerankingLite-document: "text: " + +JaCWIRReranking-query: "task: 記事のタイトルを与えるので、そのタイトルと合っている記事の中身を検索してください。\nquery: " +JaCWIRReranking-document: "text: " +JaCWIRRerankingLite-query: "task: 記事のタイトルを与えるので、そのタイトルと合っている記事の中身を検索してください。\nquery: " +JaCWIRRerankingLite-document: "text: " + +MIRACLReranking-query: "task: 質問を与えるので、その質問に答えるのに役立つ関連文書を検索してください。\nquery: " +MIRACLReranking-document: "text: " + +MultiLongDocReranking-query: "task: 質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。\nquery: " +MultiLongDocReranking-document: "text: " diff --git a/src/jmteb/v2/__init__.py b/src/jmteb/v2/__init__.py new file mode 100644 index 0000000..2beee31 --- /dev/null +++ b/src/jmteb/v2/__init__.py @@ -0,0 +1,42 @@ +""" +JMTEB v2.0 - MTEB Integration 
Layer + +This module provides the v2.0 architecture that integrates MTEB as the underlying +evaluation engine while maintaining backward compatibility with JMTEB v1.x APIs. +""" + +from jmteb.v2.adapters import JMTEBModel +from jmteb.v2.evaluator import JMTEBV2Evaluator +from jmteb.v2.tasks import ( + JMTEB_LITE_TASKS, + JMTEB_TASKS, + get_jmteb_benchmark, + get_jmteb_lite_benchmark, + get_jmteb_lite_tasks, + get_jmteb_tasks, + get_task_by_name, +) +from jmteb.v2.utils import ( + load_batch_sizes, + load_prompts, + load_summary, + save_results, + save_summary, +) + +__all__ = [ + "JMTEBModel", + "JMTEBV2Evaluator", + "JMTEB_TASKS", + "JMTEB_LITE_TASKS", + "get_jmteb_tasks", + "get_jmteb_lite_tasks", + "get_jmteb_benchmark", + "get_jmteb_lite_benchmark", + "get_task_by_name", + "load_prompts", + "load_batch_sizes", + "save_results", + "load_summary", + "save_summary", +] diff --git a/src/jmteb/v2/__main__.py b/src/jmteb/v2/__main__.py new file mode 100644 index 0000000..6eb79a1 --- /dev/null +++ b/src/jmteb/v2/__main__.py @@ -0,0 +1,198 @@ +""" +JMTEB v2.0 main entry point. + +This module provides the CLI interface for running JMTEB v2.0 evaluations using MTEB. 
+""" + +from __future__ import annotations + +import torch +from jsonargparse import ArgumentParser +from loguru import logger + +from jmteb.v2.adapters import JMTEBModel +from jmteb.v2.evaluator import JMTEBV2Evaluator +from jmteb.v2.tasks import get_jmteb_benchmark, get_jmteb_tasks +from jmteb.v2.utils import load_batch_sizes, load_prompts + + +def get_args(): + """Parse command-line arguments.""" + parser = ArgumentParser(description="JMTEB v2.0 - Japanese Massive Text Embedding Benchmark") + + # Model configuration + parser.add_argument( + "--model_name", + type=str, + required=True, + help="Name or path of the model to evaluate", + ) + parser.add_argument( + "--batch_size", + type=int, + default=32, + help="Default batch size for encoding", + ) + parser.add_argument( + "--fp16", + type=bool, + default=False, + help="Use FP16 precision", + ) + parser.add_argument( + "--bf16", + type=bool, + default=False, + help="Use BF16 precision", + ) + + # Task selection + parser.add_argument( + "--include", + type=list[str], + default=None, + help="List of task names to include in evaluation", + ) + parser.add_argument( + "--exclude", + type=list[str], + default=None, + help="List of task names to exclude from evaluation", + ) + parser.add_argument( + "--task_types", + type=list[str], + default=None, + help="List of task types to evaluate (e.g., ['Retrieval', 'Classification'])", + ) + + # Configuration files + parser.add_argument( + "--prompt_profile", + type=str, + default=None, + help="Path to prompt configuration YAML file", + ) + parser.add_argument( + "--task_batch_sizes", + type=str, + default=None, + help="Path to YAML file with per-task batch sizes", + ) + + # Output configuration + parser.add_argument( + "--save_path", + type=str, + default="results_v2", + help="Path to save evaluation results", + ) + parser.add_argument( + "--overwrite_cache", + type=bool, + default=False, + help="Overwrite cached results and reevaluate", + ) + parser.add_argument( + 
"--generate_summary", + type=bool, + default=True, + help="Generate/update summary.json file", + ) + parser.add_argument( + "--cache_path", + type=str, + default="./cached_results", + help="Path for caching intermediate results", + ) + + return parser.parse_args() + + +def main(): + """Main function to run JMTEB v2.0 evaluation.""" + args = get_args() + + # Prepare model_kwargs based on fp16/bf16 + model_kwargs = {} + if args.fp16: + model_kwargs["torch_dtype"] = torch.float16 + logger.info("Using FP16 precision") + elif args.bf16: + model_kwargs["torch_dtype"] = torch.bfloat16 + logger.info("Using BF16 precision") + + # Load prompts if provided + prompts = None + if args.prompt_profile: + prompts = load_prompts(args.prompt_profile) + logger.info(f"Loaded prompts from {args.prompt_profile}") + logger.info(f"Prompt keys: {list(prompts.keys())}") + + # Create model + logger.info(f"Loading model: {args.model_name}") + model = JMTEBModel.from_sentence_transformer( + model_name_or_path=args.model_name, + model_kwargs=model_kwargs if model_kwargs else None, + prompts=prompts, + ) + logger.info("Model loaded successfully") + + # Get tasks + if args.include: + logger.info(f"Including specific tasks: {args.include}") + tasks = get_jmteb_tasks(task_names=args.include) + elif args.exclude: + logger.info(f"Excluding tasks: {args.exclude}") + benchmark = get_jmteb_benchmark() + tasks = [t for t in benchmark.tasks if t.metadata.name not in args.exclude] + elif args.task_types: + logger.info(f"Filtering by task types: {args.task_types}") + tasks = get_jmteb_tasks(task_types=args.task_types) + else: + logger.info("Evaluating all JMTEB tasks") + tasks = get_jmteb_tasks() + + logger.info(f"Loaded {len(tasks)} tasks") + + # Load task-specific batch sizes if provided + task_batch_sizes = {} + if args.task_batch_sizes: + task_batch_sizes = load_batch_sizes(args.task_batch_sizes) + logger.info(f"Loaded task-specific batch sizes from {args.task_batch_sizes}") + + # Create save path for 
model + from pathlib import Path + + model_save_path = Path(args.save_path) / args.model_name + logger.info(f"Results will be saved to: {model_save_path}") + + # Create evaluator + evaluator = JMTEBV2Evaluator( + model=model, + tasks=tasks, + save_path=model_save_path, + batch_size=args.batch_size, + task_batch_sizes=task_batch_sizes, + overwrite_cache=args.overwrite_cache, + generate_summary=args.generate_summary, + cache_path=args.cache_path, + ) + + # Run evaluation + logger.info("=" * 80) + logger.info("Starting evaluation") + logger.info("=" * 80) + + results = evaluator.run() + + logger.info("=" * 80) + logger.info("Evaluation complete!") + logger.info("=" * 80) + + if results: + logger.info(f"Results available in: {model_save_path}") + + +if __name__ == "__main__": + main() diff --git a/src/jmteb/v2/adapters.py b/src/jmteb/v2/adapters.py new file mode 100644 index 0000000..58b0d71 --- /dev/null +++ b/src/jmteb/v2/adapters.py @@ -0,0 +1,182 @@ +""" +Adapters to bridge JMTEB v1 embedders with MTEB evaluation framework. +""" + +from __future__ import annotations + +from typing import Any + +import numpy as np +from sentence_transformers import SentenceTransformer + +from jmteb.embedders.base import TextEmbedder + + +class JMTEBModel: + """ + Adapter that wraps JMTEB v1 TextEmbedder to work with MTEB's evaluation system. + + This adapter allows using existing JMTEB embedders (SentenceBertEmbedder, + OpenAIEmbedder, etc.) with the MTEB evaluation framework while maintaining + their specific behaviors and configurations. 
+ + Example: + >>> from jmteb.embedders import SentenceBertEmbedder + >>> from jmteb.v2.adapters import JMTEBModel + >>> + >>> embedder = SentenceBertEmbedder(model_name_or_path="cl-nagoya/ruri-base") + >>> model = JMTEBModel(embedder) + >>> + >>> # Now use with MTEB + >>> import mteb + >>> tasks = mteb.get_tasks(languages=["jpn"]) + >>> results = mteb.evaluate(model, tasks=tasks[:1]) + """ + + def __init__( + self, + embedder: TextEmbedder | None = None, + sentence_transformer: SentenceTransformer | None = None, + prompts: dict[str, str] | None = None, + **encode_kwargs, + ): + """ + Initialize the JMTEB model adapter. + + Args: + embedder: JMTEB v1 TextEmbedder instance (for backward compatibility) + sentence_transformer: SentenceTransformer model (for direct MTEB usage) + prompts: Dictionary mapping task types to prompt templates + **encode_kwargs: Additional keyword arguments for encoding + """ + if embedder is None and sentence_transformer is None: + raise ValueError("Either embedder or sentence_transformer must be provided") + + self.embedder = embedder + self.sentence_transformer = sentence_transformer + self.prompts = prompts or {} + self.encode_kwargs = encode_kwargs + + def encode( + self, + sentences: list[str], + batch_size: int = 32, + **kwargs, + ) -> np.ndarray: + """ + Encode sentences into embeddings. + + Args: + sentences: List of sentences to encode + batch_size: Batch size for encoding + **kwargs: Additional encoding arguments (prompt_name, task_type, etc.) 
+ + Returns: + Array of embeddings with shape (len(sentences), embedding_dim) + """ + # Merge default encode_kwargs with method-specific kwargs + encode_params = {**self.encode_kwargs, **kwargs} + encode_params["batch_size"] = batch_size + + # Use JMTEB v1 embedder if provided + if self.embedder is not None: + embeddings = self.embedder.encode( + sentences, + batch_size=batch_size, + ) + return np.array(embeddings) + + # Otherwise use SentenceTransformer directly + embeddings = self.sentence_transformer.encode( + sentences, + **encode_params, + ) + return np.array(embeddings) + + @classmethod + def from_sentence_transformer( + cls, + model_name_or_path: str, + device: str | None = None, + model_kwargs: dict[str, Any] | None = None, + prompts: dict[str, str] | None = None, + **encode_kwargs, + ) -> JMTEBModel: + """ + Create a JMTEBModel from a SentenceTransformer model name or path. + + Args: + model_name_or_path: Model name on HuggingFace Hub or local path + device: Device to run the model on + model_kwargs: Additional keyword arguments for model initialization + prompts: Dictionary mapping task types to prompt templates + **encode_kwargs: Additional encoding keyword arguments + + Returns: + JMTEBModel instance + """ + model = SentenceTransformer( + model_name_or_path=model_name_or_path, + device=device, + model_kwargs=model_kwargs, + prompts=prompts, + trust_remote_code=True, + ) + return cls( + sentence_transformer=model, + prompts=prompts, + **encode_kwargs, + ) + + @classmethod + def from_jmteb_embedder( + cls, + embedder: TextEmbedder, + prompts: dict[str, str] | None = None, + **encode_kwargs, + ) -> JMTEBModel: + """ + Create a JMTEBModel from a JMTEB v1 TextEmbedder. 
+ + Args: + embedder: JMTEB v1 TextEmbedder instance + prompts: Dictionary mapping task types to prompt templates + **encode_kwargs: Additional encoding keyword arguments + + Returns: + JMTEBModel instance + """ + return cls( + embedder=embedder, + prompts=prompts, + **encode_kwargs, + ) + + @classmethod + def from_mteb( + cls, + model_name: str, + **model_kwargs, + ) -> JMTEBModel: + """ + Create a JMTEBModel using MTEB's get_model function. + + This method uses MTEB's unified model loading interface, which supports + various model types and handles model-specific configurations automatically. + + Args: + model_name: Name of the model (e.g., "sentence-transformers/all-MiniLM-L6-v2") + **model_kwargs: Additional keyword arguments passed to mteb.get_model + + Returns: + JMTEBModel instance + + Example: + >>> model = JMTEBModel.from_mteb("sentence-transformers/all-MiniLM-L6-v2") + >>> # Or with specific revision + >>> model = JMTEBModel.from_mteb("intfloat/multilingual-e5-base", revision="main") + """ + import mteb + + mteb_model = mteb.get_model(model_name, **model_kwargs) + return cls(sentence_transformer=mteb_model) diff --git a/src/jmteb/v2/evaluator.py b/src/jmteb/v2/evaluator.py new file mode 100644 index 0000000..abcd816 --- /dev/null +++ b/src/jmteb/v2/evaluator.py @@ -0,0 +1,238 @@ +""" +JMTEB v2.0 evaluator using MTEB framework. +""" + +from __future__ import annotations + +import time +from pathlib import Path + +import mteb +from loguru import logger +from mteb import AbsTask +from mteb.cache import ResultCache + +from jmteb.v2.adapters import JMTEBModel +from jmteb.v2.tasks import get_task_category +from jmteb.v2.utils import load_summary, save_summary + + +class JMTEBV2Evaluator: + """ + JMTEB v2.0 evaluator that uses MTEB as the underlying evaluation engine. + + This evaluator provides a high-level interface for running JMTEB benchmarks + while leveraging MTEB's robust evaluation framework and caching. 
+ + Example: + >>> from jmteb.v2 import JMTEBV2Evaluator, JMTEBModel + >>> from jmteb.v2.tasks import get_jmteb_tasks + >>> + >>> # Create model + >>> model = JMTEBModel.from_sentence_transformer("cl-nagoya/ruri-base") + >>> + >>> # Create evaluator + >>> evaluator = JMTEBV2Evaluator( + ... model=model, + ... tasks=get_jmteb_tasks(task_names=["JSTS", "JSICK"]), + ... save_path="results/ruri-base" + ... ) + >>> + >>> # Run evaluation + >>> results = evaluator.run() + """ + + def __init__( + self, + model: JMTEBModel, + tasks: list[AbsTask] | AbsTask, + save_path: str | Path | None = None, + batch_size: int = 32, + task_batch_sizes: dict[str, int] | None = None, + cache_path: str | Path | None = None, + **encode_kwargs, + ): + """ + Initialize the JMTEB v2.0 evaluator. + + Args: + model: JMTEBModel instance to evaluate + tasks: Single task or list of tasks to evaluate + save_path: Path to save summary.json (MTEB handles result caching) + batch_size: Default batch size for encoding + task_batch_sizes: Per-task batch size overrides + cache_path: Path for MTEB's result cache + **encode_kwargs: Additional encoding keyword arguments + """ + self.model = model + self.tasks = tasks if isinstance(tasks, list) else [tasks] + self.save_path = Path(save_path) if save_path else None + self.batch_size = batch_size + self.task_batch_sizes = task_batch_sizes or {} + self.cache_path = cache_path or "./cached_results" + self.encode_kwargs = encode_kwargs + + # Create save directory if needed + if self.save_path: + self.save_path.mkdir(parents=True, exist_ok=True) + + def _get_batch_size(self, task_name: str) -> int: + """Get batch size for a specific task.""" + return self.task_batch_sizes.get(task_name, self.batch_size) + + def _extract_main_score(self, task_result, task_name: str) -> float | None: + """Extract main score from MTEB task result.""" + # Determine the split to use + if task_name == "JSTS": + split = "validation" + elif task_name.startswith("MultiLongDoc"): + split = 
"dev" + else: + split = "test" + + if split in task_result.scores and len(task_result.scores[split]) > 0: + return task_result.scores[split][0].get("main_score") + return None + + def _update_summary(self, task_result, task_name: str, eval_time: float, summary: dict): + """Update summary with task result.""" + task_category = get_task_category(task_name) + if not task_category or task_category == "Unknown": + return + + main_score = self._extract_main_score(task_result, task_name) + if main_score is None: + return + + # Get task key for summary + from jmteb.v2.utils import _get_task_key + + task_key = _get_task_key(task_name) + + # Update summary + if task_category not in summary: + summary[task_category] = {} + + summary[task_category][task_key] = { + "main_metric": task_result.task.metadata.main_score, + "main_score": main_score * 100, # Convert to percentage + "eval_time (s)": "%.2f" % eval_time, + } + + def run(self) -> list[mteb.MTEBResults] | None: + """ + Run evaluation on all tasks. 
+ + Returns: + List of MTEB results objects (one per task), or None if no results + """ + logger.info(f"Starting JMTEB v2.0 evaluation on {len(self.tasks)} tasks") + # Get task names - handle both AbsTask and direct metadata objects + task_names = [] + for task in self.tasks: + if hasattr(task, "metadata"): + task_names.append(task.metadata.name) + elif hasattr(task, "name"): + task_names.append(task.name) + else: + task_names.append(str(task)) + logger.info(f"Tasks: {task_names}") + + if self.save_path: + logger.info(f"Summary will be saved to: {self.save_path}/summary.json") + + # Load existing summary + summary = {} + if self.save_path: + summary = load_summary(str(self.save_path)) + if summary: + logger.info(f"Loaded existing summary from {self.save_path}/summary.json") + + # Prepare encode_kwargs with batch sizes + all_results = [] + results_summary = [] + + # Evaluate each task + for idx, task in enumerate(self.tasks, 1): + # Get task name safely + if hasattr(task, "metadata"): + task_name = task.metadata.name + elif hasattr(task, "name"): + task_name = task.name + else: + task_name = str(task) + batch_size = self._get_batch_size(task_name) + + logger.info(f"\n[{idx}/{len(self.tasks)}] Task: {task_name} (batch_size={batch_size})") + logger.info("-" * 80) + + start_time = time.time() + + encode_kwargs = { + "batch_size": batch_size, + **self.encode_kwargs, + } + + # MTEB handles all caching automatically + results = mteb.evaluate( + model=self.model, + tasks=task, + encode_kwargs=encode_kwargs, + cache=ResultCache(cache_path=self.cache_path), + ) + + elapsed_time = time.time() - start_time + all_results.append(results) + + logger.info(f"✓ Completed: {task_name} (time: {elapsed_time:.2f}s)") + results_summary.append((task_name, "✓ Success")) + + # Update summary + if self.save_path: + task_result = results.task_results[0] + self._update_summary(task_result, task_name, elapsed_time, summary) + logger.info(f"Summary updated for {task_name}") + # Save after each 
task + save_summary(summary, str(self.save_path)) + logger.info(f"Summary saved to: {self.save_path}/summary.json") + + # Save final summary + if self.save_path: + if summary: + save_summary(summary, str(self.save_path)) + logger.info(f"Final summary saved to: {self.save_path}/summary.json") + else: + logger.warning("No summary data to save (summary dict is empty)") + + # Print final summary + self._print_summary(results_summary) + + # Return list of all results or None if empty + return all_results if all_results else None + + def _print_summary(self, results_summary: list[tuple[str, str]]): + """Print evaluation summary.""" + logger.info("\n" + "=" * 80) + logger.info("EVALUATION SUMMARY") + logger.info("=" * 80) + logger.info(f"Total tasks: {len(self.tasks)}") + + successful = sum(1 for _, status in results_summary if "✓" in status) + failed = sum(1 for _, status in results_summary if "✗" in status) + + logger.info(f"Successful: {successful}") + logger.info(f"Failed: {failed}") + + if self.save_path: + logger.info(f"\nSummary saved to: {self.save_path}/summary.json") + logger.info(f"MTEB cache: {self.cache_path}") + + logger.info("=" * 80) + + # Print detailed results + if failed > 0: + logger.info("\nDetailed Results:") + for task_name, status in results_summary: + if "✗" in status: + logger.info(f" {status}") + logger.info("=" * 80) diff --git a/src/jmteb/v2/tasks.py b/src/jmteb/v2/tasks.py new file mode 100644 index 0000000..0ea81cd --- /dev/null +++ b/src/jmteb/v2/tasks.py @@ -0,0 +1,348 @@ +""" +JMTEB v2.0 task definitions and utilities using MTEB framework. 
+""" + +from __future__ import annotations + +import mteb +from mteb import AbsTask + +# JMTEB v2.0 consists of 28 tasks aligned with MTEB's JMTEB(v2) benchmark +JMTEB_TASKS = [ + # Clustering (3 tasks) + "LivedoorNewsClustering.v2", + "MewsC16JaClustering", + "SIB200ClusteringS2S", + # Classification (7 tasks) + "AmazonReviewsClassification", + "AmazonCounterfactualClassification", + "MassiveIntentClassification", + "MassiveScenarioClassification", + "JapaneseSentimentClassification", + "SIB200Classification", + "WRIMEClassification", + # STS (2 tasks) + "JSTS", + "JSICK", + # Retrieval (11 tasks) + "JaqketRetrieval", + "MrTidyRetrieval", + "JaGovFaqsRetrieval", + "NLPJournalTitleAbsRetrieval.V2", + "NLPJournalTitleIntroRetrieval.V2", + "NLPJournalAbsIntroRetrieval.V2", + "NLPJournalAbsArticleRetrieval.V2", + "JaCWIRRetrieval", + "MIRACLRetrieval", + "MintakaRetrieval", + "MultiLongDocRetrieval", + # Reranking (5 tasks) + "ESCIReranking", + "JQaRAReranking", + "JaCWIRReranking", + "MIRACLReranking", + "MultiLongDocReranking", +] + + +# JMTEB-lite consists of the same 28 tasks as JMTEB but with reduced corpus sizes +# for faster evaluation (~5x speedup with 0.97 Spearman correlation to full JMTEB) +# The lightweight tasks (with "Lite" suffix) have reduced corpus sizes: +# JaqketRetrievalLite, MrTyDiJaRetrievalLite, JaCWIRRetrievalLite, +# MIRACLJaRetrievalLite, JQaRARerankingLite, JaCWIRRerankingLite +JMTEB_LITE_TASKS = [ + # Clustering (3 tasks) + "LivedoorNewsClustering.v2", + "MewsC16JaClustering", + "SIB200ClusteringS2S", + # Classification (7 tasks) + "AmazonReviewsClassification", + "AmazonCounterfactualClassification", + "MassiveIntentClassification", + "MassiveScenarioClassification", + "JapaneseSentimentClassification", + "SIB200Classification", + "WRIMEClassification", + # STS (2 tasks) + "JSTS", + "JSICK", + # Retrieval (11 tasks) + "JaqketRetrievalLite", + "MrTyDiJaRetrievalLite", + "JaGovFaqsRetrieval", + "NLPJournalTitleAbsRetrieval.V2", + 
"NLPJournalTitleIntroRetrieval.V2", + "NLPJournalAbsIntroRetrieval.V2", + "NLPJournalAbsArticleRetrieval.V2", + "JaCWIRRetrievalLite", + "MIRACLJaRetrievalLite", + "MintakaRetrieval", + "MultiLongDocRetrieval", + # Reranking (5 tasks) + "ESCIReranking", + "JQaRARerankingLite", + "JaCWIRRerankingLite", + "MIRACLReranking", + "MultiLongDocReranking", +] + + +# Task type categorization for summary generation +TASK_CATEGORIES = { + # Classification + "AmazonReviewsClassification": "Classification", + "AmazonCounterfactualClassification": "Classification", + "MassiveIntentClassification": "Classification", + "MassiveScenarioClassification": "Classification", + "JapaneseSentimentClassification": "Classification", + "SIB200Classification": "Classification", + "WRIMEClassification": "Classification", + # Clustering + "LivedoorNewsClustering.v2": "Clustering", + "MewsC16JaClustering": "Clustering", + "SIB200ClusteringS2S": "Clustering", + # STS + "JSTS": "STS", + "JSICK": "STS", + # Retrieval + "JaqketRetrieval": "Retrieval", + "MrTidyRetrieval": "Retrieval", + "JaGovFaqsRetrieval": "Retrieval", + "NLPJournalTitleAbsRetrieval.V2": "Retrieval", + "NLPJournalTitleIntroRetrieval.V2": "Retrieval", + "NLPJournalAbsIntroRetrieval.V2": "Retrieval", + "NLPJournalAbsArticleRetrieval.V2": "Retrieval", + "JaCWIRRetrieval": "Retrieval", + "MIRACLRetrieval": "Retrieval", + "MintakaRetrieval": "Retrieval", + "MultiLongDocRetrieval": "Retrieval", + # Reranking + "ESCIReranking": "Reranking", + "JQaRAReranking": "Reranking", + "JaCWIRReranking": "Reranking", + "MIRACLReranking": "Reranking", + "MultiLongDocReranking": "Reranking", +} + + +# Mapping of JMTEB v1 task names to MTEB task names (for backward compatibility) +V1_TO_V2_TASK_MAPPING = { + # v1 name -> v2 name + "livedoor_news": "LivedoorNewsClustering.v2", + "mewsc16": "MewsC16JaClustering", + "amazon_review_classification": "AmazonReviewsClassification", + "amazon_counterfactual_classification": 
"AmazonCounterfactualClassification", + "massive_intent_classification": "MassiveIntentClassification", + "massive_scenario_classification": "MassiveScenarioClassification", + "jsts": "JSTS", + "jsick": "JSICK", + "jaqket": "JaqketRetrieval", + "mrtydi": "MrTidyRetrieval", + "jagovfaqs_22k": "JaGovFaqsRetrieval", + "nlp_journal_title_abs": "NLPJournalTitleAbsRetrieval.V2", + "nlp_journal_title_intro": "NLPJournalTitleIntroRetrieval.V2", + "nlp_journal_abs_intro": "NLPJournalAbsIntroRetrieval.V2", + "nlp_journal_abs_article": "NLPJournalAbsArticleRetrieval.V2", + "jacwir_retrieval": "JaCWIRRetrieval", + "miracl_retrieval": "MIRACLRetrieval", + "esci": "ESCIReranking", + "jqara": "JQaRAReranking", + "jacwir_reranking": "JaCWIRReranking", + "miracl_reranking": "MIRACLReranking", +} + + +def get_jmteb_benchmark() -> mteb.Benchmark: + """ + Get the JMTEB(v2) benchmark from MTEB. + + Returns: + MTEB Benchmark object containing all JMTEB tasks + + Example: + >>> benchmark = get_jmteb_benchmark() + >>> print(f"JMTEB contains {len(benchmark.tasks)} tasks") + >>> print(benchmark.tasks[0].metadata.name) + """ + return mteb.get_benchmark("JMTEB(v2)") + + +def get_jmteb_lite_benchmark() -> mteb.Benchmark: + """ + Get the JMTEB-lite benchmark from MTEB. + + JMTEB-lite is a lightweight version with reduced corpus sizes for + faster evaluation (~5x faster) while maintaining high correlation + with full JMTEB results. + + Returns: + MTEB Benchmark object containing all JMTEB-lite tasks + + Example: + >>> benchmark = get_jmteb_lite_benchmark() + >>> print(f"JMTEB-lite contains {len(benchmark.tasks)} tasks") + """ + return mteb.get_benchmark("JMTEB-lite(v1)") + + +def _get_tasks_from_benchmark( + benchmark: mteb.Benchmark, + task_names: list[str] | None = None, + task_types: list[str] | None = None, +) -> list[AbsTask]: + """ + Internal helper to get tasks from a benchmark with optional filtering. 
+ + Args: + benchmark: MTEB Benchmark object + task_names: List of specific task names to retrieve. If None, returns all tasks. + task_types: Filter tasks by type (e.g., ["Retrieval", "Classification"]). + + Returns: + List of MTEB task objects + """ + tasks = benchmark.tasks + + # Filter by task names if specified + if task_names is not None: + tasks = [task for task in tasks if task.metadata.name in task_names] + + # Filter by task types if specified + if task_types is not None: + tasks = [task for task in tasks if task.metadata.type in task_types] + + # Extract task names and use mteb.get_tasks to restrict all tasks to Japanese only + # This properly handles multilingual tasks by restricting them to jpn subset + task_name_list = [task.metadata.name for task in tasks] + tasks = mteb.get_tasks(tasks=task_name_list, languages=["jpn"]) + + return tasks + + +def get_jmteb_tasks( + task_names: list[str] | None = None, + task_types: list[str] | None = None, +) -> list[AbsTask]: + """ + Get JMTEB tasks with optional filtering. + + Args: + task_names: List of specific task names to retrieve. If None, returns all tasks. + task_types: Filter tasks by type (e.g., ["Retrieval", "Classification"]). + + Returns: + List of MTEB task objects + + Example: + >>> # Get all JMTEB tasks + >>> tasks = get_jmteb_tasks() + >>> + >>> # Get specific tasks + >>> tasks = get_jmteb_tasks(task_names=["JSTS", "JSICK"]) + >>> + >>> # Get all retrieval tasks + >>> tasks = get_jmteb_tasks(task_types=["Retrieval"]) + """ + return _get_tasks_from_benchmark(get_jmteb_benchmark(), task_names=task_names, task_types=task_types) + + +def get_jmteb_lite_tasks( + task_names: list[str] | None = None, + task_types: list[str] | None = None, +) -> list[AbsTask]: + """ + Get JMTEB-lite tasks with optional filtering. + + JMTEB-lite provides ~5x faster evaluation with reduced corpus sizes + while maintaining high correlation (0.97 Spearman) with full JMTEB results. 
+ + Args: + task_names: List of specific task names to retrieve. If None, returns all tasks. + task_types: Filter tasks by type (e.g., ["Retrieval", "Classification"]). + + Returns: + List of MTEB task objects + + Example: + >>> # Get all JMTEB-lite tasks + >>> tasks = get_jmteb_lite_tasks() + >>> + >>> # Get specific tasks + >>> tasks = get_jmteb_lite_tasks(task_names=["JSTS", "JSICK"]) + >>> + >>> # Get all retrieval tasks + >>> tasks = get_jmteb_lite_tasks(task_types=["Retrieval"]) + """ + return _get_tasks_from_benchmark(get_jmteb_lite_benchmark(), task_names=task_names, task_types=task_types) + + +def get_task_by_name(task_name: str, lite: bool = False) -> AbsTask: + """ + Get a single task by name. + + Args: + task_name: Name of the task (MTEB format) + lite: If True, search in JMTEB-lite benchmark; otherwise search in JMTEB + + Returns: + MTEB task object + + Raises: + ValueError: If task name is not found + + Example: + >>> # Get from JMTEB + >>> task = get_task_by_name("JSTS") + >>> print(task.metadata.description) + >>> + >>> # Get from JMTEB-lite + >>> task = get_task_by_name("JaqketRetrievalLite", lite=True) + """ + if lite: + tasks = get_jmteb_lite_tasks(task_names=[task_name]) + available_tasks = JMTEB_LITE_TASKS + benchmark_name = "JMTEB-lite" + else: + tasks = get_jmteb_tasks(task_names=[task_name]) + available_tasks = JMTEB_TASKS + benchmark_name = "JMTEB" + + if not tasks: + raise ValueError( + f"Task '{task_name}' not found in {benchmark_name} benchmark. " f"Available tasks: {available_tasks}" + ) + return tasks[0] + + +def get_task_category(task_name: str) -> str: + """ + Get the category/type of a task. + + Args: + task_name: Name of the task + + Returns: + Task category (e.g., "Classification", "Retrieval", etc.) 
+ + Example: + >>> category = get_task_category("JSTS") + >>> print(category) # "STS" + """ + return TASK_CATEGORIES.get(task_name, "Unknown") + + +def convert_v1_task_name(v1_name: str) -> str: + """ + Convert JMTEB v1 task name to v2 (MTEB) format. + + Args: + v1_name: JMTEB v1 task name + + Returns: + MTEB task name + + Example: + >>> v2_name = convert_v1_task_name("jsts") + >>> print(v2_name) # "JSTS" + """ + return V1_TO_V2_TASK_MAPPING.get(v1_name, v1_name) diff --git a/src/jmteb/v2/utils.py b/src/jmteb/v2/utils.py new file mode 100644 index 0000000..fd6e0af --- /dev/null +++ b/src/jmteb/v2/utils.py @@ -0,0 +1,217 @@ +""" +JMTEB v2.0 utility functions. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import yaml + +from jmteb.v2.tasks import TASK_CATEGORIES + + +def load_prompts(prompt_config: str | Path) -> dict[str, str]: + """ + Load prompt configuration from a YAML file. + + Args: + prompt_config: Path to YAML file containing prompts + + Returns: + Dictionary mapping task types to prompt templates + + Example: + >>> prompts = load_prompts("prompts/e5.yaml") + >>> print(prompts.get("query", "")) + """ + prompt_path = Path(prompt_config) if isinstance(prompt_config, str) else prompt_config + + with open(prompt_path, encoding="utf-8") as f: + prompts = yaml.safe_load(f) + + return prompts if prompts is not None else {} + + +def load_batch_sizes(batch_size_config: str | Path) -> dict[str, int]: + """ + Load per-task batch size configuration from a YAML file. 
+ + Args: + batch_size_config: Path to YAML file containing batch sizes + + Returns: + Dictionary mapping task names to batch sizes + + Example: + >>> batch_sizes = load_batch_sizes("batch_sizes.yaml") + >>> print(batch_sizes.get("JSTS", 32)) + """ + batch_size_path = Path(batch_size_config) if isinstance(batch_size_config, str) else batch_size_config + + with open(batch_size_path, encoding="utf-8") as f: + batch_sizes = yaml.safe_load(f) + + return batch_sizes if batch_sizes is not None else {} + + +def load_summary(save_path: str | Path) -> dict: + """ + Load existing summary.json if it exists. + + Args: + save_path: Directory containing summary.json + + Returns: + Dictionary containing summary data, or empty dict if not found + """ + summary_path = Path(save_path) / "summary.json" + if summary_path.exists(): + with open(summary_path) as f: + return json.load(f) + return {} + + +def save_summary(summary: dict, save_path: str | Path): + """ + Save summary.json to disk. + + Args: + summary: Summary dictionary to save + save_path: Directory to save summary.json in + """ + summary_path = Path(save_path) / "summary.json" + summary_path.parent.mkdir(parents=True, exist_ok=True) + + try: + with open(summary_path, "w") as f: + json.dump(summary, f, indent=4, ensure_ascii=False) + except Exception as e: + raise RuntimeError(f"Error saving summary: {e}") + + +def extract_and_update_summary( + task_name: str, + main_metric: str, + save_path: str | Path, + summary: dict, + eval_time: float = -1, +): + """ + Extract main score from task result and update summary with timing. 
+ + Args: + task_name: Name of the task + main_metric: Main metric for the task + save_path: Path where results are saved + summary: Summary dictionary to update + eval_time: Time taken to evaluate in seconds, or -1 if cached + """ + task_category = TASK_CATEGORIES.get(task_name) + if not task_category: + return + + result_path = Path(save_path) / f"{task_name}.json" + + if not result_path.exists(): + return + + with open(result_path) as f: + result_data = json.load(f) + + # Determine the split to use + # JSTS uses validation split, MLDR tasks use dev split, others use test + if task_name == "JSTS": + split = "validation" + elif task_name.startswith("MultiLongDoc"): + split = "dev" + else: + split = "test" + + # Extract score from result + if split in result_data and len(result_data[split]) > 0: + score = result_data[split][0].get("main_score") + + if score is not None: + # Create task key (convert MTEB name back to simpler format for summary) + task_key = _get_task_key(task_name) + + # Update summary + if task_category not in summary: + summary[task_category] = {} + + summary[task_category][task_key] = { + "main_metric": main_metric, + "main_score": score * 100, # Convert to percentage + "eval_time (s)": "%.2f" % eval_time if eval_time >= 0 else "cached", + } + + # Save immediately + save_summary(summary, save_path) + + +def _get_task_key(task_name: str) -> str: + """ + Convert MTEB task name to a simpler key for summary. 
+ + Args: + task_name: MTEB task name + + Returns: + Simplified task key + """ + # Map common task names to simpler keys + task_key_mapping = { + "LivedoorNewsClustering.v2": "livedoor_news", + "MewsC16JaClustering": "mewsc16", + "SIB200ClusteringS2S": "sib200_japanese_clustering", + "AmazonReviewsClassification": "amazon_review_classification", + "AmazonCounterfactualClassification": "amazon_counterfactual_classification", + "MassiveIntentClassification": "massive_intent_classification", + "MassiveScenarioClassification": "massive_scenario_classification", + "JapaneseSentimentClassification": "japanese_sentiment_classification", + "SIB200Classification": "sib200_japanese_classification", + "WRIMEClassification": "wrime_classification", + "JSTS": "jsts", + "JSICK": "jsick", + "JaqketRetrieval": "jaqket", + "MrTidyRetrieval": "mrtydi", + "JaGovFaqsRetrieval": "jagovfaqs_22k", + "NLPJournalTitleAbsRetrieval.V2": "nlp_journal_title_abs", + "NLPJournalTitleIntroRetrieval.V2": "nlp_journal_title_intro", + "NLPJournalAbsIntroRetrieval.V2": "nlp_journal_abs_intro", + "NLPJournalAbsArticleRetrieval.V2": "nlp_journal_abs_article", + "JaCWIRRetrieval": "jacwir_retrieval", + "MIRACLRetrieval": "miracl_retrieval", + "MintakaRetrieval": "mintaka_retrieval", + "MultiLongDocRetrieval": "mldr_retrieval", + "ESCIReranking": "esci", + "JQaRAReranking": "jqara", + "JaCWIRReranking": "jacwir_reranking", + "MIRACLReranking": "miracl_reranking", + "MultiLongDocReranking": "mldr_reranking", + } + + return task_key_mapping.get(task_name, task_name.lower()) + + +def save_results( + results: dict, + save_path: str | Path, + filename: str = "results.json", +): + """ + Save evaluation results to a JSON file. 
+ + Args: + results: Results dictionary to save + save_path: Directory to save results in + filename: Name of the file to save + """ + save_path = Path(save_path) + save_path.mkdir(parents=True, exist_ok=True) + + result_file = save_path / filename + with open(result_file, "w") as f: + json.dump(results, f, indent=4, ensure_ascii=False) diff --git a/submission.md b/submission.md deleted file mode 100644 index d229be8..0000000 --- a/submission.md +++ /dev/null @@ -1,30 +0,0 @@ -# Submission Guideline -A guideline for developers who would like to register their own models to the [JMTEB leaderboard](leaderboard.md). - -## Submit the evaluation results -Developers shall open a pull request in regards to each model they would like to add to the leaderboard. Please make the PR following the steps below. - -1. Train your embedding model, and evaluate with JMTEB. - -2. Your results shall be added to [docs/results](docs/results/). The result file (name should be `summary.json`) should be put in a directory named as `owner/model_name`, that results in a two-layer folder, for example, [docs/results/OpenAI/text-embedding-3-large](docs/results/OpenAI/text-embedding-3-large). - -3. Run `pip install tabulate && python make_leaderboard.py`, and you will get a new `leaderboard.md` which contains the results of your model. - -4. Push your update to a new branch `leaderboard/`, and write the PR description (follow the [template](.github/PULL_REQUEST_TEMPLATE/leaderboard_submission.md)) about your model with details as much as possible, including the type, size and structure of your model, and if possible, how it is trained and what training datasets are used. We **strongly recommend** to include information about seen/unseen information of the training dataset, that is, whether a JMTEB evaluation dataset was used in the training of your model. For example, your model used `JAQKET`'s training set in the training stage, so mark `JAQKET` as `seen`, and other datasets as `unseen`. 
Also, please include **an instruction to reproduce** the evaluation results (e.g., evaluation scripts, special settings needed to fit your model's setting) as possible as you can. - -## Submit your model -For developers who are reluctant to run all the evaluations due to the limits of computing resources, we enable the evaluation with some of [our](https://www.sbintuitions.co.jp/) resources when it is available. Please follow the instructions below if you want us to help you evaluate your model. - -1. Train your embedding model. - -2. Upload your model to somewhere publicly accessible. We recommend [Hugging Face Hub](https://huggingface.co/), as it is the de facto standard to make your models publicly available. - -3. Add an issue to request evaluation. Please refer to step 4 of the last chapter (Submit the evaluation results) as well as the [issue template](.github/ISSUE_TEMPLATE/evaluation_request.md) for the contents. - -4. We may respond within a few business days, if it is available for us to run the evaluation. - -Please note: - -* Please understand that we might be not able to cover all evaluation requests, as our computing resource is also limited. - -* If possible, please include a script for your model, as incorrect settings may result in performance deterioration of your model. At least you need to figure out what special settings are needed for your model. diff --git a/tests/v2/__init__.py b/tests/v2/__init__.py new file mode 100644 index 0000000..0a18a49 --- /dev/null +++ b/tests/v2/__init__.py @@ -0,0 +1,3 @@ +""" +JMTEB v2.0 test package. +""" diff --git a/tests/v2/conftest.py b/tests/v2/conftest.py new file mode 100644 index 0000000..3d80530 --- /dev/null +++ b/tests/v2/conftest.py @@ -0,0 +1,62 @@ +""" +Pytest configuration and fixtures for JMTEB v2.0 tests. 
+""" + +from unittest.mock import Mock + +import numpy as np +import pytest + + +@pytest.fixture +def mock_sentence_transformer(): + """Mock SentenceTransformer model.""" + model = Mock() + model.encode = Mock(return_value=np.random.rand(10, 768)) + return model + + +@pytest.fixture +def mock_embedder(): + """Mock JMTEB v1 TextEmbedder.""" + embedder = Mock() + embedder.encode = Mock(return_value=np.random.rand(10, 768).tolist()) + return embedder + + +@pytest.fixture +def sample_sentences(): + """Sample sentences for testing.""" + return [ + "これはテストです。", + "日本語のテキスト埋め込み", + "JMTEB v2.0のテスト", + "機械学習モデルの評価", + "自然言語処理", + ] + + +@pytest.fixture +def mock_mteb_task(): + """Mock MTEB task.""" + task = Mock() + task.metadata = Mock() + task.metadata.name = "JSTS" + task.metadata.main_score = "cosine_spearman" + task.metadata.type = "STS" + task.metadata.languages = ["jpn"] + return task + + +@pytest.fixture +def sample_task_results(): + """Sample task evaluation results.""" + return { + "validation": [ + { + "main_score": 0.8234, + "cosine_spearman": 0.8234, + "cosine_pearson": 0.8156, + } + ] + } diff --git a/tests/v2/test_adapters.py b/tests/v2/test_adapters.py new file mode 100644 index 0000000..3a8418f --- /dev/null +++ b/tests/v2/test_adapters.py @@ -0,0 +1,97 @@ +""" +Tests for JMTEB v2.0 adapters (JMTEBModel). 
+""" + +from unittest.mock import Mock, patch + +import numpy as np +import pytest + +from jmteb.v2.adapters import JMTEBModel + + +class TestJMTEBModel: + """Tests for JMTEBModel adapter.""" + + def test_init_with_embedder(self, mock_embedder): + """Test initialization with v1 embedder.""" + model = JMTEBModel(embedder=mock_embedder) + assert model.embedder == mock_embedder + assert model.sentence_transformer is None + + def test_init_with_sentence_transformer(self, mock_sentence_transformer): + """Test initialization with SentenceTransformer.""" + model = JMTEBModel(sentence_transformer=mock_sentence_transformer) + assert model.sentence_transformer == mock_sentence_transformer + assert model.embedder is None + + def test_init_without_model_raises_error(self): + """Test that initialization without model raises error.""" + with pytest.raises(ValueError, match="Either embedder or sentence_transformer must be provided"): + JMTEBModel() + + def test_encode_with_embedder(self, mock_embedder, sample_sentences): + """Test encoding with v1 embedder.""" + model = JMTEBModel(embedder=mock_embedder) + result = model.encode(sample_sentences, batch_size=32) + + # Check that embedder.encode was called + mock_embedder.encode.assert_called_once() + + # Check result is numpy array + assert isinstance(result, np.ndarray) + assert result.shape[0] == 10 # Mock returns 10 embeddings + + def test_encode_with_sentence_transformer(self, mock_sentence_transformer, sample_sentences): + """Test encoding with SentenceTransformer.""" + model = JMTEBModel(sentence_transformer=mock_sentence_transformer) + result = model.encode(sample_sentences, batch_size=32) + + # Check that st.encode was called + mock_sentence_transformer.encode.assert_called_once() + + # Check result is numpy array + assert isinstance(result, np.ndarray) + assert result.shape[0] == 10 + + def test_encode_with_kwargs(self, mock_sentence_transformer, sample_sentences): + """Test encoding with additional kwargs.""" + model = 
JMTEBModel(sentence_transformer=mock_sentence_transformer, show_progress_bar=False) + result = model.encode(sample_sentences, batch_size=64, prompt_name="query") + + # Verify kwargs were passed + call_kwargs = mock_sentence_transformer.encode.call_args[1] + assert call_kwargs["batch_size"] == 64 + assert call_kwargs["show_progress_bar"] is False + + # Check result + assert isinstance(result, np.ndarray) + + @patch("jmteb.v2.adapters.SentenceTransformer") + def test_from_sentence_transformer(self, mock_st_class): + """Test creating model from sentence transformer path.""" + mock_model = Mock() + mock_st_class.return_value = mock_model + + model = JMTEBModel.from_sentence_transformer( + "test-model", + device="cuda", + model_kwargs={"torch_dtype": "float16"}, + ) + + # Check SentenceTransformer was called correctly + mock_st_class.assert_called_once() + call_kwargs = mock_st_class.call_args[1] + assert call_kwargs["model_name_or_path"] == "test-model" + assert call_kwargs["device"] == "cuda" + + # Check model was wrapped + assert model.sentence_transformer == mock_model + + def test_from_jmteb_embedder(self, mock_embedder): + """Test creating model from v1 embedder.""" + prompts = {"query": "query: ", "passage": "passage: "} + model = JMTEBModel.from_jmteb_embedder(mock_embedder, prompts=prompts) + + assert model.embedder == mock_embedder + assert model.prompts == prompts diff --git a/tests/v2/test_cli.py b/tests/v2/test_cli.py new file mode 100644 index 0000000..e835c20 --- /dev/null +++ b/tests/v2/test_cli.py @@ -0,0 +1,497 @@ +""" +Tests for JMTEB v2.0 CLI interface. 
+""" + +import sys +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest +import torch + +from jmteb.v2 import __main__ as cli_module + + +class TestCLIArgumentParsing: + """Tests for CLI argument parsing.""" + + def test_required_model_name(self): + """Test that model_name is required.""" + with patch.object(sys, "argv", ["prog"]): + with pytest.raises(SystemExit): + cli_module.get_args() + + def test_model_name_only(self): + """Test parsing with only model_name.""" + with patch.object(sys, "argv", ["prog", "--model_name", "cl-nagoya/ruri-v3-30m"]): + args = cli_module.get_args() + assert args.model_name == "cl-nagoya/ruri-v3-30m" + assert args.batch_size == 32 # default + assert args.fp16 is False # default + assert args.bf16 is False # default + + def test_batch_size_argument(self): + """Test batch_size argument.""" + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--batch_size", "64"], + ): + args = cli_module.get_args() + assert args.batch_size == 64 + + def test_fp16_argument(self): + """Test fp16 argument.""" + with patch.object(sys, "argv", ["prog", "--model_name", "model", "--fp16", "true"]): + args = cli_module.get_args() + assert args.fp16 is True + + def test_bf16_argument(self): + """Test bf16 argument.""" + with patch.object(sys, "argv", ["prog", "--model_name", "model", "--bf16", "true"]): + args = cli_module.get_args() + assert args.bf16 is True + + def test_include_argument(self): + """Test include argument for task filtering.""" + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--include", '["JSTS", "JSICK"]'], + ): + args = cli_module.get_args() + assert args.include == ["JSTS", "JSICK"] + + def test_exclude_argument(self): + """Test exclude argument for task filtering.""" + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--exclude", '["JSTS"]'], + ): + args = cli_module.get_args() + assert args.exclude == ["JSTS"] + + def 
test_task_types_argument(self): + """Test task_types argument.""" + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "model", + "--task_types", + '["Retrieval", "Classification"]', + ], + ): + args = cli_module.get_args() + assert args.task_types == ["Retrieval", "Classification"] + + def test_prompt_profile_argument(self): + """Test prompt_profile argument.""" + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "model", + "--prompt_profile", + "src/jmteb/configs/prompts/e5.yaml", + ], + ): + args = cli_module.get_args() + assert args.prompt_profile == "src/jmteb/configs/prompts/e5.yaml" + + def test_task_batch_sizes_argument(self): + """Test task_batch_sizes argument.""" + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "model", + "--task_batch_sizes", + "batch_sizes.yaml", + ], + ): + args = cli_module.get_args() + assert args.task_batch_sizes == "batch_sizes.yaml" + + def test_save_path_argument(self): + """Test save_path argument.""" + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--save_path", "my_results"], + ): + args = cli_module.get_args() + assert args.save_path == "my_results" + + def test_overwrite_cache_argument(self): + """Test overwrite_cache argument.""" + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--overwrite_cache", "true"], + ): + args = cli_module.get_args() + assert args.overwrite_cache is True + + def test_cache_path_argument(self): + """Test cache_path argument.""" + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--cache_path", "./my_cache"], + ): + args = cli_module.get_args() + assert args.cache_path == "./my_cache" + + +class TestCLIExecution: + """Tests for CLI execution flow.""" + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_basic_execution(self, mock_get_tasks, mock_model_class, 
mock_evaluator_class): + """Test basic CLI execution flow.""" + # Setup mocks + mock_model = Mock() + mock_model_class.from_sentence_transformer.return_value = mock_model + + mock_tasks = [Mock(), Mock()] + mock_get_tasks.return_value = mock_tasks + + mock_evaluator = Mock() + mock_evaluator.run.return_value = [] + mock_evaluator_class.return_value = mock_evaluator + + # Run CLI + with patch.object(sys, "argv", ["prog", "--model_name", "cl-nagoya/ruri-v3-30m"]): + cli_module.main() + + # Verify model creation + mock_model_class.from_sentence_transformer.assert_called_once() + call_kwargs = mock_model_class.from_sentence_transformer.call_args[1] + assert call_kwargs["model_name_or_path"] == "cl-nagoya/ruri-v3-30m" + + # Verify evaluator creation + mock_evaluator_class.assert_called_once() + assert mock_evaluator.run.called + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_bf16_execution(self, mock_get_tasks, mock_model_class, mock_evaluator_class): + """Test CLI with bf16 enabled.""" + mock_model = Mock() + mock_model_class.from_sentence_transformer.return_value = mock_model + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object(sys, "argv", ["prog", "--model_name", "model", "--bf16", "true"]): + cli_module.main() + + # Verify bf16 was passed to model + call_kwargs = mock_model_class.from_sentence_transformer.call_args[1] + assert "model_kwargs" in call_kwargs + assert call_kwargs["model_kwargs"]["torch_dtype"] == torch.bfloat16 + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_include_tasks(self, mock_get_tasks, mock_model_class, mock_evaluator_class): + """Test CLI with --include argument.""" + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock(), Mock()] + 
mock_evaluator_class.return_value.run.return_value = [] + + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "model", + "--include", + '["JSTS", "JSICK"]', + ], + ): + cli_module.main() + + # Verify get_jmteb_tasks was called with task_names + mock_get_tasks.assert_called_once_with(task_names=["JSTS", "JSICK"]) + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_task_types_filter(self, mock_get_tasks, mock_model_class, mock_evaluator_class): + """Test CLI with --task_types argument.""" + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--task_types", '["STS"]'], + ): + cli_module.main() + + # Verify get_jmteb_tasks was called with task_types + mock_get_tasks.assert_called_once_with(task_types=["STS"]) + + @patch("jmteb.v2.__main__.load_prompts") + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_prompt_profile_loading( + self, + mock_get_tasks, + mock_model_class, + mock_evaluator_class, + mock_load_prompts, + ): + """Test CLI with --prompt_profile argument.""" + mock_prompts = {"query": "query: ", "passage": "passage: "} + mock_load_prompts.return_value = mock_prompts + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "model", + "--prompt_profile", + "prompts/e5.yaml", + ], + ): + cli_module.main() + + # Verify prompts were loaded and passed to model + mock_load_prompts.assert_called_once_with("prompts/e5.yaml") + call_kwargs = mock_model_class.from_sentence_transformer.call_args[1] + 
assert call_kwargs["prompts"] == mock_prompts + + @patch("jmteb.v2.__main__.load_batch_sizes") + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_task_batch_sizes_loading( + self, + mock_get_tasks, + mock_model_class, + mock_evaluator_class, + mock_load_batch_sizes, + ): + """Test CLI with --task_batch_sizes argument.""" + mock_batch_sizes = {"JSTS": 128, "JSICK": 128} + mock_load_batch_sizes.return_value = mock_batch_sizes + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "model", + "--task_batch_sizes", + "batch_sizes.yaml", + ], + ): + cli_module.main() + + # Verify batch sizes were loaded and passed to evaluator + mock_load_batch_sizes.assert_called_once_with("batch_sizes.yaml") + call_kwargs = mock_evaluator_class.call_args[1] + assert call_kwargs["task_batch_sizes"] == mock_batch_sizes + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_save_path_configuration(self, mock_get_tasks, mock_model_class, mock_evaluator_class): + """Test save_path creates proper directory structure.""" + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "cl-nagoya/ruri-v3-30m", + "--save_path", + "my_results", + ], + ): + cli_module.main() + + # Verify save_path includes model name + call_kwargs = mock_evaluator_class.call_args[1] + expected_path = Path("my_results") / "cl-nagoya/ruri-v3-30m" + assert call_kwargs["save_path"] == expected_path + + +class TestCLIPrecisionConfiguration: + """Tests for precision configuration.""" 
+ + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_fp16_creates_model_kwargs(self, mock_get_tasks, mock_model_class, mock_evaluator_class): + """Test that fp16 flag creates appropriate model_kwargs.""" + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object(sys, "argv", ["prog", "--model_name", "model", "--fp16", "true"]): + cli_module.main() + + call_kwargs = mock_model_class.from_sentence_transformer.call_args[1] + assert call_kwargs["model_kwargs"]["torch_dtype"] == torch.float16 + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_bf16_creates_model_kwargs(self, mock_get_tasks, mock_model_class, mock_evaluator_class): + """Test that bf16 flag creates appropriate model_kwargs.""" + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object(sys, "argv", ["prog", "--model_name", "model", "--bf16", "true"]): + cli_module.main() + + call_kwargs = mock_model_class.from_sentence_transformer.call_args[1] + assert call_kwargs["model_kwargs"]["torch_dtype"] == torch.bfloat16 + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_no_precision_flag_no_model_kwargs(self, mock_get_tasks, mock_model_class, mock_evaluator_class): + """Test that no precision flag means no model_kwargs.""" + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object(sys, "argv", ["prog", "--model_name", "model"]): + cli_module.main() + + 
call_kwargs = mock_model_class.from_sentence_transformer.call_args[1] + assert call_kwargs.get("model_kwargs") is None + + +class TestCLITaskFiltering: + """Tests for task filtering logic.""" + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_benchmark") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + def test_exclude_filters_tasks( + self, + mock_get_tasks, + mock_get_benchmark, + mock_model_class, + mock_evaluator_class, + ): + """Test that exclude argument filters out tasks.""" + mock_task1 = Mock() + mock_task1.metadata.name = "JSTS" + mock_task2 = Mock() + mock_task2.metadata.name = "JSICK" + mock_task3 = Mock() + mock_task3.metadata.name = "JaqketRetrieval" + + mock_benchmark = Mock() + mock_benchmark.tasks = [mock_task1, mock_task2, mock_task3] + mock_get_benchmark.return_value = mock_benchmark + + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object( + sys, + "argv", + ["prog", "--model_name", "model", "--exclude", '["JSTS"]'], + ): + cli_module.main() + + # Verify evaluator was created with filtered tasks + call_kwargs = mock_evaluator_class.call_args[1] + filtered_tasks = call_kwargs["tasks"] + assert len(filtered_tasks) == 2 + assert mock_task1 not in filtered_tasks + assert mock_task2 in filtered_tasks + assert mock_task3 in filtered_tasks + + +class TestCLIIntegration: + """Integration tests for complete CLI workflows.""" + + @patch("jmteb.v2.__main__.JMTEBV2Evaluator") + @patch("jmteb.v2.__main__.JMTEBModel") + @patch("jmteb.v2.__main__.get_jmteb_tasks") + @patch("jmteb.v2.__main__.load_prompts") + @patch("jmteb.v2.__main__.load_batch_sizes") + def test_full_configuration( + self, + mock_load_batch_sizes, + mock_load_prompts, + mock_get_tasks, + mock_model_class, + mock_evaluator_class, + ): + """Test CLI with all configuration options.""" + mock_prompts = {"query": "query: "} + 
mock_batch_sizes = {"JSTS": 128} + mock_load_prompts.return_value = mock_prompts + mock_load_batch_sizes.return_value = mock_batch_sizes + + mock_model_class.from_sentence_transformer.return_value = Mock() + mock_get_tasks.return_value = [Mock()] + mock_evaluator_class.return_value.run.return_value = [] + + with patch.object( + sys, + "argv", + [ + "prog", + "--model_name", + "cl-nagoya/ruri-v3-30m", + "--bf16", + "true", + "--batch_size", + "64", + "--prompt_profile", + "prompts/ruri-v3.yaml", + "--task_batch_sizes", + "batch_sizes.yaml", + "--save_path", + "results", + "--include", + '["JSTS", "JSICK"]', + ], + ): + cli_module.main() + + # Verify all components were configured correctly + assert mock_load_prompts.called + assert mock_load_batch_sizes.called + assert mock_model_class.from_sentence_transformer.called + assert mock_get_tasks.called + assert mock_evaluator_class.called + assert mock_evaluator_class.return_value.run.called diff --git a/tests/v2/test_tasks.py b/tests/v2/test_tasks.py new file mode 100644 index 0000000..feb9a7a --- /dev/null +++ b/tests/v2/test_tasks.py @@ -0,0 +1,155 @@ +""" +Tests for JMTEB v2.0 task utilities. 
+""" + +from unittest.mock import Mock, patch + +import pytest + +from jmteb.v2 import tasks + + +class TestTaskUtilities: + """Tests for task utility functions.""" + + @patch("jmteb.v2.tasks.mteb.get_benchmark") + def test_get_jmteb_benchmark(self, mock_get_benchmark): + """Test getting JMTEB benchmark.""" + mock_benchmark = Mock() + mock_get_benchmark.return_value = mock_benchmark + + result = tasks.get_jmteb_benchmark() + + mock_get_benchmark.assert_called_once_with("JMTEB(v2)") + assert result == mock_benchmark + + @patch("jmteb.v2.tasks.mteb.get_benchmark") + def test_get_jmteb_lite_benchmark(self, mock_get_benchmark): + """Test getting JMTEB-lite benchmark.""" + mock_benchmark = Mock() + mock_get_benchmark.return_value = mock_benchmark + + result = tasks.get_jmteb_lite_benchmark() + + mock_get_benchmark.assert_called_once_with("JMTEB-lite(v1)") + assert result == mock_benchmark + + @patch("jmteb.v2.tasks.mteb.get_tasks") + @patch("jmteb.v2.tasks.get_jmteb_benchmark") + def test_get_jmteb_tasks_all(self, mock_benchmark, mock_get_tasks): + """Test getting all JMTEB tasks.""" + mock_task1 = Mock() + mock_task1.metadata.name = "JSTS" + mock_task2 = Mock() + mock_task2.metadata.name = "JSICK" + + mock_benchmark.return_value.tasks = [mock_task1, mock_task2] + mock_get_tasks.return_value = [mock_task1, mock_task2] + + result = tasks.get_jmteb_tasks() + + assert len(result) == 2 + # Check that mteb.get_tasks was called with correct task names + mock_get_tasks.assert_called_once_with(tasks=["JSTS", "JSICK"], languages=["jpn"]) + + @patch("jmteb.v2.tasks.mteb.get_tasks") + @patch("jmteb.v2.tasks.get_jmteb_benchmark") + def test_get_jmteb_tasks_filter_by_names(self, mock_benchmark, mock_get_tasks): + """Test filtering tasks by names.""" + mock_task1 = Mock() + mock_task1.metadata.name = "JSTS" + mock_task2 = Mock() + mock_task2.metadata.name = "JSICK" + mock_task3 = Mock() + mock_task3.metadata.name = "JaqketRetrieval" + + mock_benchmark.return_value.tasks = 
[mock_task1, mock_task2, mock_task3] + mock_get_tasks.return_value = [mock_task1, mock_task2] + + result = tasks.get_jmteb_tasks(task_names=["JSTS", "JSICK"]) + + assert len(result) == 2 + # Verify mteb.get_tasks was called with filtered task names + mock_get_tasks.assert_called_once_with(tasks=["JSTS", "JSICK"], languages=["jpn"]) + + @patch("jmteb.v2.tasks.mteb.get_tasks") + @patch("jmteb.v2.tasks.get_jmteb_benchmark") + def test_get_jmteb_tasks_filter_by_type(self, mock_benchmark, mock_get_tasks): + """Test filtering tasks by type.""" + mock_task1 = Mock() + mock_task1.metadata.name = "JSTS" + mock_task1.metadata.type = "STS" + mock_task2 = Mock() + mock_task2.metadata.name = "JaqketRetrieval" + mock_task2.metadata.type = "Retrieval" + + mock_benchmark.return_value.tasks = [mock_task1, mock_task2] + mock_get_tasks.return_value = [mock_task1] + + result = tasks.get_jmteb_tasks(task_types=["STS"]) + + assert len(result) == 1 + # Verify mteb.get_tasks was called with STS task name + mock_get_tasks.assert_called_once_with(tasks=["JSTS"], languages=["jpn"]) + + def test_get_jmteb_tasks_language_restriction(self): + """Test that get_jmteb_tasks restricts to Japanese language. + + Note: The function always restricts to Japanese (jpn) language, + so there's no need for a languages parameter. 
+ """ + # This test just verifies that the function returns tasks + # The actual language restriction is tested through integration tests + result = tasks.get_jmteb_tasks(task_names=["JSTS"]) + assert len(result) > 0 + # Verify it returns a task with Japanese language + assert "jpn" in result[0].metadata.languages + + @patch("jmteb.v2.tasks.get_jmteb_tasks") + def test_get_task_by_name_success(self, mock_get_tasks): + """Test getting a task by name successfully.""" + mock_task = Mock() + mock_task.metadata.name = "JSTS" + mock_get_tasks.return_value = [mock_task] + + result = tasks.get_task_by_name("JSTS") + + assert result == mock_task + mock_get_tasks.assert_called_once_with(task_names=["JSTS"]) + + @patch("jmteb.v2.tasks.get_jmteb_tasks") + def test_get_task_by_name_not_found(self, mock_get_tasks): + """Test getting a task by name when not found.""" + mock_get_tasks.return_value = [] + + with pytest.raises(ValueError, match="Task 'InvalidTask' not found"): + tasks.get_task_by_name("InvalidTask") + + def test_get_task_category(self): + """Test getting task category.""" + assert tasks.get_task_category("JSTS") == "STS" + assert tasks.get_task_category("JaqketRetrieval") == "Retrieval" + assert tasks.get_task_category("AmazonReviewsClassification") == "Classification" + assert tasks.get_task_category("LivedoorNewsClustering.v2") == "Clustering" + assert tasks.get_task_category("ESCIReranking") == "Reranking" + assert tasks.get_task_category("UnknownTask") == "Unknown" + + def test_convert_v1_task_name(self): + """Test converting v1 task names to v2.""" + assert tasks.convert_v1_task_name("jsts") == "JSTS" + assert tasks.convert_v1_task_name("jaqket") == "JaqketRetrieval" + assert tasks.convert_v1_task_name("livedoor_news") == "LivedoorNewsClustering.v2" + # Unknown task should return as-is + assert tasks.convert_v1_task_name("unknown_task") == "unknown_task" + + def test_jmteb_tasks_constant(self): + """Test that JMTEB_TASKS constant is properly defined.""" + 
assert len(tasks.JMTEB_TASKS) == 28 + assert "JSTS" in tasks.JMTEB_TASKS + assert "JSICK" in tasks.JMTEB_TASKS + assert "JaqketRetrieval" in tasks.JMTEB_TASKS + + def test_task_categories_constant(self): + """Test that TASK_CATEGORIES constant covers all tasks.""" + for task_name in tasks.JMTEB_TASKS: + assert task_name in tasks.TASK_CATEGORIES diff --git a/tests/v2/test_utils.py b/tests/v2/test_utils.py new file mode 100644 index 0000000..53082fd --- /dev/null +++ b/tests/v2/test_utils.py @@ -0,0 +1,165 @@ +""" +Tests for JMTEB v2.0 utility functions. +""" + +import json + +from jmteb.v2 import utils + + +class TestUtils: + """Tests for utility functions.""" + + def test_load_prompts(self, tmp_path): + """Test loading prompts from YAML file.""" + prompt_file = tmp_path / "prompts.yaml" + prompt_file.write_text("query: 'query: '\npassage: 'passage: '\n") + + prompts = utils.load_prompts(prompt_file) + + assert prompts["query"] == "query: " + assert prompts["passage"] == "passage: " + + def test_load_prompts_empty(self, tmp_path): + """Test loading empty prompts file.""" + prompt_file = tmp_path / "empty.yaml" + prompt_file.write_text("") + + prompts = utils.load_prompts(prompt_file) + + assert prompts == {} + + def test_load_batch_sizes(self, tmp_path): + """Test loading batch sizes from YAML file.""" + batch_file = tmp_path / "batch_sizes.yaml" + batch_file.write_text("JSTS: 128\nJSICK: 64\nJaqketRetrieval: 32\n") + + batch_sizes = utils.load_batch_sizes(batch_file) + + assert batch_sizes["JSTS"] == 128 + assert batch_sizes["JSICK"] == 64 + assert batch_sizes["JaqketRetrieval"] == 32 + + def test_load_summary_exists(self, tmp_path): + """Test loading existing summary.json.""" + summary_file = tmp_path / "summary.json" + summary_data = {"STS": {"jsts": {"main_score": 82.14}}} + summary_file.write_text(json.dumps(summary_data)) + + summary = utils.load_summary(tmp_path) + + assert summary == summary_data + + def test_load_summary_not_exists(self, tmp_path): + 
"""Test loading summary when file doesn't exist.""" + summary = utils.load_summary(tmp_path) + + assert summary == {} + + def test_save_summary(self, tmp_path): + """Test saving summary to file.""" + summary_data = {"STS": {"jsts": {"main_score": 82.14}}} + + utils.save_summary(summary_data, tmp_path) + + summary_file = tmp_path / "summary.json" + assert summary_file.exists() + + loaded = json.loads(summary_file.read_text()) + assert loaded == summary_data + + def test_extract_and_update_summary_jsts(self, tmp_path): + """Test extracting and updating summary for JSTS task.""" + # Create result file + result_file = tmp_path / "JSTS.json" + result_data = {"validation": [{"main_score": 0.8234, "cosine_spearman": 0.8234}]} + result_file.write_text(json.dumps(result_data)) + + summary = {} + utils.extract_and_update_summary( + task_name="JSTS", + main_metric="cosine_spearman", + save_path=tmp_path, + summary=summary, + eval_time=5.67, + ) + + assert "STS" in summary + assert "jsts" in summary["STS"] + assert summary["STS"]["jsts"]["main_score"] == 82.34 # 0.8234 * 100 + assert summary["STS"]["jsts"]["eval_time (s)"] == "5.67" + + def test_extract_and_update_summary_mldr(self, tmp_path): + """Test extracting and updating summary for MLDR task (uses dev split).""" + # Create result file + result_file = tmp_path / "MultiLongDocRetrieval.json" + result_data = {"dev": [{"main_score": 0.7512, "ndcg@10": 0.7512}]} + result_file.write_text(json.dumps(result_data)) + + summary = {} + utils.extract_and_update_summary( + task_name="MultiLongDocRetrieval", + main_metric="ndcg@10", + save_path=tmp_path, + summary=summary, + eval_time=120.5, + ) + + assert "Retrieval" in summary + assert "mldr_retrieval" in summary["Retrieval"] + assert summary["Retrieval"]["mldr_retrieval"]["main_score"] == 75.12 + + def test_extract_and_update_summary_cached(self, tmp_path): + """Test updating summary for cached result.""" + result_file = tmp_path / "JSICK.json" + result_data = {"test": 
[{"main_score": 0.7689}]} + result_file.write_text(json.dumps(result_data)) + + summary = {} + utils.extract_and_update_summary( + task_name="JSICK", + main_metric="cosine_spearman", + save_path=tmp_path, + summary=summary, + eval_time=-1, # Cached + ) + + assert summary["STS"]["jsick"]["eval_time (s)"] == "cached" + + def test_extract_and_update_summary_unknown_task(self, tmp_path): + """Test that unknown task is skipped.""" + summary = {} + utils.extract_and_update_summary( + task_name="UnknownTask", + main_metric="some_metric", + save_path=tmp_path, + summary=summary, + eval_time=10.0, + ) + + # Summary should remain empty + assert summary == {} + + def test_save_results(self, tmp_path): + """Test saving generic results.""" + results = {"task1": {"score": 0.85}, "task2": {"score": 0.92}} + + utils.save_results(results, tmp_path, filename="custom_results.json") + + result_file = tmp_path / "custom_results.json" + assert result_file.exists() + + loaded = json.loads(result_file.read_text()) + assert loaded == results + + def test_get_task_key_mapping(self): + """Test internal _get_task_key function.""" + # Test some common mappings + assert utils._get_task_key("JSTS") == "jsts" + assert utils._get_task_key("JaqketRetrieval") == "jaqket" + assert utils._get_task_key("LivedoorNewsClustering.v2") == "livedoor_news" + assert utils._get_task_key("AmazonReviewsClassification") == "amazon_review_classification" + assert utils._get_task_key("MultiLongDocRetrieval") == "mldr_retrieval" + + # Unknown task should be lowercased + assert utils._get_task_key("UnknownTask") == "unknowntask"