diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index 7385a422..645da36f 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -9,13 +9,15 @@ Note that for version number starting with a `0`, i.e., `0.x.y`, a bump of `x` should be considered as a major (and thus potentially breaking) change. See semver guidelines for more details about this. -## [Unreleased] +## [0.6.1-rc0] - 2025-01-23 -- Upgrade model from `standard_v2_1` to `standard_v3_0`. This should result in a 3x faster inference speed, with the same overall accuracy. This new model should also be ~20% faster than `standard_v1`. +- Upgrade model from `standard_v2_1` to `standard_v3_0`. This should result in a 3x faster inference speed, with the same overall accuracy. This new model should also be ~20% faster than `standard_v1`. More details in the [models' changelog notes](../assets/models/CHANGELOG.md). +- With this release, we build platform-specific wheels for Ubuntu, Windows, and macOS (as we did for 0.6.0-rcX), plus an additional pure-Python wheel to support the remaining platforms. The three platform-specific wheels ship the Rust client; the pure-Python wheel falls back to the (slower) Python client. All wheels ship with the same new `standard_v3_0` model and the same functionality. - New API: `get_output_content_types()`. This API returns the list of all possible outputs by the module. I.e., all possible values for `MagikaResult.prediction.output.label`. This is the list that is relevant for most clients. - New API: `get_model_content_types()`. This API returns the list of all possible outputs of the deep learning model. I.e., all possible values for `MagikaResult.prediction.dl.label`. Note that, in general, the list of "model outputs" is different than the "tool outputs" as in some cases the model is not even used, or the model's output is overwritten due to a low-confidence score, or other reasons. This API is useful mostly for debugging purposes; the vast majority of client should use `get_output_content_types()`.
- `MagikaPrediction` now has an `overwrite_reason` field, specifying why and if the model's prediction was overwritten. + ## [0.6.0-rc3] - 2024-11-20 - Fixed problems with installing Magika via `uv` on MacOS. diff --git a/python/scripts/sync.py b/python/scripts/sync.py index f284e078..8565031b 100755 --- a/python/scripts/sync.py +++ b/python/scripts/sync.py @@ -11,7 +11,6 @@ PYTHON_ROOT_DIR = Path(__file__).parent.parent PUBLISHED_MODELS_NAMES = [ - "standard_v2_1", "standard_v3_0", ] diff --git a/python/src/magika/__init__.py b/python/src/magika/__init__.py index c9d1642d..2131fe83 100644 --- a/python/src/magika/__init__.py +++ b/python/src/magika/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. -__version__ = "0.6.1-dev" +__version__ = "0.6.1-rc0" import dotenv diff --git a/python/src/magika/models/fast_v2_1/config.min.json b/python/src/magika/models/fast_v2_1/config.min.json deleted file mode 100644 index d023c93d..00000000 --- a/python/src/magika/models/fast_v2_1/config.min.json +++ /dev/null @@ -1 +0,0 @@ -{"beg_size": 512, "mid_size": 0, "end_size": 512, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", "autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", 
"javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "pytorch", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95, "pascal": 0.95}, "overwrite_map": {}} diff --git a/python/src/magika/models/fast_v2_1/model.onnx b/python/src/magika/models/fast_v2_1/model.onnx deleted file mode 100644 index 0c393634..00000000 Binary files a/python/src/magika/models/fast_v2_1/model.onnx and /dev/null differ diff --git a/python/src/magika/models/standard_v2_1/config.min.json b/python/src/magika/models/standard_v2_1/config.min.json deleted file mode 100644 index 31a62593..00000000 --- a/python/src/magika/models/standard_v2_1/config.min.json +++ /dev/null @@ -1 +0,0 @@ -{"beg_size": 2048, "mid_size": 0, "end_size": 2048, "use_inputs_at_offsets": false, "medium_confidence_threshold": 0.5, "min_file_size_for_dl": 8, "padding_token": 256, "block_size": 4096, "target_labels_space": ["3gp", "ace", "ai", "aidl", "apk", "applebplist", "appleplist", "asm", "asp", 
"autohotkey", "autoit", "awk", "batch", "bazel", "bib", "bmp", "bzip", "c", "cab", "cat", "chm", "clojure", "cmake", "cobol", "coff", "coffeescript", "cpp", "crt", "crx", "cs", "csproj", "css", "csv", "dart", "deb", "dex", "dicom", "diff", "dm", "dmg", "doc", "dockerfile", "docx", "dsstore", "dwg", "dxf", "elf", "elixir", "emf", "eml", "epub", "erb", "erlang", "flac", "flv", "fortran", "gemfile", "gemspec", "gif", "gitattributes", "gitmodules", "go", "gradle", "groovy", "gzip", "h5", "handlebars", "haskell", "hcl", "hlp", "htaccess", "html", "icns", "ico", "ics", "ignorefile", "ini", "internetshortcut", "ipynb", "iso", "jar", "java", "javabytecode", "javascript", "jinja", "jp2", "jpeg", "json", "jsonl", "julia", "kotlin", "latex", "lha", "lisp", "lnk", "lua", "m3u", "m4", "macho", "makefile", "markdown", "matlab", "mht", "midi", "mkv", "mp3", "mp4", "mscompress", "msi", "mum", "npy", "npz", "nupkg", "objectivec", "ocaml", "odp", "ods", "odt", "ogg", "one", "onnx", "otf", "outlook", "parquet", "pascal", "pcap", "pdb", "pdf", "pebin", "pem", "perl", "php", "pickle", "png", "po", "postscript", "powershell", "ppt", "pptx", "prolog", "proteindb", "proto", "psd", "python", "pythonbytecode", "pytorch", "qt", "r", "rar", "rdf", "rpm", "rst", "rtf", "ruby", "rust", "scala", "scss", "sevenzip", "sgml", "shell", "smali", "snap", "solidity", "sql", "sqlite", "squashfs", "srt", "stlbinary", "stltext", "sum", "svg", "swf", "swift", "tar", "tcl", "textproto", "tga", "thumbsdb", "tiff", "toml", "torrent", "tsv", "ttf", "twig", "txt", "typescript", "unknown", "vba", "vcxproj", "verilog", "vhdl", "vtt", "vue", "wasm", "wav", "webm", "webp", "winregistry", "wmf", "woff", "woff2", "xar", "xls", "xlsb", "xlsx", "xml", "xpi", "xz", "yaml", "yara", "zig", "zip", "zlibstream"], "thresholds": {"latex": 0.95, "pascal": 0.95}, "overwrite_map": {}} diff --git a/python/src/magika/models/standard_v2_1/model.onnx b/python/src/magika/models/standard_v2_1/model.onnx deleted file mode 100644 index 
ec76c2d3..00000000 Binary files a/python/src/magika/models/standard_v2_1/model.onnx and /dev/null differ