Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
a04ef3e
feat: add Qwen2.5omni text modal processing
KKkai0315 Jan 22, 2026
c9333ab
add qwen2.5omni vision, audio modal
KKkai0315 Jan 23, 2026
e959822
fix: Enhance quantization modules. Introduced FixedActivationQDQ for …
chenghuaWang Jan 17, 2026
0672432
fix: Suppress deprecated comma-subscript warnings in CMake and remove…
chenghuaWang Jan 17, 2026
927f7eb
feat(qualcomm): Add installation targets for flatbuffers and MllmQNNB…
chenghuaWang Jan 19, 2026
d2e6b36
feat(qualcomm): Refactor Qwen3 model to integrate ConcatObserver for …
chenghuaWang Jan 19, 2026
48c259a
feat(cpu): Implement fill operations for various data types including…
chenghuaWang Jan 20, 2026
e976d11
feat(qnn): Enhance QNNBackend initialization with improved logging an…
chenghuaWang Jan 21, 2026
224d68e
feat(qnn): Update quantization handling and embedding output data typ…
chenghuaWang Jan 23, 2026
d2d5c09
feat(qwen3): Integrate QEmbedding for quantized embeddings and refine…
chenghuaWang Jan 23, 2026
c4f2306
fix
KKkai0315 Jan 23, 2026
a235a13
fix
KKkai0315 Jan 23, 2026
eeac11f
Merge remote-tracking branch 'refs/remotes/origin/main'
KKkai0315 Jan 23, 2026
adc3b64
add ConvTranspose1dOp & TanhOp
KKkai0315 Jan 24, 2026
674f97c
fix: fix Tanh op and add test for Tanh Op and ConvTranspose1d Op
KKkai0315 Jan 25, 2026
e1ba448
add minicpmo45
KKkai0315 Feb 23, 2026
8c0cda7
merge
KKkai0315 Feb 23, 2026
af574ae
add
KKkai0315 Feb 24, 2026
06b754c
add qwen2.5o talker
KKkai0315 Mar 5, 2026
5676edc
add
KKkai0315 Mar 5, 2026
4baacd3
Merge branch 'main' into main
oreomaker Mar 12, 2026
3bdf6e0
fix
KKkai0315 Mar 12, 2026
571b93d
add minicpm-o4.5 system ref audio prompt path
KKkai0315 Mar 12, 2026
d7c1b30
fix
KKkai0315 Mar 25, 2026
f185440
feat(mllm_kernel): simplify JIT usage in README and update kernel exa…
chenghuaWang Feb 17, 2026
289b74b
feat: update dependencies and refactor mobile module structure
chenghuaWang Feb 18, 2026
45c2fb7
feat: enhance configuration management and update dependencies
chenghuaWang Feb 18, 2026
14ce9cd
feat: add main entry points and configuration for pymllm and mllm-kernel
chenghuaWang Feb 18, 2026
027b0df
feat: enhance layer implementations and add new components
chenghuaWang Feb 19, 2026
f6aee67
feat: add initial files for pymllm architecture and launch functionality
chenghuaWang Feb 19, 2026
4fd3d34
feat: update dependencies and enhance configuration structure
chenghuaWang Feb 21, 2026
57ef372
feat: implement store_cache functionality and related components
chenghuaWang Feb 21, 2026
7f78efa
refactor: improve socket initialization in TokenizerProcess
chenghuaWang Feb 21, 2026
7f5d7d9
feat(engine): support batch generation and enable shared memory queue…
chenghuaWang Feb 27, 2026
5d13411
feat(mllm-kernel): add high-performance create_kv_indices CUDA kernel…
chenghuaWang Mar 2, 2026
f10363c
feat(sampling): add sampling module with FlashInfer acceleration and …
chenghuaWang Mar 2, 2026
c366ffc
feat(cuda): add fused GDN decode and RMSNorm+SiLU gating kernels for …
chenghuaWang Mar 9, 2026
506d61a
fix(attention): refine FlashInfer backend logic and improve RadixCach…
chenghuaWang Mar 17, 2026
a420a05
refactor: improve code readability and structure across multiple modules
chenghuaWang Mar 17, 2026
9d33d0d
chore: update installation instructions and add new skills for pymllm
chenghuaWang Mar 17, 2026
fd16226
refactor: enhance installation instructions and improve cache management
chenghuaWang Mar 17, 2026
a6a993a
refactor: enhance configuration management and improve process health…
chenghuaWang Mar 18, 2026
a78e3a0
feat(mllm-kernel): introduce new Marlin kernel implementations for ef…
chenghuaWang Mar 18, 2026
9453134
feat(quantization): implement quantization configuration loading and …
chenghuaWang Mar 18, 2026
5560096
feat(docs): update README files with latest news and model integratio…
chenghuaWang Mar 18, 2026
e78ea11
fix
KKkai0315 Mar 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ add_subdirectory(qwen2vl)
add_subdirectory(qwen2vl_tracer)
add_subdirectory(qwen2_5vl)
add_subdirectory(qwen2_5vl_tracer)
add_subdirectory(qwen2_5omni)
add_subdirectory(minicpm_o45)
add_subdirectory(llama)
add_subdirectory(minicpm_o)
add_subdirectory(minicpm4)
Expand Down
7 changes: 7 additions & 0 deletions examples/minicpm_o45/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# MiniCPM-o 4.5 example runners: a C++ entry point and a Python-bridge entry
# point. Both link the same runtime (MllmRT) and CPU backend, and share the
# project's public include directory, so they are configured in one loop.
foreach(runner_spec IN ITEMS
    "mllm-minicpm-o45-runner|main.cpp"
    "mllm-minicpm-o45-runner-python|main_python.cpp")
  # Split "<target>|<source>" into its two fields.
  string(REPLACE "|" ";" runner_parts "${runner_spec}")
  list(GET runner_parts 0 runner_target)
  list(GET runner_parts 1 runner_source)

  add_executable(${runner_target} ${runner_source})
  target_link_libraries(${runner_target} PRIVATE MllmRT MllmCPUBackend)
  target_include_directories(${runner_target} PRIVATE ${MLLM_INCLUDE_DIR})
endforeach()
285 changes: 285 additions & 0 deletions examples/minicpm_o45/config_minicpm_o45.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
{
"architectures": [
"MiniCPMO"
],
"version": "4.5",
"attention_bias": false,
"attention_dropout": 0.0,
"audio_chunk_length": 1.0,
"audio_config": {
"_attn_implementation_autoset": true,
"_name_or_path": "openai/whisper-medium",
"activation_dropout": 0.0,
"activation_function": "gelu",
"apply_spec_augment": false,
"architectures": [
"MiniCPMWhisperEncoder"
],
"attention_dropout": 0.0,
"begin_suppress_tokens": [
220,
50257
],
"bos_token_id": 50257,
"classifier_proj_size": 256,
"d_model": 1024,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 4096,
"decoder_layerdrop": 0.0,
"decoder_layers": 24,
"decoder_start_token_id": 50258,
"dropout": 0.0,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 4096,
"encoder_layerdrop": 0.0,
"encoder_layers": 24,
"eos_token_id": 50257,
"forced_decoder_ids": [
[
1,
50259
],
[
2,
50359
],
[
3,
50363
]
],
"init_std": 0.02,
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_prob": 0.05,
"max_length": 448,
"max_source_positions": 1500,
"max_target_positions": 448,
"median_filter_width": 7,
"model_type": "whisper",
"num_hidden_layers": 24,
"num_mel_bins": 80,
"pad_token_id": 50257,
"scale_embedding": false,
"suppress_tokens": [
1,
2,
7,
8,
9,
10,
14,
25,
26,
27,
28,
29,
31,
58,
59,
60,
61,
62,
63,
90,
91,
92,
93,
359,
503,
522,
542,
873,
893,
902,
918,
922,
931,
1350,
1853,
1982,
2460,
2627,
3246,
3253,
3268,
3536,
3846,
3961,
4183,
4667,
6585,
6647,
7273,
9061,
9383,
10428,
10929,
11938,
12033,
12331,
12562,
13793,
14157,
14635,
15265,
15618,
16553,
16604,
18362,
18956,
20075,
21675,
22520,
26130,
26161,
26435,
28279,
29464,
31650,
32302,
32470,
36865,
42863,
47425,
49870,
50254,
50258,
50358,
50359,
50360,
50361,
50362
],
"torch_dtype": "float32",
"use_cache": true,
"use_weighted_layer_sum": false,
"vocab_size": 51865
},
"audio_pool_step": 5,
"auto_map": {
"AutoConfig": "configuration_minicpmo.MiniCPMOConfig",
"AutoModel": "modeling_minicpmo.MiniCPMO",
"AutoModelForCausalLM": "modeling_minicpmo.MiniCPMO"
},
"batch_vision_input": true,
"bos_token_id": 151643,
"drop_vision_last_layer": false,
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 4096,
"image_size": 448,
"init_audio": true,
"init_tts": true,
"init_vision": true,
"initializer_range": 0.02,
"intermediate_size": 12288,
"listen_speak_type": "asr",
"max_position_embeddings": 40960,
"max_window_layers": 36,
"model_type": "minicpmo",
"num_attention_heads": 32,
"num_hidden_layers": 36,
"num_key_value_heads": 8,
"patch_size": 14,
"query_num": 64,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"slice_config": {
"max_slice_nums": 1,
"model_type": "minicpmv",
"patch_size": 14,
"scale_resolution": 448
},
"slice_mode": true,
"sliding_window": null,
"stream_input": true,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.51.0",
"tts_config": {
"_attn_implementation_autoset": true,
"attention_type": "full_attention",
"attn_implementation": "sdpa",
"audio_bos_token_id": 151687,
"audio_tokenizer_sample_rate": 16000,
"audio_tokenizer_type": "s3tokenizer",
"aug_layer_loss_weight": false,
"aug_loss_weight": false,
"backbone_model": "llama",
"condition_type": "hidden_text_merge",
"cosyvoice_config_path": null,
"cosyvoice_model_dir": null,
"filter_tts_loss": false,
"hidden_act": "silu",
"hidden_size": 768,
"interleaved": false,
"intermediate_size": 3072,
"llm_dim": 4096,
"llm_dim_model_base": 256,
"llm_down_scale": false,
"llm_hidden_size": 4096,
"llm_intermediate_size": 768,
"long_weight": 0.1,
"max_position_embeddings": 4096,
"model_type": "minicpmtts",
"normalize_projected_hidden": true,
"num_attention_heads": 12,
"num_audio_tokens": 6562,
"num_hidden_layers": 20,
"num_key_value_heads": 12,
"num_mel_bins": 100,
"num_text_tokens": 152064,
"num_vq": 1,
"projector_type": "mlp",
"recomputed_chunks": 1,
"s3_stream_chunk_size": 25,
"s3_stream_generate": false,
"s3_stream_n_timesteps": 10,
"s3_stream_prelook_size": 3,
"short_weight": 0.1,
"streaming": false,
"streaming_audio_chunk_size": 50,
"streaming_sliding_window": false,
"streaming_sliding_window_audio_frame_rate": 50,
"streaming_sliding_window_audio_init_text_length": 10,
"streaming_sliding_window_audio_window_size": 300,
"streaming_sliding_window_average_speed": 5,
"streaming_sliding_window_fast_speed": 7,
"streaming_sliding_window_max_text_len": 500,
"streaming_sliding_window_slow_speed": 3,
"streaming_sliding_window_text_window_size": 50,
"streaming_text_chunk_max": 7,
"streaming_text_chunk_min": 3,
"streaming_text_reserved_len": 300,
"text_eos_token_id": 151692,
"tts_filter_loss_fix": false,
"use_llm_hidden_state": false,
"use_text": true,
"window_size": 2
},
"use_cache": true,
"use_image_id": true,
"use_sliding_window": false,
"vision_batch_size": 16,
"vision_config": {
"_attn_implementation_autoset": true,
"attention_dropout": 0.0,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_size": 980,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 27,
"patch_size": 14
},
"vocab_size": 151748
}
Loading