.gitignore
.pre-commit-config.yaml
.readthedocs.yaml
CMakeLists.txt
CODE_OF_CONDUCT.md
CONTRIBUTING.md
DCO
Dockerfile
Dockerfile.310p
Dockerfile.310p.openEuler
Dockerfile.a3
Dockerfile.a3.openEuler
Dockerfile.openEuler
LICENSE
README.md
README.zh.md
codecov.yml
collect_env.py
format.sh
mypy.ini
packages.txt
pyproject.toml
requirements-dev.txt
requirements-lint.txt
requirements.txt
setup.py
typos.toml
.gemini/config.yaml
.github/Dockerfile.buildwheel
.github/PULL_REQUEST_TEMPLATE.md
.github/actionlint.yaml
.github/dependabot.yml
.github/format_pr_body.sh
.github/labeler.yml
.github/ISSUE_TEMPLATE/100-documentation.yml
.github/ISSUE_TEMPLATE/110-user-story.yml
.github/ISSUE_TEMPLATE/200-installation.yml
.github/ISSUE_TEMPLATE/300-usage.yml
.github/ISSUE_TEMPLATE/400-bug-report.yml
.github/ISSUE_TEMPLATE/500-feature-request.yml
.github/ISSUE_TEMPLATE/600-new-model.yml
.github/ISSUE_TEMPLATE/700-performance-discussion.yml
.github/ISSUE_TEMPLATE/750-RFC.yml
.github/ISSUE_TEMPLATE/800-others.yml
.github/ISSUE_TEMPLATE/900-release-checklist.yml
.github/ISSUE_TEMPLATE/config.yml
.github/workflows/_accuracy_test.yaml
.github/workflows/_e2e_test.yaml
.github/workflows/accuracy_test.yaml
.github/workflows/format_pr_body.yaml
.github/workflows/image_310p_openeuler.yml
.github/workflows/image_310p_ubuntu.yml
.github/workflows/image_a3_openeuler.yml
.github/workflows/image_a3_ubuntu.yml
.github/workflows/image_openeuler.yml
.github/workflows/image_ubuntu.yml
.github/workflows/label_merge_conflict.yml
.github/workflows/labeler.yml
.github/workflows/multi_node_test.yaml
.github/workflows/nightly_benchmarks.yaml
.github/workflows/pre-commit.yml
.github/workflows/release_code.yml
.github/workflows/release_whl.yml
.github/workflows/reminder_comment.yml
.github/workflows/vllm_ascend_dist.yaml
.github/workflows/vllm_ascend_doctest.yaml
.github/workflows/vllm_ascend_test.yaml
.github/workflows/vllm_ascend_test_310p.yaml
.github/workflows/vllm_ascend_test_full.yaml
.github/workflows/vllm_ascend_test_full_vllm_main.yaml
.github/workflows/vllm_ascend_test_models.yaml
.github/workflows/vllm_ascend_test_pd.yaml
.github/workflows/matchers/actionlint.json
.github/workflows/matchers/mypy.json
.github/workflows/matchers/ruff.json
benchmarks/README.md
benchmarks/requirements-bench.txt
benchmarks/ops/ben_vocabparallelembedding.py
benchmarks/scripts/convert_json_to_markdown.py
benchmarks/scripts/perf_result_template.md
benchmarks/scripts/run-performance-benchmarks.sh
benchmarks/tests/latency-tests.json
benchmarks/tests/serving-tests.json
benchmarks/tests/throughput-tests.json
cmake/utils.cmake
csrc/camem_allocator.cpp
csrc/ops.h
csrc/torch_binding.cpp
csrc/torch_binding_meta.cpp
csrc/utils.h
csrc/kernels/bgmv_expand.cpp
csrc/kernels/bgmv_shrink.cpp
csrc/kernels/get_masked_input_and_mask_kernel.cpp
csrc/kernels/pos_encoding_kernels.cpp
csrc/kernels/sgmv_expand.cpp
csrc/kernels/sgmv_shrink.cpp
csrc/kernels/types.h
csrc/kernels/utils.h
csrc/mla_preprocess/op_host/mla_preprocess.h
csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h
csrc/mla_preprocess/op_kernel/mla_preprocess.h
csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp
csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp
csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp
csrc/mla_preprocess/op_kernel/kernel/common.h
csrc/mla_preprocess/op_kernel/kernel/common_func.h
csrc/mla_preprocess/op_kernel/kernel/hardware.h
csrc/mla_preprocess/op_kernel/kernel/iterator.h
csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h
csrc/mla_preprocess/op_kernel/kernel/layout.h
csrc/mla_preprocess/op_kernel/kernel/mem.h
csrc/mla_preprocess/op_kernel/kernel/mma.h
csrc/mla_preprocess/op_kernel/kernel/set_fpc.h
csrc/mla_preprocess/op_kernel/kernel/simd.h
csrc/mla_preprocess/op_kernel/kernel/utils.h
csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc
docs/Makefile
docs/README.md
docs/requirements-docs.txt
docs/requirements-test.txt
docs/source/conf.py
docs/source/faqs.md
docs/source/index.md
docs/source/installation.md
docs/source/quick_start.md
docs/source/_templates/sections/header.html
docs/source/assets/multi_node_dp_deepseek.png
docs/source/assets/multi_node_dp_kimi.png
docs/source/community/contributors.md
docs/source/community/governance.md
docs/source/community/versioning_policy.md
docs/source/community/user_stories/index.md
docs/source/community/user_stories/llamafactory.md
docs/source/developer_guide/contribution/index.md
docs/source/developer_guide/contribution/testing.md
docs/source/developer_guide/evaluation/index.md
docs/source/developer_guide/evaluation/using_evalscope.md
docs/source/developer_guide/evaluation/using_lm_eval.md
docs/source/developer_guide/evaluation/using_opencompass.md
docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md
docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md
docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md
docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md
docs/source/developer_guide/evaluation/accuracy_report/index.md
docs/source/developer_guide/feature_guide/ACL_Graph.md
docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md
docs/source/developer_guide/feature_guide/index.md
docs/source/developer_guide/feature_guide/patch.md
docs/source/developer_guide/modeling/adding_a_new_model.md
docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md
docs/source/developer_guide/modeling/index.md
docs/source/developer_guide/performance/index.md
docs/source/developer_guide/performance/optimization_and_tuning.md
docs/source/developer_guide/performance/performance_benchmark.md
docs/source/developer_guide/performance/profile_execute_duration.md
docs/source/locale/zh_CN/LC_MESSAGES/faqs.po
docs/source/locale/zh_CN/LC_MESSAGES/index.po
docs/source/locale/zh_CN/LC_MESSAGES/installation.po
docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po
docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po
docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po
docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po
docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po
docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po
docs/source/logos/vllm-ascend-logo-text-dark.png
docs/source/logos/vllm-ascend-logo-text-light.png
docs/source/tutorials/index.md
docs/source/tutorials/multi-node_dsv3.2.md
docs/source/tutorials/multi_node.md
docs/source/tutorials/multi_node_kimi.md
docs/source/tutorials/multi_node_pd_disaggregation_llmdatadist.md
docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md
docs/source/tutorials/multi_node_qwen3vl.md
docs/source/tutorials/multi_node_ray.md
docs/source/tutorials/multi_npu.md
docs/source/tutorials/multi_npu_moge.md
docs/source/tutorials/multi_npu_quantization.md
docs/source/tutorials/multi_npu_qwen3_moe.md
docs/source/tutorials/multi_npu_qwen3_next.md
docs/source/tutorials/single_node_300i.md
docs/source/tutorials/single_node_pd_disaggregation_llmdatadist.md
docs/source/tutorials/single_npu.md
docs/source/tutorials/single_npu_audio.md
docs/source/tutorials/single_npu_multimodal.md
docs/source/tutorials/single_npu_qwen3_embedding.md
docs/source/tutorials/single_npu_qwen3_quantization.md
docs/source/user_guide/release_notes.md
docs/source/user_guide/configuration/additional_config.md
docs/source/user_guide/configuration/env_vars.md
docs/source/user_guide/configuration/index.md
docs/source/user_guide/feature_guide/eplb_swift_balancer.md
docs/source/user_guide/feature_guide/graph_mode.md
docs/source/user_guide/feature_guide/index.md
docs/source/user_guide/feature_guide/lora.md
docs/source/user_guide/feature_guide/quantization.md
docs/source/user_guide/feature_guide/sleep_mode.md
docs/source/user_guide/feature_guide/structured_output.md
docs/source/user_guide/feature_guide/images/eplb_img.png
docs/source/user_guide/feature_guide/images/structured_output_1.png
docs/source/user_guide/support_matrix/index.md
docs/source/user_guide/support_matrix/supported_features.md
docs/source/user_guide/support_matrix/supported_models.md
examples/offline_data_parallel.py
examples/offline_disaggregated_prefill_npu.py
examples/offline_dualbatch_overlap_npu.py
examples/offline_embed.py
examples/offline_external_launcher.py
examples/offline_inference_audio_language.py
examples/offline_inference_npu.py
examples/offline_inference_npu_tp2.py
examples/offline_inference_sleep_mode_npu.py
examples/offline_weight_load.py
examples/prompt_embedding_inference.py
examples/run_dp_server.sh
examples/disaggregated_prefill_v1/README.md
examples/disaggregated_prefill_v1/gen_ranktable.py
examples/disaggregated_prefill_v1/gen_ranktable.sh
examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py
examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py
examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md
examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md
examples/disaggregated_prefill_v1/run_server.sh
examples/eplb/eplb_deepseek.py
examples/eplb/eplb_strategy.py
examples/external_online_dp/README.md
examples/external_online_dp/launch_online_dp.py
examples/external_online_dp/run_dp_template.sh
tests/__init__.py
tests/e2e/__init__.py
tests/e2e/common.sh
tests/e2e/conftest.py
tests/e2e/model_utils.py
tests/e2e/run_disagg_pd.sh
tests/e2e/run_doctests.sh
tests/e2e/utils.py
tests/e2e/310p/test_offline_inference_310p.py
tests/e2e/310p/test_offline_inference_parallel_310p.py
tests/e2e/doctests/001-quickstart-test.sh
tests/e2e/doctests/002-pip-binary-installation-test.sh
tests/e2e/models/conftest.py
tests/e2e/models/report_template.md
tests/e2e/models/test_lm_eval_correctness.py
tests/e2e/models/configs/DeepSeek-V2-Lite.yaml
tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml
tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml
tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml
tests/e2e/models/configs/Qwen3-30B-A3B.yaml
tests/e2e/models/configs/Qwen3-8B-Base.yaml
tests/e2e/models/configs/Qwen3-8B.yaml
tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml
tests/e2e/models/configs/accuracy.txt
tests/e2e/multicard/test_data_parallel.py
tests/e2e/multicard/test_expert_parallel.py
tests/e2e/multicard/test_external_launcher.py
tests/e2e/multicard/test_full_graph_mode.py
tests/e2e/multicard/test_fused_moe_allgather_ep.py
tests/e2e/multicard/test_ilama_lora_tp2.py
tests/e2e/multicard/test_offline_inference_distributed.py
tests/e2e/multicard/test_pipeline_parallel.py
tests/e2e/multicard/test_prefix_caching.py
tests/e2e/multicard/test_qwen3_moe.py
tests/e2e/multicard/test_single_request_aclgraph.py
tests/e2e/multicard/test_torchair_graph_mode.py
tests/e2e/multicard/test_weight_loader.py
tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py
tests/e2e/nightly/models/test_qwen2_5_vl_7b.py
tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py
tests/e2e/nightly/models/test_qwen3_32b.py
tests/e2e/nightly/models/test_qwen3_32b_int8.py
tests/e2e/nightly/multi_node/__init__.py
tests/e2e/nightly/multi_node/test_multi_node.py
tests/e2e/nightly/multi_node/config/__init__.py
tests/e2e/nightly/multi_node/config/multi_node_config.py
tests/e2e/nightly/multi_node/config/utils.py
tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml
tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml
tests/e2e/nightly/multi_node/scripts/lws.yaml
tests/e2e/nightly/multi_node/scripts/run.sh
tests/e2e/pd_disaggreate/run_edge_case_test.sh
tests/e2e/pd_disaggreate/setup_pd.sh
tests/e2e/pd_disaggreate/test_edge_cases.py
tests/e2e/pd_disaggreate/test_pd_e2e.py
tests/e2e/prompts/example.txt
tests/e2e/singlecard/__init__.py
tests/e2e/singlecard/test_aclgraph.py
tests/e2e/singlecard/test_aclgraph_mem.py
tests/e2e/singlecard/test_ascend_scheduler.py
tests/e2e/singlecard/test_bge_model.py
tests/e2e/singlecard/test_camem.py
tests/e2e/singlecard/test_chunked.py
tests/e2e/singlecard/test_embedding.py
tests/e2e/singlecard/test_embedding_aclgraph.py
tests/e2e/singlecard/test_guided_decoding.py
tests/e2e/singlecard/test_ilama_lora.py
tests/e2e/singlecard/test_multistream_overlap_shared_expert.py
tests/e2e/singlecard/test_profile_execute_duration.py
tests/e2e/singlecard/test_quantization.py
tests/e2e/singlecard/test_sampler.py
tests/e2e/singlecard/test_vlm.py
tests/e2e/singlecard/ops/__init__.py
tests/e2e/singlecard/ops/test_bgmv_expand.py
tests/e2e/singlecard/ops/test_bgmv_shrink.py
tests/e2e/singlecard/ops/test_fused_moe.py
tests/e2e/singlecard/ops/test_gating_top_k_softmax.py
tests/e2e/singlecard/ops/test_mla_preprocess.py
tests/e2e/singlecard/ops/test_rotary_embedding.py
tests/e2e/singlecard/ops/test_vocabparallelembedding.py
tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
tests/e2e/vllm_interface/vllm_test.cfg
tests/e2e/vllm_interface/singlecard/test_sampler.py
tests/ut/__init__.py
tests/ut/base.py
tests/ut/conftest.py
tests/ut/test_ascend_config.py
tests/ut/test_envs.py
tests/ut/test_platform.py
tests/ut/test_utils.py
tests/ut/attention/test_attention_mask.py
tests/ut/attention/test_attention_v1.py
tests/ut/attention/test_mla_v1.py
tests/ut/compilation/test_acl_graph.py
tests/ut/core/test_schedule_config.py
tests/ut/core/test_scheduler.py
tests/ut/device_allocator/test_camem.py
tests/ut/distributed/test_communicator.py
tests/ut/distributed/test_determin_expert_map_all.py
tests/ut/distributed/test_parallel_state.py
tests/ut/distributed/device_communicators/test_pyhccl.py
tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py
tests/ut/eplb/adaptor/test_abstract_adaptor.py
tests/ut/eplb/core/test_eplb_device_transfer_loader.py
tests/ut/eplb/core/test_eplb_utils.py
tests/ut/eplb/core/policy/test_policy_abstract.py
tests/ut/eplb/core/policy/test_policy_dynamic_ep.py
tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py
tests/ut/eplb/core/policy/test_policy_factor.py
tests/ut/fake_weight/config.json
tests/ut/kv_connector/test_llmdatadist_connector.py
tests/ut/kv_connector/test_mooncake_connector.py
tests/ut/kv_connector/test_mooncake_layerwise_connector.py
tests/ut/kv_connector/test_remote_decode_lifecycle.py
tests/ut/kv_connector/test_remote_prefill_lifecycle.py
tests/ut/kv_connector/utils.py
tests/ut/models/__init__.py
tests/ut/models/conftest.py
tests/ut/models/test_qwen2_5_vl.py
tests/ut/models/test_qwen2_5_vl_without_padding.py
tests/ut/models/test_qwen2_vl.py
tests/ut/multistream/test_base.py
tests/ut/multistream/test_decorator.py
tests/ut/multistream/test_layers.py
tests/ut/multistream/test_metadata.py
tests/ut/multistream/test_ms_split.py
tests/ut/ops/expert_map.json
tests/ut/ops/test_activation.py
tests/ut/ops/test_comm_utils.py
tests/ut/ops/test_common_fused_moe.py
tests/ut/ops/test_expert_load_balancer.py
tests/ut/ops/test_fused_moe_prepare_and_finalize.py
tests/ut/ops/test_fused_ops.py
tests/ut/ops/test_layernorm.py
tests/ut/ops/test_linear.py
tests/ut/ops/test_moe_comm_method.py
tests/ut/ops/test_rotary_embedding.py
tests/ut/ops/test_token_dispatcher.py
tests/ut/ops/test_vocab_parallel_embedding.py
tests/ut/patch/worker/patch_common/test_patch_distributed.py
tests/ut/patch/worker/patch_common/test_patch_minicpm.py
tests/ut/quantization/test_quant_config.py
tests/ut/quantization/test_utils.py
tests/ut/quantization/test_w4a4_flatquant_dynamic.py
tests/ut/quantization/test_w4a8_dynamic.py
tests/ut/quantization/test_w8a8.py
tests/ut/quantization/test_w8a8_dynamic.py
tests/ut/sample/test_rejection_sampler.py
tests/ut/sample/test_sampler.py
tests/ut/sample/logits_processor/test_builtin.py
tests/ut/torchair/__init__.py
tests/ut/torchair/test_torchair_attention.py
tests/ut/torchair/test_torchair_mla.py
tests/ut/torchair/test_utils.py
tests/ut/torchair/models/test_torchair_deepseek_mtp.py
tests/ut/torchair/models/test_torchair_deepseek_v2.py
tests/ut/torchair/ops/test_torchair_fused_moe.py
tests/ut/torchair/ops/test_torchair_rotary_embedding.py
tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py
tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py
tests/ut/worker/test_input_batch.py
tests/ut/worker/test_model_runner_v1.py
tests/ut/worker/test_worker_v1.py
tools/actionlint.sh
tools/aisbench.py
tools/check_python_src_init.py
tools/check_repo.sh
tools/enforce_regex_import.py
tools/mooncake_installer.sh
tools/mypy.sh
tools/png-lint.sh
tools/send_mm_request.py
tools/shellcheck.sh
tools/sphinx-lint.sh
vllm_ascend/__init__.py
vllm_ascend/_version.py
vllm_ascend/ascend_config.py
vllm_ascend/ascend_forward_context.py
vllm_ascend/cpu_binding.py
vllm_ascend/envs.py
vllm_ascend/meta_registration.py
vllm_ascend/platform.py
vllm_ascend/utils.py
vllm_ascend.egg-info/PKG-INFO
vllm_ascend.egg-info/SOURCES.txt
vllm_ascend.egg-info/dependency_links.txt
vllm_ascend.egg-info/entry_points.txt
vllm_ascend.egg-info/requires.txt
vllm_ascend.egg-info/top_level.txt
vllm_ascend/attention/__init__.py
vllm_ascend/attention/attention_mask.py
vllm_ascend/attention/attention_v1.py
vllm_ascend/attention/mla_v1.py
vllm_ascend/attention/sfa_v1.py
vllm_ascend/attention/utils.py
vllm_ascend/compilation/__init__.py
vllm_ascend/compilation/acl_graph.py
vllm_ascend/core/__init__.py
vllm_ascend/core/recompute_schedule_config.py
vllm_ascend/core/recompute_scheduler.py
vllm_ascend/core/schedule_config.py
vllm_ascend/core/scheduler.py
vllm_ascend/device_allocator/__init__.py
vllm_ascend/device_allocator/camem.py
vllm_ascend/distributed/__init__.py
vllm_ascend/distributed/communicator.py
vllm_ascend/distributed/cpu_offload_connector.py
vllm_ascend/distributed/llmdatadist_c_mgr_connector.py
vllm_ascend/distributed/mooncake_connector.py
vllm_ascend/distributed/mooncake_layerwise_connector.py
vllm_ascend/distributed/parallel_state.py
vllm_ascend/distributed/utils.py
vllm_ascend/distributed/cpu_offload_manager/__init__.py
vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py
vllm_ascend/distributed/cpu_offload_manager/metadata.py
vllm_ascend/distributed/device_communicators/__init__.py
vllm_ascend/distributed/device_communicators/pyhccl.py
vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py
vllm_ascend/distributed/mooncake/__init__.py
vllm_ascend/distributed/mooncake/config_data.py
vllm_ascend/distributed/mooncake/kv_transfer.py
vllm_ascend/distributed/mooncake/mooncake_engine.py
vllm_ascend/distributed/mooncake/mooncake_store.py
vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py
vllm_ascend/distributed/mooncake/transfer_engine.py
vllm_ascend/eplb/__init__.py
vllm_ascend/eplb/eplb_updator.py
vllm_ascend/eplb/utils.py
vllm_ascend/eplb/adaptor/__init__.py
vllm_ascend/eplb/adaptor/abstract_adaptor.py
vllm_ascend/eplb/adaptor/vllm_adaptor.py
vllm_ascend/eplb/core/__init__.py
vllm_ascend/eplb/core/eplb_device_transfer_loader.py
vllm_ascend/eplb/core/eplb_utils.py
vllm_ascend/eplb/core/eplb_worker.py
vllm_ascend/eplb/core/policy/__init__.py
vllm_ascend/eplb/core/policy/policy_abstract.py
vllm_ascend/eplb/core/policy/policy_dynamic_ep.py
vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py
vllm_ascend/eplb/core/policy/policy_factory.py
vllm_ascend/eplb/core/policy/policy_flashlb.py
vllm_ascend/eplb/core/policy/policy_random.py
vllm_ascend/lora/__init__.py
vllm_ascend/lora/lora_ops.py
vllm_ascend/lora/punica_npu.py
vllm_ascend/lora/utils.py
vllm_ascend/models/__init__.py
vllm_ascend/models/deepseek_v3_2.py
vllm_ascend/models/qwen2_5_omni_thinker.py
vllm_ascend/models/qwen2_5_vl.py
vllm_ascend/models/qwen2_5_vl_without_padding.py
vllm_ascend/models/qwen2_vl.py
vllm_ascend/models/qwen3_next.py
vllm_ascend/models/layers/__init__.py
vllm_ascend/models/layers/mla.py
vllm_ascend/models/layers/sfa.py
vllm_ascend/multistream/__init__.py
vllm_ascend/multistream/base.py
vllm_ascend/multistream/context.py
vllm_ascend/multistream/decorator.py
vllm_ascend/multistream/layers.py
vllm_ascend/multistream/metadata.py
vllm_ascend/multistream/ms_split.py
vllm_ascend/ops/__init__.py
vllm_ascend/ops/activation.py
vllm_ascend/ops/attention.py
vllm_ascend/ops/casual_conv1d.py
vllm_ascend/ops/common_fused_moe.py
vllm_ascend/ops/expert_load_balancer.py
vllm_ascend/ops/fla.py
vllm_ascend/ops/layernorm.py
vllm_ascend/ops/linear.py
vllm_ascend/ops/linear_op.py
vllm_ascend/ops/register_custom_ops.py
vllm_ascend/ops/rotary_embedding.py
vllm_ascend/ops/sigmoid_gating.py
vllm_ascend/ops/vocab_parallel_embedding.py
vllm_ascend/ops/weight_prefetch.py
vllm_ascend/ops/moe/__init__.py
vllm_ascend/ops/moe/comm_utils.py
vllm_ascend/ops/moe/experts_selector.py
vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py
vllm_ascend/ops/moe/moe_comm_method.py
vllm_ascend/ops/moe/moe_mlp.py
vllm_ascend/ops/moe/token_dispatcher.py
vllm_ascend/patch/__init__.py
vllm_ascend/patch/platform/__init__.py
vllm_ascend/patch/platform/patch_config.py
vllm_ascend/patch/platform/patch_core.py
vllm_ascend/patch/platform/patch_distributed.py
vllm_ascend/patch/platform/patch_mamba_config.py
vllm_ascend/patch/platform/patch_message_queue.py
vllm_ascend/patch/platform/patch_multiproc_executor.py
vllm_ascend/patch/platform/patch_sched_yield.py
vllm_ascend/patch/worker/__init__.py
vllm_ascend/patch/worker/patch_attention_layer.py
vllm_ascend/patch/worker/patch_deepseek_mtp.py
vllm_ascend/patch/worker/patch_distributed.py
vllm_ascend/patch/worker/patch_logits.py
vllm_ascend/patch/worker/patch_minicpm.py
vllm_ascend/patch/worker/patch_multimodal_merge.py
vllm_ascend/patch/worker/patch_roberta.py
vllm_ascend/patch/worker/patch_triton.py
vllm_ascend/patch/worker/patch_weight_loader.py
vllm_ascend/quantization/__init__.py
vllm_ascend/quantization/quant_config.py
vllm_ascend/quantization/utils.py
vllm_ascend/quantization/w4a4_flatquant_dynamic.py
vllm_ascend/quantization/w4a8_dynamic.py
vllm_ascend/quantization/w8a8.py
vllm_ascend/quantization/w8a8_dynamic.py
vllm_ascend/sample/__init__.py
vllm_ascend/sample/rejection_sampler.py
vllm_ascend/sample/sampler.py
vllm_ascend/sample/logits_processor/__init__.py
vllm_ascend/sample/logits_processor/builtin.py
vllm_ascend/spec_decode/__init__.py
vllm_ascend/spec_decode/eagle_proposer.py
vllm_ascend/spec_decode/interface.py
vllm_ascend/spec_decode/mtp_proposer.py
vllm_ascend/spec_decode/ngram_proposer.py
vllm_ascend/torchair/__init__.py
vllm_ascend/torchair/torchair_attention.py
vllm_ascend/torchair/torchair_mla.py
vllm_ascend/torchair/torchair_model_runner.py
vllm_ascend/torchair/torchair_sfa.py
vllm_ascend/torchair/torchair_worker.py
vllm_ascend/torchair/utils.py
vllm_ascend/torchair/models/__init__.py
vllm_ascend/torchair/models/qwen2.py
vllm_ascend/torchair/models/qwen3_moe.py
vllm_ascend/torchair/models/torchair_deepseek_mtp.py
vllm_ascend/torchair/models/torchair_deepseek_v2.py
vllm_ascend/torchair/models/torchair_deepseek_v3.py
vllm_ascend/torchair/models/torchair_pangu_moe.py
vllm_ascend/torchair/ops/__init__.py
vllm_ascend/torchair/ops/sequence_parallel.py
vllm_ascend/torchair/ops/shared_weight_layer.py
vllm_ascend/torchair/ops/torchair_activation.py
vllm_ascend/torchair/ops/torchair_fused_moe.py
vllm_ascend/torchair/ops/torchair_layernorm.py
vllm_ascend/torchair/ops/torchair_rotary_embedding.py
vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py
vllm_ascend/torchair/quantization/__init__.py
vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py
vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py
vllm_ascend/worker/__init__.py
vllm_ascend/worker/block_table.py
vllm_ascend/worker/model_runner_v1.py
vllm_ascend/worker/npu_input_batch.py
vllm_ascend/worker/worker_v1.py