.gitignore
.gitmodules
.nojekyll
LICENSE
README.md
requirements.txt
requirements_dev.txt
setup.py
apex/__init__.py
apex/RNN/README.md
apex/RNN/RNNBackend.py
apex/RNN/__init__.py
apex/RNN/cells.py
apex/RNN/models.py
apex/amp/README.md
apex/amp/__init__.py
apex/amp/__version__.py
apex/amp/_amp_state.py
apex/amp/_initialize.py
apex/amp/_process_optimizer.py
apex/amp/amp.py
apex/amp/compat.py
apex/amp/frontend.py
apex/amp/handle.py
apex/amp/opt.py
apex/amp/rnn_compat.py
apex/amp/scaler.py
apex/amp/utils.py
apex/amp/wrap.py
apex/amp/lists/__init__.py
apex/amp/lists/functional_overrides.py
apex/amp/lists/tensor_overrides.py
apex/amp/lists/torch_overrides.py
apex/contrib/__init__.py
apex/contrib/bottleneck/__init__.py
apex/contrib/bottleneck/bottleneck.py
apex/contrib/bottleneck/bottleneck_module_test.py
apex/contrib/bottleneck/test.py
apex/contrib/csrc/bottleneck/bottleneck.cpp
apex/contrib/csrc/fmha/fmha_api.cpp
apex/contrib/csrc/fmha/src/fmha.h
apex/contrib/csrc/fmha/src/fmha_dgrad_fp16_128_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_dgrad_fp16_256_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_dgrad_fp16_384_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_dgrad_fp16_512_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload.h
apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload_nl.h
apex/contrib/csrc/fmha/src/fmha_fprop_fp16_128_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_fprop_fp16_256_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_fprop_fp16_384_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_fprop_fp16_512_64_kernel.sm80.cu
apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN.h
apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_nl.h
apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_reload_v.h
apex/contrib/csrc/fmha/src/fmha_kernel.h
apex/contrib/csrc/fmha/src/fmha_noloop_reduce.cu
apex/contrib/csrc/fmha/src/fmha_utils.h
apex/contrib/csrc/fmha/src/fmha/gemm.h
apex/contrib/csrc/fmha/src/fmha/gmem_tile.h
apex/contrib/csrc/fmha/src/fmha/kernel_traits.h
apex/contrib/csrc/fmha/src/fmha/mask.h
apex/contrib/csrc/fmha/src/fmha/smem_tile.h
apex/contrib/csrc/fmha/src/fmha/softmax.h
apex/contrib/csrc/fmha/src/fmha/utils.h
apex/contrib/csrc/groupbn/batch_norm.cu
apex/contrib/csrc/groupbn/batch_norm.h
apex/contrib/csrc/groupbn/batch_norm_add_relu.cu
apex/contrib/csrc/groupbn/batch_norm_add_relu.h
apex/contrib/csrc/groupbn/cuda_utils.h
apex/contrib/csrc/groupbn/interface.cpp
apex/contrib/csrc/groupbn/ipc.cu
apex/contrib/csrc/groupbn/nhwc_batch_norm_kernel.h
apex/contrib/csrc/layer_norm/ln_api.cpp
apex/contrib/csrc/layer_norm/ln_bwd_semi_cuda_kernel.cu
apex/contrib/csrc/layer_norm/ln_fwd_cuda_kernel.cu
apex/contrib/csrc/layer_norm/ln_kernel_traits.h
apex/contrib/csrc/layer_norm/utils.cuh
apex/contrib/csrc/multihead_attn/additive_masked_softmax_dropout.cpp
apex/contrib/csrc/multihead_attn/additive_masked_softmax_dropout_cuda.cu
apex/contrib/csrc/multihead_attn/dropout.h
apex/contrib/csrc/multihead_attn/encdec_multihead_attn.cpp
apex/contrib/csrc/multihead_attn/encdec_multihead_attn_cuda.cu
apex/contrib/csrc/multihead_attn/encdec_multihead_attn_norm_add.cpp
apex/contrib/csrc/multihead_attn/encdec_multihead_attn_norm_add_cuda.cu
apex/contrib/csrc/multihead_attn/layer_norm.h
apex/contrib/csrc/multihead_attn/masked_softmax_dropout.cpp
apex/contrib/csrc/multihead_attn/masked_softmax_dropout_cuda.cu
apex/contrib/csrc/multihead_attn/philox.h
apex/contrib/csrc/multihead_attn/self_multihead_attn.cpp
apex/contrib/csrc/multihead_attn/self_multihead_attn_bias.cpp
apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_additive_mask.cpp
apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_additive_mask_cuda.cu
apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_cuda.cu
apex/contrib/csrc/multihead_attn/self_multihead_attn_cuda.cu
apex/contrib/csrc/multihead_attn/self_multihead_attn_norm_add.cpp
apex/contrib/csrc/multihead_attn/self_multihead_attn_norm_add_cuda.cu
apex/contrib/csrc/multihead_attn/softmax.h
apex/contrib/csrc/multihead_attn/strided_batched_gemm.h
apex/contrib/csrc/optimizers/fused_adam_cuda.cpp
apex/contrib/csrc/optimizers/fused_adam_cuda_kernel.cu
apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp
apex/contrib/csrc/optimizers/fused_lamb_cuda_kernel.cu
apex/contrib/csrc/optimizers/multi_tensor_distopt_adam.cpp
apex/contrib/csrc/optimizers/multi_tensor_distopt_adam_kernel.cu
apex/contrib/csrc/optimizers/multi_tensor_distopt_lamb.cpp
apex/contrib/csrc/optimizers/multi_tensor_distopt_lamb_kernel.cu
apex/contrib/csrc/transducer/transducer_joint.cpp
apex/contrib/csrc/transducer/transducer_joint_kernel.cu
apex/contrib/csrc/transducer/transducer_loss.cpp
apex/contrib/csrc/transducer/transducer_loss_kernel.cu
apex/contrib/csrc/xentropy/interface.cpp
apex/contrib/csrc/xentropy/xentropy_kernel.cu
apex/contrib/examples/multihead_attn/func_test_multihead_attn.py
apex/contrib/examples/multihead_attn/perf_test_multihead_attn.py
apex/contrib/fmha/__init__.py
apex/contrib/fmha/fmha.py
apex/contrib/groupbn/__init__.py
apex/contrib/groupbn/batch_norm.py
apex/contrib/layer_norm/__init__.py
apex/contrib/layer_norm/layer_norm.py
apex/contrib/multihead_attn/MHA_bwd.png
apex/contrib/multihead_attn/MHA_fwd.png
apex/contrib/multihead_attn/README.md
apex/contrib/multihead_attn/__init__.py
apex/contrib/multihead_attn/encdec_multihead_attn.py
apex/contrib/multihead_attn/encdec_multihead_attn_func.py
apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py
apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py
apex/contrib/multihead_attn/fast_self_multihead_attn_func.py
apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py
apex/contrib/multihead_attn/mask_softmax_dropout_func.py
apex/contrib/multihead_attn/self_multihead_attn.py
apex/contrib/multihead_attn/self_multihead_attn_func.py
apex/contrib/optimizers/__init__.py
apex/contrib/optimizers/distributed_fused_adam.py
apex/contrib/optimizers/distributed_fused_adam_v2.py
apex/contrib/optimizers/distributed_fused_adam_v3.py
apex/contrib/optimizers/distributed_fused_lamb.py
apex/contrib/optimizers/fp16_optimizer.py
apex/contrib/optimizers/fused_adam.py
apex/contrib/optimizers/fused_lamb.py
apex/contrib/optimizers/fused_sgd.py
apex/contrib/sparsity/README.md
apex/contrib/sparsity/__init__.py
apex/contrib/sparsity/asp.py
apex/contrib/sparsity/sparse_masklib.py
apex/contrib/sparsity/test/checkpointing_test_part1.py
apex/contrib/sparsity/test/checkpointing_test_part2.py
apex/contrib/sparsity/test/checkpointing_test_reference.py
apex/contrib/sparsity/test/toy_problem.py
apex/contrib/test/test_label_smoothing.py
apex/contrib/test/fmha/test_fmha.py
apex/contrib/test/fused_dense/test_fused_dense.py
apex/contrib/test/layer_norm/test_fast_layer_norm.py
apex/contrib/test/multihead_attn/test_encdec_multihead_attn.py
apex/contrib/test/multihead_attn/test_encdec_multihead_attn_norm_add.py
apex/contrib/test/multihead_attn/test_fast_self_multihead_attn_bias.py
apex/contrib/test/multihead_attn/test_mha_fused_softmax.py
apex/contrib/test/multihead_attn/test_self_multihead_attn.py
apex/contrib/test/multihead_attn/test_self_multihead_attn_norm_add.py
apex/contrib/test/transducer/test_transducer_joint.py
apex/contrib/test/transducer/test_transducer_loss.py
apex/contrib/test/transducer/transducer_ref.py
apex/contrib/transducer/__init__.py
apex/contrib/transducer/transducer.py
apex/contrib/xentropy/__init__.py
apex/contrib/xentropy/softmax_xentropy.py
apex/fp16_utils/README.md
apex/fp16_utils/__init__.py
apex/fp16_utils/fp16_optimizer.py
apex/fp16_utils/fp16util.py
apex/fp16_utils/loss_scaler.py
apex/fused_dense/__init__.py
apex/fused_dense/fused_dense.py
apex/mlp/__init__.py
apex/mlp/mlp.py
apex/multi_tensor_apply/__init__.py
apex/multi_tensor_apply/multi_tensor_apply.py
apex/normalization/__init__.py
apex/normalization/fused_layer_norm.py
apex/optimizers/__init__.py
apex/optimizers/fused_adagrad.py
apex/optimizers/fused_adam.py
apex/optimizers/fused_lamb.py
apex/optimizers/fused_novograd.py
apex/optimizers/fused_sgd.py
apex/parallel/LARC.py
apex/parallel/README.md
apex/parallel/__init__.py
apex/parallel/distributed.py
apex/parallel/multiproc.py
apex/parallel/optimized_sync_batchnorm.py
apex/parallel/optimized_sync_batchnorm_kernel.py
apex/parallel/sync_batchnorm.py
apex/parallel/sync_batchnorm_kernel.py
apex/pyprof/FAQs.md
apex/pyprof/README.md
apex/pyprof/__init__.py
apex/pyprof/examples/.gitignore
apex/pyprof/examples/lenet.py
apex/pyprof/examples/operators.py
apex/pyprof/examples/simple.py
apex/pyprof/examples/apex/README.md
apex/pyprof/examples/apex/fused_adam.py
apex/pyprof/examples/apex/fused_layer_norm.py
apex/pyprof/examples/apex/test.sh
apex/pyprof/examples/custom_func_module/README.md
apex/pyprof/examples/custom_func_module/custom_function.py
apex/pyprof/examples/custom_func_module/custom_module.py
apex/pyprof/examples/custom_func_module/test.sh
apex/pyprof/examples/imagenet/imagenet.py
apex/pyprof/examples/imagenet/test.sh
apex/pyprof/examples/jit/README.md
apex/pyprof/examples/jit/jit_script_function.py
apex/pyprof/examples/jit/jit_script_method.py
apex/pyprof/examples/jit/jit_trace_function.py
apex/pyprof/examples/jit/jit_trace_method.py
apex/pyprof/examples/jit/test.sh
apex/pyprof/examples/user_annotation/README.md
apex/pyprof/examples/user_annotation/resnet.py
apex/pyprof/examples/user_annotation/test.sh
apex/pyprof/nvtx/__init__.py
apex/pyprof/nvtx/nvmarker.py
apex/pyprof/parse/__init__.py
apex/pyprof/parse/__main__.py
apex/pyprof/parse/db.py
apex/pyprof/parse/kernel.py
apex/pyprof/parse/nvvp.py
apex/pyprof/parse/parse.py
apex/pyprof/prof/__init__.py
apex/pyprof/prof/__main__.py
apex/pyprof/prof/activation.py
apex/pyprof/prof/base.py
apex/pyprof/prof/blas.py
apex/pyprof/prof/conv.py
apex/pyprof/prof/convert.py
apex/pyprof/prof/data.py
apex/pyprof/prof/dropout.py
apex/pyprof/prof/embedding.py
apex/pyprof/prof/index_slice_join_mutate.py
apex/pyprof/prof/linear.py
apex/pyprof/prof/loss.py
apex/pyprof/prof/misc.py
apex/pyprof/prof/normalization.py
apex/pyprof/prof/optim.py
apex/pyprof/prof/output.py
apex/pyprof/prof/pointwise.py
apex/pyprof/prof/pooling.py
apex/pyprof/prof/prof.py
apex/pyprof/prof/randomSample.py
apex/pyprof/prof/recurrentCell.py
apex/pyprof/prof/reduction.py
apex/pyprof/prof/softmax.py
apex/pyprof/prof/usage.py
apex/pyprof/prof/utility.py
apex/reparameterization/README.md
apex/reparameterization/__init__.py
apex/reparameterization/reparameterization.py
apex/reparameterization/weight_norm.py
csrc/amp_C_frontend.cpp
csrc/compat.h
csrc/flatten_unflatten.cpp
csrc/fused_dense.cpp
csrc/fused_dense_cuda.cu
csrc/layer_norm_cuda.cpp
csrc/layer_norm_cuda_kernel.cu
csrc/mlp.cpp
csrc/mlp_cuda.cu
csrc/multi_tensor_adagrad.cu
csrc/multi_tensor_adam.cu
csrc/multi_tensor_apply.cuh
csrc/multi_tensor_axpby_kernel.cu
csrc/multi_tensor_l2norm_kernel.cu
csrc/multi_tensor_l2norm_scale_kernel.cu
csrc/multi_tensor_lamb.cu
csrc/multi_tensor_lamb_stage_1.cu
csrc/multi_tensor_lamb_stage_2.cu
csrc/multi_tensor_novograd.cu
csrc/multi_tensor_scale_kernel.cu
csrc/multi_tensor_sgd_kernel.cu
csrc/syncbn.cpp
csrc/type_shim.h
csrc/welford.cu
docs/Makefile
docs/source/advanced.rst
docs/source/amp.rst
docs/source/conf.py
docs/source/fp16_utils.rst
docs/source/index.rst
docs/source/layernorm.rst
docs/source/optimizers.rst
docs/source/parallel.rst
docs/source/_static/css/pytorch_theme.css
docs/source/_static/img/nv-pytorch2.png
docs/source/_templates/layout.html
examples/README.md
examples/dcgan/README.md
examples/dcgan/main_amp.py
examples/docker/Dockerfile
examples/docker/README.md
examples/imagenet/README.md
examples/imagenet/main_amp.py
examples/simple/distributed/README.md
examples/simple/distributed/distributed_data_parallel.py
examples/simple/distributed/run.sh
pytorch_extension.egg-info/PKG-INFO
pytorch_extension.egg-info/SOURCES.txt
pytorch_extension.egg-info/dependency_links.txt
pytorch_extension.egg-info/top_level.txt
tests/L0/run_test.py
tests/L0/run_amp/__init__.py
tests/L0/run_amp/test_add_param_group.py
tests/L0/run_amp/test_basic_casts.py
tests/L0/run_amp/test_cache.py
tests/L0/run_amp/test_checkpointing.py
tests/L0/run_amp/test_fused_sgd.py
tests/L0/run_amp/test_larc.py
tests/L0/run_amp/test_multi_tensor_axpby.py
tests/L0/run_amp/test_multi_tensor_l2norm.py
tests/L0/run_amp/test_multi_tensor_scale.py
tests/L0/run_amp/test_multiple_models_optimizers_losses.py
tests/L0/run_amp/test_promotion.py
tests/L0/run_amp/test_rnn.py
tests/L0/run_amp/utils.py
tests/L0/run_fp16util/__init__.py
tests/L0/run_fp16util/test_fp16util.py
tests/L0/run_fused_layer_norm/test_fused_layer_norm.py
tests/L0/run_mlp/test_mlp.py
tests/L0/run_optimizers/__init__.py
tests/L0/run_optimizers/test_dist_adam.py
tests/L0/run_optimizers/test_fused_novograd.py
tests/L0/run_optimizers/test_fused_optimizer.py
tests/L0/run_optimizers/test_lamb.py
tests/L0/run_pyprof_data/__init__.py
tests/L0/run_pyprof_data/test_pyprof_data.py
tests/L0/run_pyprof_nvtx/__init__.py
tests/L0/run_pyprof_nvtx/test_pyprof_nvtx.py
tests/L1/common/compare.py
tests/L1/common/main_amp.py
tests/L1/common/run_test.sh
tests/L1/cross_product/run.sh
tests/L1/cross_product_distributed/run.sh
tests/distributed/DDP/ddp_race_condition_test.py
tests/distributed/DDP/run_race_test.sh
tests/distributed/amp_master_params/amp_master_params.py
tests/distributed/amp_master_params/compare.py
tests/distributed/amp_master_params/run.sh
tests/distributed/synced_batchnorm/python_single_gpu_unit_test.py
tests/distributed/synced_batchnorm/single_gpu_unit_test.py
tests/distributed/synced_batchnorm/test_batchnorm1d.py
tests/distributed/synced_batchnorm/test_groups.py
tests/distributed/synced_batchnorm/two_gpu_test_different_batch_size.py
tests/distributed/synced_batchnorm/two_gpu_unit_test.py
tests/distributed/synced_batchnorm/unit_test.sh
tests/docker_extension_builds/run.sh