CMakeLists.txt
LICENSE
MANIFEST.in
NOTICE
README.md
setup.py
3rdparty/cub/cub/cub.cuh
3rdparty/cub/cub/util_allocator.cuh
3rdparty/cub/cub/util_arch.cuh
3rdparty/cub/cub/util_debug.cuh
3rdparty/cub/cub/util_device.cuh
3rdparty/cub/cub/util_macro.cuh
3rdparty/cub/cub/util_namespace.cuh
3rdparty/cub/cub/util_ptx.cuh
3rdparty/cub/cub/util_type.cuh
3rdparty/cub/cub/agent/agent_histogram.cuh
3rdparty/cub/cub/agent/agent_radix_sort_downsweep.cuh
3rdparty/cub/cub/agent/agent_radix_sort_upsweep.cuh
3rdparty/cub/cub/agent/agent_reduce.cuh
3rdparty/cub/cub/agent/agent_reduce_by_key.cuh
3rdparty/cub/cub/agent/agent_rle.cuh
3rdparty/cub/cub/agent/agent_scan.cuh
3rdparty/cub/cub/agent/agent_segment_fixup.cuh
3rdparty/cub/cub/agent/agent_select_if.cuh
3rdparty/cub/cub/agent/agent_spmv_orig.cuh
3rdparty/cub/cub/agent/single_pass_scan_operators.cuh
3rdparty/cub/cub/block/block_adjacent_difference.cuh
3rdparty/cub/cub/block/block_discontinuity.cuh
3rdparty/cub/cub/block/block_exchange.cuh
3rdparty/cub/cub/block/block_histogram.cuh
3rdparty/cub/cub/block/block_load.cuh
3rdparty/cub/cub/block/block_radix_rank.cuh
3rdparty/cub/cub/block/block_radix_sort.cuh
3rdparty/cub/cub/block/block_raking_layout.cuh
3rdparty/cub/cub/block/block_reduce.cuh
3rdparty/cub/cub/block/block_scan.cuh
3rdparty/cub/cub/block/block_shuffle.cuh
3rdparty/cub/cub/block/block_store.cuh
3rdparty/cub/cub/block/specializations/block_histogram_atomic.cuh
3rdparty/cub/cub/block/specializations/block_histogram_sort.cuh
3rdparty/cub/cub/block/specializations/block_reduce_raking.cuh
3rdparty/cub/cub/block/specializations/block_reduce_raking_commutative_only.cuh
3rdparty/cub/cub/block/specializations/block_reduce_warp_reductions.cuh
3rdparty/cub/cub/block/specializations/block_scan_raking.cuh
3rdparty/cub/cub/block/specializations/block_scan_warp_scans.cuh
3rdparty/cub/cub/block/specializations/block_scan_warp_scans2.cuh
3rdparty/cub/cub/block/specializations/block_scan_warp_scans3.cuh
3rdparty/cub/cub/device/device_histogram.cuh
3rdparty/cub/cub/device/device_partition.cuh
3rdparty/cub/cub/device/device_radix_sort.cuh
3rdparty/cub/cub/device/device_reduce.cuh
3rdparty/cub/cub/device/device_run_length_encode.cuh
3rdparty/cub/cub/device/device_scan.cuh
3rdparty/cub/cub/device/device_segmented_radix_sort.cuh
3rdparty/cub/cub/device/device_segmented_reduce.cuh
3rdparty/cub/cub/device/device_select.cuh
3rdparty/cub/cub/device/device_spmv.cuh
3rdparty/cub/cub/device/dispatch/dispatch_histogram.cuh
3rdparty/cub/cub/device/dispatch/dispatch_radix_sort.cuh
3rdparty/cub/cub/device/dispatch/dispatch_reduce.cuh
3rdparty/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh
3rdparty/cub/cub/device/dispatch/dispatch_rle.cuh
3rdparty/cub/cub/device/dispatch/dispatch_scan.cuh
3rdparty/cub/cub/device/dispatch/dispatch_select_if.cuh
3rdparty/cub/cub/device/dispatch/dispatch_spmv_orig.cuh
3rdparty/cub/cub/grid/grid_barrier.cuh
3rdparty/cub/cub/grid/grid_even_share.cuh
3rdparty/cub/cub/grid/grid_mapping.cuh
3rdparty/cub/cub/grid/grid_queue.cuh
3rdparty/cub/cub/host/mutex.cuh
3rdparty/cub/cub/iterator/arg_index_input_iterator.cuh
3rdparty/cub/cub/iterator/cache_modified_input_iterator.cuh
3rdparty/cub/cub/iterator/cache_modified_output_iterator.cuh
3rdparty/cub/cub/iterator/constant_input_iterator.cuh
3rdparty/cub/cub/iterator/counting_input_iterator.cuh
3rdparty/cub/cub/iterator/discard_output_iterator.cuh
3rdparty/cub/cub/iterator/tex_obj_input_iterator.cuh
3rdparty/cub/cub/iterator/tex_ref_input_iterator.cuh
3rdparty/cub/cub/iterator/transform_input_iterator.cuh
3rdparty/cub/cub/thread/thread_load.cuh
3rdparty/cub/cub/thread/thread_operators.cuh
3rdparty/cub/cub/thread/thread_reduce.cuh
3rdparty/cub/cub/thread/thread_scan.cuh
3rdparty/cub/cub/thread/thread_search.cuh
3rdparty/cub/cub/thread/thread_store.cuh
3rdparty/cub/cub/warp/warp_reduce.cuh
3rdparty/cub/cub/warp/warp_scan.cuh
3rdparty/cub/cub/warp/specializations/warp_reduce_shfl.cuh
3rdparty/cub/cub/warp/specializations/warp_reduce_smem.cuh
3rdparty/cub/cub/warp/specializations/warp_scan_shfl.cuh
3rdparty/cub/cub/warp/specializations/warp_scan_smem.cuh
3rdparty/cub/examples/block/example_block_radix_sort.cu
3rdparty/cub/examples/block/example_block_reduce.cu
3rdparty/cub/examples/block/example_block_scan.cu
3rdparty/cub/examples/block/reduce_by_key.cu
3rdparty/cub/examples/device/example_device_partition_flagged.cu
3rdparty/cub/examples/device/example_device_partition_if.cu
3rdparty/cub/examples/device/example_device_radix_sort.cu
3rdparty/cub/examples/device/example_device_reduce.cu
3rdparty/cub/examples/device/example_device_scan.cu
3rdparty/cub/examples/device/example_device_select_flagged.cu
3rdparty/cub/examples/device/example_device_select_if.cu
3rdparty/cub/examples/device/example_device_select_unique.cu
3rdparty/cub/examples/device/example_device_sort_find_non_trivial_runs.cu
3rdparty/cub/experimental/histogram_compare.cu
3rdparty/cub/experimental/sparse_matrix.h
3rdparty/cub/experimental/spmv_compare.cu
3rdparty/cub/experimental/defunct/example_coo_spmv.cu
3rdparty/cub/experimental/defunct/test_device_seg_reduce.cu
3rdparty/cub/experimental/histogram/histogram_cub.h
3rdparty/cub/experimental/histogram/histogram_gmem_atomics.h
3rdparty/cub/experimental/histogram/histogram_smem_atomics.h
3rdparty/cub/test/half.h
3rdparty/cub/test/link_a.cu
3rdparty/cub/test/link_b.cu
3rdparty/cub/test/link_main.cpp
3rdparty/cub/test/mersenne.h
3rdparty/cub/test/test_allocator.cu
3rdparty/cub/test/test_block_histogram.cu
3rdparty/cub/test/test_block_load_store.cu
3rdparty/cub/test/test_block_radix_sort.cu
3rdparty/cub/test/test_block_reduce.cu
3rdparty/cub/test/test_block_scan.cu
3rdparty/cub/test/test_device_histogram.cu
3rdparty/cub/test/test_device_radix_sort.cu
3rdparty/cub/test/test_device_reduce.cu
3rdparty/cub/test/test_device_reduce_by_key.cu
3rdparty/cub/test/test_device_run_length_encode.cu
3rdparty/cub/test/test_device_scan.cu
3rdparty/cub/test/test_device_select_if.cu
3rdparty/cub/test/test_device_select_unique.cu
3rdparty/cub/test/test_grid_barrier.cu
3rdparty/cub/test/test_iterator.cu
3rdparty/cub/test/test_util.h
3rdparty/cub/test/test_warp_reduce.cu
3rdparty/cub/test/test_warp_scan.cu
3rdparty/cub/tune/tune_device_reduce.cu
3rdparty/pybind11/CMakeLists.txt
3rdparty/pybind11/docs/requirements.txt
3rdparty/pybind11/include/pybind11/attr.h
3rdparty/pybind11/include/pybind11/buffer_info.h
3rdparty/pybind11/include/pybind11/cast.h
3rdparty/pybind11/include/pybind11/chrono.h
3rdparty/pybind11/include/pybind11/common.h
3rdparty/pybind11/include/pybind11/complex.h
3rdparty/pybind11/include/pybind11/eigen.h
3rdparty/pybind11/include/pybind11/embed.h
3rdparty/pybind11/include/pybind11/eval.h
3rdparty/pybind11/include/pybind11/functional.h
3rdparty/pybind11/include/pybind11/iostream.h
3rdparty/pybind11/include/pybind11/numpy.h
3rdparty/pybind11/include/pybind11/operators.h
3rdparty/pybind11/include/pybind11/options.h
3rdparty/pybind11/include/pybind11/pybind11.h
3rdparty/pybind11/include/pybind11/pytypes.h
3rdparty/pybind11/include/pybind11/stl.h
3rdparty/pybind11/include/pybind11/stl_bind.h
3rdparty/pybind11/include/pybind11/detail/class.h
3rdparty/pybind11/include/pybind11/detail/common.h
3rdparty/pybind11/include/pybind11/detail/descr.h
3rdparty/pybind11/include/pybind11/detail/init.h
3rdparty/pybind11/include/pybind11/detail/internals.h
3rdparty/pybind11/include/pybind11/detail/typeid.h
3rdparty/pybind11/tests/CMakeLists.txt
3rdparty/pybind11/tests/constructor_stats.h
3rdparty/pybind11/tests/cross_module_gil_utils.cpp
3rdparty/pybind11/tests/local_bindings.h
3rdparty/pybind11/tests/object.h
3rdparty/pybind11/tests/pybind11_cross_module_tests.cpp
3rdparty/pybind11/tests/pybind11_tests.cpp
3rdparty/pybind11/tests/pybind11_tests.h
3rdparty/pybind11/tests/test_async.cpp
3rdparty/pybind11/tests/test_buffers.cpp
3rdparty/pybind11/tests/test_builtin_casters.cpp
3rdparty/pybind11/tests/test_call_policies.cpp
3rdparty/pybind11/tests/test_callbacks.cpp
3rdparty/pybind11/tests/test_chrono.cpp
3rdparty/pybind11/tests/test_class.cpp
3rdparty/pybind11/tests/test_constants_and_functions.cpp
3rdparty/pybind11/tests/test_copy_move.cpp
3rdparty/pybind11/tests/test_docstring_options.cpp
3rdparty/pybind11/tests/test_eigen.cpp
3rdparty/pybind11/tests/test_enum.cpp
3rdparty/pybind11/tests/test_eval.cpp
3rdparty/pybind11/tests/test_exceptions.cpp
3rdparty/pybind11/tests/test_factory_constructors.cpp
3rdparty/pybind11/tests/test_gil_scoped.cpp
3rdparty/pybind11/tests/test_iostream.cpp
3rdparty/pybind11/tests/test_kwargs_and_defaults.cpp
3rdparty/pybind11/tests/test_local_bindings.cpp
3rdparty/pybind11/tests/test_methods_and_attributes.cpp
3rdparty/pybind11/tests/test_modules.cpp
3rdparty/pybind11/tests/test_multiple_inheritance.cpp
3rdparty/pybind11/tests/test_numpy_array.cpp
3rdparty/pybind11/tests/test_numpy_dtypes.cpp
3rdparty/pybind11/tests/test_numpy_vectorize.cpp
3rdparty/pybind11/tests/test_opaque_types.cpp
3rdparty/pybind11/tests/test_operator_overloading.cpp
3rdparty/pybind11/tests/test_pickling.cpp
3rdparty/pybind11/tests/test_pytypes.cpp
3rdparty/pybind11/tests/test_sequences_and_iterators.cpp
3rdparty/pybind11/tests/test_smart_ptr.cpp
3rdparty/pybind11/tests/test_stl.cpp
3rdparty/pybind11/tests/test_stl_binders.cpp
3rdparty/pybind11/tests/test_tagbased_polymorphic.cpp
3rdparty/pybind11/tests/test_union.cpp
3rdparty/pybind11/tests/test_virtual_functions.cpp
3rdparty/pybind11/tests/test_cmake_build/CMakeLists.txt
3rdparty/pybind11/tests/test_cmake_build/embed.cpp
3rdparty/pybind11/tests/test_cmake_build/main.cpp
3rdparty/pybind11/tests/test_cmake_build/installed_embed/CMakeLists.txt
3rdparty/pybind11/tests/test_cmake_build/installed_function/CMakeLists.txt
3rdparty/pybind11/tests/test_cmake_build/installed_target/CMakeLists.txt
3rdparty/pybind11/tests/test_cmake_build/subdirectory_embed/CMakeLists.txt
3rdparty/pybind11/tests/test_cmake_build/subdirectory_function/CMakeLists.txt
3rdparty/pybind11/tests/test_cmake_build/subdirectory_target/CMakeLists.txt
3rdparty/pybind11/tests/test_embed/CMakeLists.txt
3rdparty/pybind11/tests/test_embed/catch.cpp
3rdparty/pybind11/tests/test_embed/external_module.cpp
3rdparty/pybind11/tests/test_embed/test_interpreter.cpp
3rdparty/pybind11/tools/FindCatch.cmake
3rdparty/pybind11/tools/FindEigen3.cmake
3rdparty/pybind11/tools/FindPythonLibsNew.cmake
3rdparty/pybind11/tools/pybind11Tools.cmake
lightseq/__init__.py
lightseq.egg-info/PKG-INFO
lightseq.egg-info/SOURCES.txt
lightseq.egg-info/dependency_links.txt
lightseq.egg-info/entry_points.txt
lightseq.egg-info/not-zip-safe
lightseq.egg-info/requires.txt
lightseq.egg-info/top_level.txt
lightseq/inference/kernels/CMakeLists.txt
lightseq/inference/kernels/common.h
lightseq/inference/kernels/embKernels.cc.cu
lightseq/inference/kernels/embKernels.h
lightseq/inference/kernels/gptKernels.cc.cu
lightseq/inference/kernels/gptKernels.h
lightseq/inference/kernels/multilgKernels.cc.cu
lightseq/inference/kernels/multilgKernels.h
lightseq/inference/kernels/transformerKernels.cc.cu
lightseq/inference/kernels/transformerKernels.h
lightseq/inference/model/CMakeLists.txt
lightseq/inference/model/bert_encoder.cc.cu
lightseq/inference/model/bert_encoder.h
lightseq/inference/model/decoder.cc.cu
lightseq/inference/model/decoder.h
lightseq/inference/model/encoder.cc.cu
lightseq/inference/model/encoder.h
lightseq/inference/model/gpt_encoder.cc.cu
lightseq/inference/model/gpt_encoder.h
lightseq/inference/proto/CMakeLists.txt
lightseq/inference/proto/bert.proto
lightseq/inference/proto/bert_weight.cc
lightseq/inference/proto/bert_weight.h
lightseq/inference/proto/gpt.proto
lightseq/inference/proto/gpt_weight.cc
lightseq/inference/proto/gpt_weight.h
lightseq/inference/proto/transformer.proto
lightseq/inference/proto/transformer_weight.cc
lightseq/inference/proto/transformer_weight.h
lightseq/inference/pywrapper/CMakeLists.txt
lightseq/inference/pywrapper/bert.cc.cu
lightseq/inference/pywrapper/gpt.cc.cu
lightseq/inference/pywrapper/transformer.cc.cu
lightseq/inference/pywrapper/transformer_decoder.cc.cu
lightseq/inference/pywrapper/wrapper.cc
lightseq/inference/server/CMakeLists.txt
lightseq/inference/server/custom.h
lightseq/inference/server/decoder_generate_server.cc.cu
lightseq/inference/server/generate_server.cc.cu
lightseq/inference/server/gpt_generate_server.cc.cu
lightseq/inference/server/gptlm_server.cc.cu
lightseq/inference/server/libserver.ldscript
lightseq/inference/server/model_config.h
lightseq/inference/server/model_config.proto
lightseq/inference/server/model_config_cuda.h
lightseq/inference/server/transformer_server.cc.cu
lightseq/inference/tools/CMakeLists.txt
lightseq/inference/tools/util.cc.cu
lightseq/inference/tools/util.h
lightseq/training/__init__.py
lightseq/training/cli/__init__.py
lightseq/training/cli/lightseq_deepspeed_cli.py
lightseq/training/cli/lightseq_fairseq_generate_cli.py
lightseq/training/cli/lightseq_fairseq_train_cli.py
lightseq/training/cli/lightseq_fairseq_validate_cli.py
lightseq/training/cli/fs_modules/__init__.py
lightseq/training/cli/fs_modules/ls_adam.py
lightseq/training/cli/fs_modules/ls_fs_transformer_decoder_layer.py
lightseq/training/cli/fs_modules/ls_label_smoothed_cross_entropy.py
lightseq/training/cli/fs_modules/ls_transformer.py
lightseq/training/csrc/kernels/cross_entropy.cu
lightseq/training/csrc/kernels/cublas_wrappers.cu
lightseq/training/csrc/kernels/cuda_util.cu
lightseq/training/csrc/kernels/dropout_kernels.cu
lightseq/training/csrc/kernels/embedding_kernels.cu
lightseq/training/csrc/kernels/fused_adam_kernel.cu
lightseq/training/csrc/kernels/general_kernels.cu
lightseq/training/csrc/kernels/normalize_kernels.cu
lightseq/training/csrc/kernels/softmax_kernels.cu
lightseq/training/csrc/kernels/transform_kernels.cu
lightseq/training/csrc/kernels/includes/block_reduce.h
lightseq/training/csrc/kernels/includes/cublas_wrappers.h
lightseq/training/csrc/kernels/includes/cuda_util.h
lightseq/training/csrc/kernels/includes/fused_adam_kernel.h
lightseq/training/csrc/kernels/includes/kernels.h
lightseq/training/csrc/kernels/includes/ls_cub.cuh
lightseq/training/csrc/kernels/includes/multi_tensor_apply.cuh
lightseq/training/csrc/ops/cross_entropy_layer.cpp
lightseq/training/csrc/ops/transformer_decoder_layer.cpp
lightseq/training/csrc/ops/transformer_embedding_layer.cpp
lightseq/training/csrc/ops/transformer_encoder_layer.cpp
lightseq/training/csrc/ops/includes/context.h
lightseq/training/csrc/ops/includes/cross_entropy_layer.h
lightseq/training/csrc/ops/includes/dropout.h
lightseq/training/csrc/ops/includes/feed_forward.h
lightseq/training/csrc/ops/includes/normalize_layer.h
lightseq/training/csrc/ops/includes/softmax.h
lightseq/training/csrc/ops/includes/strided_batch_gemm.h
lightseq/training/csrc/ops/includes/transformer_decoder_layer.h
lightseq/training/csrc/ops/includes/transformer_embedding_layer.h
lightseq/training/csrc/ops/includes/transformer_encoder_layer.h
lightseq/training/csrc/torch/pybind_adam.cpp
lightseq/training/csrc/torch/pybind_kernel.cpp
lightseq/training/csrc/torch/pybind_op.cpp
lightseq/training/ops/__init__.py
lightseq/training/ops/pytorch/__init__.py
lightseq/training/ops/pytorch/adam.py
lightseq/training/ops/pytorch/cross_entropy_layer.py
lightseq/training/ops/pytorch/export.py
lightseq/training/ops/pytorch/transformer.py
lightseq/training/ops/pytorch/transformer_decoder_layer.py
lightseq/training/ops/pytorch/transformer_embedding_layer.py
lightseq/training/ops/pytorch/transformer_encoder_layer.py
lightseq/training/ops/pytorch/util.py
lightseq/training/ops/pytorch/builder/__init__.py
lightseq/training/ops/pytorch/builder/adam_builder.py
lightseq/training/ops/pytorch/builder/builder.py
lightseq/training/ops/pytorch/builder/kernel_builder.py
lightseq/training/ops/pytorch/builder/transformer_builder.py
lightseq/training/ops/tensorflow/__init__.py