# Description:
#   GPU-specific components in XLA service implementation.

load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
load(
    "@local_config_rocm//rocm:build_defs.bzl",
    "if_rocm_is_configured",
)
load("//xla:xla.default.bzl", "xla_cc_test", "xla_internal")
load("//xla/tests:build_defs.bzl", "xla_test")
load(
    "//xla/tsl:tsl.bzl",
    "if_google",
    "internal_visibility",
    "tsl_copts",
    "tsl_gpu_library",
)
load("//xla/tsl:tsl.default.bzl", "filegroup", "get_compatible_with_portable")
load(
    "//xla/tsl/platform:build_config.bzl",
    "tf_proto_library",
)
load(
    "//xla/tsl/platform/default:cuda_build_defs.bzl",
    "if_cuda_is_configured",
)

# Package-wide defaults. Targets that do not set their own `visibility` are
# visible to whatever internal_visibility([":friends"]) evaluates to, and the
# package is declared under the "notice" license type.
package(
    # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
    default_visibility = internal_visibility([":friends"]),
    licenses = ["notice"],
)

# Package group referenced by this package's default visibility. It lists no
# packages of its own; membership comes entirely from including //xla:friends.
package_group(
    name = "friends",
    includes = [
        "//xla:friends",
    ],
)

# Filegroup used to collect source files for dependency checking.
# The glob matches every `.cc` and `.h` file at any depth below this package
# directory; the matches are attached through the `data` attribute (not `srcs`).
filegroup(
    name = "c_srcs",
    data = glob([
        "**/*.cc",
        "**/*.h",
    ]),
)

# Exports the autotune DB textproto so that it can be referenced by label.
# NOTE(review): judging by the file name it is consumed by a gpu_compiler test;
# the consumer is not visible in this part of the file.
exports_files(srcs = ["gpu_compiler_test_autotune_db.textproto"])

# Proto library built from backend_configs.proto. It depends on the xla_data,
# autotuning and dnn protos. Other targets in this file consume it through the
# `:backend_configs_cc` label (presumably generated by tf_proto_library).
tf_proto_library(
    name = "backend_configs",
    srcs = ["backend_configs.proto"],
    make_default_target_header_only = True,
    protodeps = [
        "//xla:xla_data_proto",
        "//xla:autotuning_proto",
        "//xla/tsl/protobuf:dnn_proto",
    ],
)

# Test for the backend config protos (backend_configs_test.cc). Uses the
# hardware-independent HLO test base and XLA's internal test main.
xla_cc_test(
    name = "backend_configs_test",
    srcs = ["backend_configs_test.cc"],
    deps = [
        ":backend_configs_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for gpu_executable_run_options.{h,cc}. Unlike most targets in this
# package it overrides the package default and is publicly visible. Its deps
# include the GPU collectives and clique id/key targets.
cc_library(
    name = "gpu_executable_run_options",
    srcs = ["gpu_executable_run_options.cc"],
    hdrs = ["gpu_executable_run_options.h"],
    compatible_with = get_compatible_with_portable(),
    visibility = ["//visibility:public"],
    deps = [
        "//xla:executable_run_options",
        "//xla/backends/gpu/collectives:gpu_collectives",
        "//xla/core/collectives:clique_id",
        "//xla/core/collectives:clique_key",
        "//xla/service:global_device_id",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/functional:any_invocable",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/types:span",
    ],
)

# Header-only library exposing gpu_constants.h; its single dependency is the
# emitters' kernel_arguments target.
cc_library(
    name = "gpu_constants",
    hdrs = ["gpu_constants.h"],
    # copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
    deps = ["//xla/codegen/emitters:kernel_arguments"],
)

# Header-only library (no srcs) exposing gpu_memory_space_assignment.h. Depends
# on the backend config protos, HLO alias/ordering analyses and the buffer
# assignment targets from //xla/service.
cc_library(
    name = "gpu_memory_space_assignment",
    hdrs = ["gpu_memory_space_assignment.h"],
    deps = [
        ":backend_configs_cc",
        "//xla/hlo/analysis:hlo_alias_analysis",
        "//xla/hlo/analysis:hlo_ordering",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:buffer_value",
        "//xla/service:hlo_value",
        "@com_google_absl//absl/base:no_destructor",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
    ],
)

# Library for launch_dimensions.{h,cc}. Depends on the companion
# `:launch_dimensions_proto_cc` proto target, the //xla/runtime work_* targets
# and stream_executor's device description and launch_dim targets.
cc_library(
    name = "launch_dimensions",
    srcs = [
        "launch_dimensions.cc",
    ],
    hdrs = [
        "launch_dimensions.h",
    ],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":launch_dimensions_proto_cc",
        "//xla:shape_util",
        "//xla:util",
        "//xla/runtime:work_cluster",
        "//xla/runtime:work_dimensions",
        "//xla/runtime:work_group",
        "//xla/runtime:work_item",
        "//xla/service:platform_util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:launch_dim",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Proto library built from launch_dimensions.proto; depends on
# stream_executor's launch_dim proto. `:launch_dimensions` consumes it via the
# `:launch_dimensions_proto_cc` label (presumably generated by tf_proto_library).
tf_proto_library(
    name = "launch_dimensions_proto",
    srcs = ["launch_dimensions.proto"],
    protodeps = ["//xla/stream_executor:launch_dim_proto"],
)

# Unit test for :launch_dimensions (launch_dimensions_test.cc). Links the stock
# gtest_main and pulls in status and proto matcher helpers.
xla_cc_test(
    name = "launch_dimensions_test",
    srcs = ["launch_dimensions_test.cc"],
    deps = [
        ":launch_dimensions",
        "//xla/stream_executor:launch_dim",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:protobuf",
    ],
)

# Custom-call test (custom_call_test.cc), declared with xla_test and restricted
# to the "gpu" backend. GOOGLE_CUDA=1 is passed as a local define through
# if_cuda_is_configured, and the CUDA / ROCm header targets are appended to deps
# through if_cuda_is_configured / if_rocm_is_configured respectively.
# NOTE(review): both the //xla/tsl/platform and @local_tsl//tsl/platform
# variants of `statusor` and `test` are listed; confirm whether both are needed.
xla_test(
    name = "custom_call_test",
    srcs = ["custom_call_test.cc"],
    backends = ["gpu"],
    local_defines = if_cuda_is_configured(["GOOGLE_CUDA=1"]),
    deps = [
        "//xla:debug_options_flags",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:xla_data_proto_cc",
        "//xla/ffi",
        "//xla/ffi:execution_context",
        "//xla/ffi:ffi_api",
        "//xla/hlo/builder:xla_builder",
        "//xla/hlo/builder/lib:constants",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:test_helpers",
        "//xla/service:custom_call_status",
        "//xla/service:custom_call_target_registry",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor:scratch_allocator",
        "//xla/stream_executor:stream",
        "//xla/stream_executor/gpu:gpu_types_header",
        "//xla/tests:client_library_test_runner_mixin",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/platform:test",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ] + if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
    ]) + if_rocm_is_configured([
        "@local_config_rocm//rocm:rocm_headers",
    ]),
)

# Test gpu_copy_insertion_test.cc. Depends on the generic
# //xla/service:copy_insertion target together with this package's :alias_info
# and :gpu_device_info_for_tests, on top of the hardware-independent HLO test
# base.
xla_cc_test(
    name = "gpu_copy_insertion_test",
    srcs = ["gpu_copy_insertion_test.cc"],
    deps = [
        ":alias_info",
        ":gpu_device_info_for_tests",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:test",
        "//xla/hlo/testlib:test_helpers",
        "//xla/service:copy_insertion",
        "//xla/stream_executor:device_description",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for hlo_to_ir_bindings.{h,cc}. Depends on HLO IR, the
# //xla/service/llvm_ir helpers (buffer assignment util, ir_array, llvm_util)
# and LLVM Core/Support.
cc_library(
    name = "hlo_to_ir_bindings",
    srcs = ["hlo_to_ir_bindings.cc"],
    hdrs = ["hlo_to_ir_bindings.h"],
    deps = [
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:llvm_util",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Library for target_util.{h,cc}. Depends on HLO IR, the llvm_ir type
# conversion and utility helpers, and LLVM Core/Support/TargetParser.
cc_library(
    name = "target_util",
    srcs = ["target_util.cc"],
    hdrs = ["target_util.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service/llvm_ir:llvm_type_conversion_util",
        "//xla/service/llvm_ir:llvm_util",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Unit test for :target_util (target_util_test.cc); links LLVM
# Core/Support/TargetParser directly and uses XLA's internal test main.
xla_cc_test(
    name = "target_util_test",
    srcs = ["target_util_test.cc"],
    deps = [
        ":target_util",
        "//xla:xla_data_proto_cc",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library for gpu_device_info_for_tests.{h,cc}. TENSORFLOW_USE_ROCM=1 is passed
# as a local define through if_rocm_is_configured.
cc_library(
    name = "gpu_device_info_for_tests",
    # This is deliberately *not* marked testonly: it is used by a cc_binary that
    # is itself used for testing, and testonly libraries are not allowed in
    # cc_binaries.
    testonly = 0,
    srcs = ["gpu_device_info_for_tests.cc"],
    hdrs = ["gpu_device_info_for_tests.h"],
    compatible_with = get_compatible_with_portable(),
    local_defines = if_rocm_is_configured(["TENSORFLOW_USE_ROCM=1"]),
    deps = [
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor/cuda:cuda_compute_capability",
    ],
)

# Library for ir_emitter_context.{h,cc}. Depends on several sibling targets
# (execution stream assignment, GPU constants, gpu_executable, IR emission
# utils, kernel reuse cache), the collective thunk runtime target, and LLVM/MLIR
# IR headers.
cc_library(
    name = "ir_emitter_context",
    srcs = ["ir_emitter_context.cc"],
    hdrs = ["ir_emitter_context.h"],
    deps = [
        ":execution_stream_assignment",
        ":gpu_constants",
        ":gpu_executable",
        ":ir_emission_utils",
        ":kernel_reuse_cache",
        "//xla/backends/gpu/runtime:collective_thunk",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:name_uniquer",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/strings:string_view",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
    ],
)

# Library for ir_emitter_unnested.{h,cc}, tagged "gpu". This is the largest
# dependency set in this part of the file: it depends on :ir_emitter and
# :ir_emitter_context, the GPU codegen/fusion emitters, essentially every thunk
# target under //xla/backends/gpu/runtime (collectives, NVSHMEM, command
# buffers, convolution, GEMM, FFT, sort, ...), the llvm_ir helpers, and the
# MLIR-to-LLVM-IR translation libraries (including NVVM and ROCDL) plus the
# Triton dialects.
cc_library(
    name = "ir_emitter_unnested",
    srcs = ["ir_emitter_unnested.cc"],
    hdrs = ["ir_emitter_unnested.h"],
    tags = ["gpu"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":execution_stream_assignment",
        ":gpu_constants",
        ":gpu_conv_runner",
        ":gpu_norm_runner",
        ":hlo_fusion_analysis",
        ":ir_emission_utils",
        ":ir_emitter",
        ":ir_emitter_context",
        ":kernel_reuse_cache",
        ":launch_dimensions",
        ":matmul_utils",
        ":parallel_loop_emitter",
        ":stream_executor_util",
        ":triton_call",
        "//xla:autotuning_proto_cc",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/backends/gpu/codegen:fusion_emitter",
        "//xla/backends/gpu/codegen:fusions",
        "//xla/backends/gpu/codegen/triton:fusion_emitter",
        "//xla/backends/gpu/collectives:gpu_clique_key",
        "//xla/backends/gpu/runtime:all_gather_thunk",
        "//xla/backends/gpu/runtime:all_reduce_thunk",
        "//xla/backends/gpu/runtime:all_to_all_thunk",
        "//xla/backends/gpu/runtime:cholesky_thunk",
        "//xla/backends/gpu/runtime:collective_broadcast_thunk",
        "//xla/backends/gpu/runtime:collective_group_thunk",
        "//xla/backends/gpu/runtime:collective_permute_thunk",
        "//xla/backends/gpu/runtime:collective_thunk",
        "//xla/backends/gpu/runtime:command_buffer_cmd",
        "//xla/backends/gpu/runtime:command_buffer_cmd_emitter",
        "//xla/backends/gpu/runtime:command_buffer_thunk",
        "//xla/backends/gpu/runtime:conditional_thunk",
        "//xla/backends/gpu/runtime:convolution_thunk",
        "//xla/backends/gpu/runtime:copy_thunk",
        "//xla/backends/gpu/runtime:cub_sort_thunk",
        "//xla/backends/gpu/runtime:cudnn_thunk",
        "//xla/backends/gpu/runtime:custom_call_target",
        "//xla/backends/gpu/runtime:custom_call_thunk",
        "//xla/backends/gpu/runtime:fft_thunk",
        "//xla/backends/gpu/runtime:gemm_thunk",
        "//xla/backends/gpu/runtime:gpublas_lt_matmul_thunk",
        "//xla/backends/gpu/runtime:host_send_recv_thunk",
        "//xla/backends/gpu/runtime:infeed_thunk",
        "//xla/backends/gpu/runtime:kernel_thunk",
        "//xla/backends/gpu/runtime:norm_thunk",
        "//xla/backends/gpu/runtime:nvshmem_collective_permute_thunk",
        "//xla/backends/gpu/runtime:nvshmem_collective_thunk",
        "//xla/backends/gpu/runtime:nvshmem_recv_thunk",
        "//xla/backends/gpu/runtime:nvshmem_send_thunk",
        "//xla/backends/gpu/runtime:outfeed_thunk",
        "//xla/backends/gpu/runtime:p2p_thunk_common",
        "//xla/backends/gpu/runtime:ragged_all_to_all_thunk",
        "//xla/backends/gpu/runtime:recv_thunk",
        "//xla/backends/gpu/runtime:replica_id_thunk",
        "//xla/backends/gpu/runtime:send_thunk",
        "//xla/backends/gpu/runtime:sequential_thunk",
        "//xla/backends/gpu/runtime:thunk",
        "//xla/backends/gpu/runtime:topk",
        "//xla/backends/gpu/runtime:triangular_solve_thunk",
        "//xla/backends/gpu/runtime:wait_for_streams_thunk",
        "//xla/backends/gpu/runtime:while_thunk",
        "//xla/codegen/emitters:kernel_arguments",
        "//xla/ffi:attribute_map",
        "//xla/ffi:ffi_api",
        "//xla/ffi/api:c_api",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/mlir/utils:error_util",
        "//xla/mlir_hlo:transforms_gpu_passes",
        "//xla/service:buffer_assignment",
        "//xla/service:call_graph",
        "//xla/service:collective_ops_utils",
        "//xla/service:custom_call_status",
        "//xla/service:custom_call_target_registry",
        "//xla/service:global_device_id",
        "//xla/service:name_uniquer",
        "//xla/service:platform_util",
        "//xla/service/gpu/kernels:custom_kernel",
        "//xla/service/gpu/model:tiled_hlo_instruction_or_computation",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:kernel_support_library",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/llvm_ir:loop_emitter",
        "//xla/service/llvm_ir:sort_util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:gpu_solver_context",
        "//xla/stream_executor:launch_dim",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "//xla/stream_executor/platform:platform_object_registry",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Linker",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:AsmParser",
        "@llvm-project//mlir:BuiltinToLLVMIRTranslation",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:LLVMDialect",
        "@llvm-project//mlir:LLVMToLLVMIRTranslation",
        "@llvm-project//mlir:MemRefTransforms",
        "@llvm-project//mlir:NVVMToLLVMIRTranslation",
        "@llvm-project//mlir:Parser",
        "@llvm-project//mlir:ROCDLToLLVMIRTranslation",
        "@llvm-project//mlir:Support",
        "@llvm-project//mlir:ToLLVMIRTranslation",
        "@local_tsl//tsl/platform:human_readable_json",
        "@triton//:TritonDialects",
    ],
)

# Library bundling two source/header pairs: ir_emitter.{h,cc} and
# ir_emitter_nested.{h,cc}. Depends on :hlo_to_ir_bindings,
# :ir_emitter_context, the elemental IR emitter and the
# //xla/service/llvm_ir helpers.
cc_library(
    name = "ir_emitter",
    srcs = [
        "ir_emitter.cc",
        "ir_emitter_nested.cc",
    ],
    hdrs = [
        "ir_emitter.h",
        "ir_emitter_nested.h",
    ],
    deps = [
        ":hlo_to_ir_bindings",
        ":ir_emission_utils",
        ":ir_emitter_context",
        ":kernel_reuse_cache",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:elemental_ir_emitter",
        "//xla/service/llvm_ir:buffer_assignment_util",
        "//xla/service/llvm_ir:fused_ir_emitter",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:ir_builder_mixin",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/llvm_ir:loop_emitter",
        "//xla/service/llvm_ir:tuple_ops",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/hash",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for triton_call.{h,cc}. Its only dependencies are absl string_view
# and the MLIR AsmParser/IR/Parser/Support libraries.
cc_library(
    name = "triton_call",
    srcs = ["triton_call.cc"],
    hdrs = ["triton_call.h"],
    deps = [
        "@com_google_absl//absl/strings:string_view",
        "@llvm-project//mlir:AsmParser",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Parser",
        "@llvm-project//mlir:Support",
    ],
)

# Library for parallel_loop_emitter.{h,cc}. Builds on :launch_dimensions and
# :target_util plus the llvm_ir loop helpers (llvm_loop, loop_emitter,
# kernel_support_library).
cc_library(
    name = "parallel_loop_emitter",
    srcs = ["parallel_loop_emitter.cc"],
    hdrs = ["parallel_loop_emitter.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":launch_dimensions",
        ":target_util",
        "//xla:shape_util",
        "//xla/service/llvm_ir:ir_array",
        "//xla/service/llvm_ir:kernel_support_library",
        "//xla/service/llvm_ir:llvm_loop",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/llvm_ir:loop_emitter",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Library for buffer_allocations.{h,cc}. Depends on buffer assignment and on
# stream_executor's device memory and device memory allocator targets.
cc_library(
    name = "buffer_allocations",
    srcs = ["buffer_allocations.cc"],
    hdrs = ["buffer_allocations.h"],
    deps = [
        "//xla/service:buffer_assignment",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:device_memory_allocator",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Boolean build setting `enable_xlir`. The default comes from
# if_google(True, False).
# NOTE(review): presumably True for Google-internal builds and False
# otherwise; confirm against the if_google definition in //xla/tsl:tsl.bzl.
bool_flag(
    name = "enable_xlir",
    build_setting_default = if_google(True, False),
)

# Library for gpu_executable.{h,cc}. Depends on the companion
# `:gpu_executable_proto_cc` proto, :buffer_allocations,
# :gpu_executable_run_options, the thunk runtime targets, and a broad set of
# stream_executor targets, including the CUDA, ROCm and SYCL platform-id
# targets (listed unconditionally).
cc_library(
    name = "gpu_executable",
    srcs = [
        "gpu_executable.cc",
    ],
    hdrs = [
        "gpu_executable.h",
    ],
    deps = [
        ":alias_info",
        ":backend_configs_cc",
        ":buffer_allocations",
        ":gpu_constants",
        ":gpu_executable_proto_cc",
        ":gpu_executable_run_options",
        ":ir_emission_utils",
        ":resource_requests",
        ":stream_executor_util",
        "//xla:executable_run_options",
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla/backends/gpu/collectives:gpu_clique_key",
        "//xla/backends/gpu/runtime:annotation",
        "//xla/backends/gpu/runtime:sequential_thunk",
        "//xla/backends/gpu/runtime:thunk",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:executable",
        "//xla/service:hlo_value",
        "//xla/service:maybe_owning_device_memory",
        "//xla/service:rendezvous",
        "//xla/service:shaped_buffer",
        "//xla/service:stream_pool",
        "//xla/service:xla_debug_info_manager",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor:event_based_timer",
        "//xla/stream_executor:module_spec",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:scoped_module_handle",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "//xla/stream_executor/sycl:sycl_platform_id",
        "//xla/tsl/platform:env_time",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:logging",
        "//xla/tsl/platform:status",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:random",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
    ],
)

# Proto library built from gpu_executable.proto. Its proto deps are this
# package's ir_emission_utils proto, the core XLA protos (xla_data, xla,
# shape_util, hlo), the thunk proto, and stream_executor's CUDA compute
# capability and device description protos.
tf_proto_library(
    name = "gpu_executable_proto",
    srcs = ["gpu_executable.proto"],
    protodeps = [
        ":ir_emission_utils_proto",
        "//xla:xla_data_proto",
        "//xla:xla_proto",
        "//xla:shape_util_proto",
        "//xla/service:hlo_proto",
        "//xla/backends/gpu/runtime:thunk_proto",
        "//xla/stream_executor/cuda:cuda_compute_capability_proto",
        "//xla/stream_executor:device_description_proto",
    ],
)

# Unit test for :gpu_executable (gpu_executable_test.cc); links the stock
# gtest_main.
# NOTE(review): `status_matchers` is listed from both //xla/tsl/platform and
# @local_tsl//tsl/platform; confirm whether both are still required.
xla_cc_test(
    name = "gpu_executable_test",
    srcs = ["gpu_executable_test.cc"],
    deps = [
        ":gpu_executable",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library for ir_emission_utils.{h,cc}. Depends on the companion
# `:ir_emission_utils_proto_cc` proto, :matmul_indexing_utils, :target_util,
# the backend-independent //xla/codegen:ir_emission_utils target and LLVM
# Core/Support.
cc_library(
    name = "ir_emission_utils",
    srcs = ["ir_emission_utils.cc"],
    hdrs = ["ir_emission_utils.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils_proto_cc",
        ":matmul_indexing_utils",
        ":target_util",
        "//xla:literal",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/codegen:ir_emission_utils",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/service:buffer_assignment",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/stream_executor:device_description",
        "//xla/tsl/lib/strings:proto_serialization",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:protobuf",
    ],
)

# Proto library built from ir_emission_utils.proto. It declares no proto
# dependencies and is itself a protodep of :gpu_executable_proto.
tf_proto_library(
    name = "ir_emission_utils_proto",
    srcs = ["ir_emission_utils.proto"],
)

# Unit test for :ir_emission_utils (ir_emission_utils_test.cc). Uses the
# hardware-independent HLO test base and XLA's internal test main.
xla_cc_test(
    name = "ir_emission_utils_test",
    srcs = ["ir_emission_utils_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils",
        ":ir_emission_utils_proto_cc",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:types",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/service:buffer_assignment",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Library for reduction_utils.{h,cc}. Builds on :ir_emission_utils, HLO IR and
# stream_executor's device description.
cc_library(
    name = "reduction_utils",
    srcs = ["reduction_utils.cc"],
    hdrs = ["reduction_utils.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Unit test for :reduction_utils (reduction_utils_test.cc). The test main
# dependency carries a `fixdeps: keep` marker so that dependency-fixing tooling
# leaves it in place.
xla_cc_test(
    name = "reduction_utils_test",
    srcs = ["reduction_utils_test.cc"],
    deps = [
        ":reduction_utils",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Library for cublas_cudnn.{h,cc}. It depends only on //xla:util, HLO IR and
# status/string helpers; no CUDA, cuBLAS or cuDNN targets appear in its deps.
cc_library(
    name = "cublas_cudnn",
    srcs = ["cublas_cudnn.cc"],
    hdrs = ["cublas_cudnn.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for dynamic_slicing_utils.{h,cc}. Depends on :gpu_constants,
# :ir_emission_utils, the while-loop analysis, HLO query/traversal utilities and
# the call graph.
cc_library(
    name = "dynamic_slicing_utils",
    srcs = ["dynamic_slicing_utils.cc"],
    hdrs = ["dynamic_slicing_utils.h"],
    # copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
    deps = [
        ":gpu_constants",
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/analysis:while_loop_analysis",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/service:call_graph",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
)

# Library for triton_tiling_propagation.{h,cc}. Depends on the Triton codegen
# `support` target, HLO IR/query utilities, instruction fusion and
# stream_executor's device description.
cc_library(
    name = "triton_tiling_propagation",
    srcs = ["triton_tiling_propagation.cc"],
    hdrs = ["triton_tiling_propagation.h"],
    deps = [
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla/backends/gpu/codegen/triton:support",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
    ],
)

# Unit test for :triton_tiling_propagation (triton_tiling_propagation_test.cc),
# built on the hardware-independent HLO test base.
xla_cc_test(
    name = "triton_tiling_propagation_test",
    srcs = ["triton_tiling_propagation_test.cc"],
    deps = [
        ":triton_tiling_propagation",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest",
    ],
)

# Library for triton_fusion_analysis.{h,cc}. Builds on
# :triton_tiling_propagation, :matmul_indexing_utils and :cudnn_support_utils,
# plus the autotuning proto and instruction fusion.
cc_library(
    name = "triton_fusion_analysis",
    srcs = ["triton_fusion_analysis.cc"],
    hdrs = ["triton_fusion_analysis.h"],
    deps = [
        ":cudnn_support_utils",
        ":matmul_indexing_utils",
        ":triton_tiling_propagation",
        "//xla:autotuning_proto_cc",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:instruction_fusion",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Unit test for :triton_fusion_analysis (triton_fusion_analysis_test.cc). Also
# depends on the gemm_fusion transform from //xla/service/gpu/transforms.
# NOTE(review): `status_matchers` and `statusor` are listed from both
# //xla/tsl/platform and @local_tsl//tsl/platform; confirm whether both are
# still required.
xla_cc_test(
    name = "triton_fusion_analysis_test",
    srcs = ["triton_fusion_analysis_test.cc"],
    deps = [
        ":triton_fusion_analysis",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/service/gpu/transforms:gemm_fusion",
        "//xla/stream_executor:device_description",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library for split_k_gemm_rewriter.{h,cc}. Builds on
# :triton_fusion_analysis, :triton_tiling_propagation, :matmul_utils and
# :matmul_indexing_utils, plus the Triton codegen `support` target and HLO
# creation utilities.
cc_library(
    name = "split_k_gemm_rewriter",
    srcs = ["split_k_gemm_rewriter.cc"],
    hdrs = ["split_k_gemm_rewriter.h"],
    deps = [
        ":ir_emission_utils",
        ":matmul_indexing_utils",
        ":matmul_utils",
        ":triton_fusion_analysis",
        ":triton_tiling_propagation",
        "//xla:autotuning_proto_cc",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/backends/gpu/codegen/triton:support",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:algorithm_util",
        "//xla/service:hlo_creation_utils",
        "//xla/tsl/platform:errors",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:cord",
        "@com_google_absl//absl/types:span",
    ],
)

# Test target for :split_k_gemm_rewriter, built from
# split_k_gemm_rewriter_test.cc. The test main comes from
# //xla/tests:xla_internal_test_main.
xla_cc_test(
    name = "split_k_gemm_rewriter_test",
    srcs = ["split_k_gemm_rewriter_test.cc"],
    deps = [
        ":matmul_utils",
        ":split_k_gemm_rewriter",
        ":triton_fusion_analysis",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:pattern_matcher_gmock",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_assignment",
        "//xla/service:pattern_matcher",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_googletest//:gtest",
    ],
)

# C++ library built from matmul_indexing_utils.{cc,h}.
# compatible_with is supplied by get_compatible_with_portable(), which this
# file loads from //xla/tsl:tsl.default.bzl.
# NOTE(review): statusor is listed under both //xla/tsl/platform and
# @local_tsl//tsl/platform; confirm whether both are still required.
cc_library(
    name = "matmul_indexing_utils",
    srcs = ["matmul_indexing_utils.cc"],
    hdrs = ["matmul_indexing_utils.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:autotuning_proto_cc",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :matmul_indexing_utils, built from
# matmul_indexing_utils_test.cc. The xla_internal_test_main dependency carries a
# "build_cleaner: keep" directive and must stay listed explicitly.
xla_cc_test(
    name = "matmul_indexing_utils_test",
    srcs = ["matmul_indexing_utils_test.cc"],
    deps = [
        ":matmul_indexing_utils",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:test",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "//xla/tsl/lib/core:status_test_util",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
    ],
)

# C++ library built from matmul_utils.{cc,h}.
# Builds on :matmul_indexing_utils and :backend_configs_cc, and links against
# the stream_executor BLAS targets (//xla/stream_executor:blas and
# //xla/stream_executor/gpu:gpu_blas_lt).
cc_library(
    name = "matmul_utils",
    srcs = ["matmul_utils.cc"],
    hdrs = ["matmul_utils.h"],
    deps = [
        ":backend_configs_cc",
        ":matmul_indexing_utils",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:algorithm_util",
        "//xla/stream_executor:blas",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:numeric_options",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/gpu:gpu_blas_lt",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :matmul_utils, built from matmul_utils_test.cc.
# The xla_internal_test_main dependency carries a "build_cleaner: keep"
# directive and must stay listed explicitly.
xla_cc_test(
    name = "matmul_utils_test",
    srcs = ["matmul_utils_test.cc"],
    deps = [
        ":matmul_utils",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:test",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from gpu_conv_runner.{cc,h}.
# Uses the stream_executor DNN targets (//xla/stream_executor:dnn and
# :lazy_op_runner) together with :cublas_cudnn and :stream_executor_util.
cc_library(
    name = "gpu_conv_runner",
    srcs = ["gpu_conv_runner.cc"],
    hdrs = ["gpu_conv_runner.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":stream_executor_util",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:lazy_op_runner",
        "//xla/stream_executor:stream_executor_h",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@eigen_archive//:eigen3",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from gpu_norm_runner.{cc,h}.
# Shares the same local and stream_executor DNN dependencies as
# :gpu_conv_runner (:backend_configs_cc, :cublas_cudnn, :stream_executor_util,
# //xla/stream_executor:dnn, //xla/stream_executor:lazy_op_runner).
cc_library(
    name = "gpu_norm_runner",
    srcs = ["gpu_norm_runner.cc"],
    hdrs = ["gpu_norm_runner.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":stream_executor_util",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:lazy_op_runner",
        "//xla/stream_executor:stream_executor_h",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Proto library for fusion_process_dump.proto. The C++ target is consumed in
# this file as :fusion_process_dump_proto_cc. The proto depends on the
# stream_executor device description proto.
tf_proto_library(
    name = "fusion_process_dump_proto",
    srcs = ["fusion_process_dump.proto"],
    protodeps = [
        "//xla/stream_executor:device_description_proto",
    ],
)

# C++ library built from fusion_process_dump.{cc,h}.
# Consumes the generated :fusion_process_dump_proto_cc target and depends on
# //xla/tools:hlo_module_loader and //xla/service:hlo_graph_dumper.
cc_library(
    name = "fusion_process_dump",
    srcs = ["fusion_process_dump.cc"],
    hdrs = ["fusion_process_dump.h"],
    deps = [
        ":fusion_process_dump_proto_cc",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_graph_dumper",
        "//xla/stream_executor:device_description",
        "//xla/tools:hlo_module_loader",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :fusion_process_dump, built from fusion_process_dump_test.cc.
# Depends on :gpu_device_info_for_tests in addition to the library and its
# generated proto target.
xla_cc_test(
    name = "fusion_process_dump_test",
    srcs = ["fusion_process_dump_test.cc"],
    deps = [
        ":fusion_process_dump",
        ":fusion_process_dump_proto_cc",
        ":gpu_device_info_for_tests",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:pattern_matcher_gmock",
        "//xla/hlo/testlib:test",
        "//xla/service:pattern_matcher",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from cudnn_support_utils.{cc,h}.
# Depends on :cublas_cudnn and //xla/stream_executor:device_description.
cc_library(
    name = "cudnn_support_utils",
    srcs = ["cudnn_support_utils.cc"],
    hdrs = ["cudnn_support_utils.h"],
    deps = [
        ":cublas_cudnn",
        "//xla:shape_util",
        "//xla:util",
        "//xla:window_util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/status:statusor",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test target for :cudnn_support_utils, built from cudnn_support_utils_test.cc.
# The test main comes from //xla/tests:xla_internal_test_main.
xla_cc_test(
    name = "cudnn_support_utils_test",
    srcs = ["cudnn_support_utils_test.cc"],
    deps = [
        ":cudnn_support_utils",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:test",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/stream_executor:device_description",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:status_matchers",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# C++ library built from cublas_padding_requirements.{cc,h}.
# It has no dependencies on other targets in this package.
cc_library(
    name = "cublas_padding_requirements",
    srcs = ["cublas_padding_requirements.cc"],
    hdrs = ["cublas_padding_requirements.h"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/functional:overload",
    ],
)

# Proto library for executable.proto. The C++ target is consumed in this file
# as :executable_proto_cc. The proto depends on the HLO and XLA protos.
tf_proto_library(
    name = "executable_proto",
    srcs = ["executable.proto"],
    protodeps = [
        "//xla/service:hlo_proto",
        "//xla:xla_proto",
    ],
)

# Header-only library exposing target_constants.h. It declares no srcs and no
# deps.
cc_library(
    name = "target_constants",
    hdrs = ["target_constants.h"],
)

# C++ library built from gpu_transfer_manager.{cc,h}.
# Depends on the platform-id targets for CUDA, ROCm and SYCL. It is marked
# alwayslink because the library contains per-platform transfer manager
# registration (see the comment on that attribute).
cc_library(
    name = "gpu_transfer_manager",
    srcs = ["gpu_transfer_manager.cc"],
    hdrs = ["gpu_transfer_manager.h"],
    deps = [
        ":io_feed_manager",
        ":target_constants",
        "//xla:literal",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/service:compiler",
        "//xla/service:generic_transfer_manager",
        "//xla/service:shaped_buffer",
        "//xla/service:transfer_manager",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:event",
        "//xla/stream_executor:memory_allocation",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "//xla/stream_executor/sycl:sycl_platform_id",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/cleanup",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:node_hash_map",
        "@com_google_absl//absl/functional:function_ref",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@llvm-project//llvm:Core",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:numbers",
        "@local_tsl//tsl/platform:statusor",
    ],
    alwayslink = True,  # Contains per-platform transfer manager registration
)

# C++ library built from gpu_float_support.{cc,h}.
# Depends on //xla/service:float_support and on the Triton support target
# (//xla/backends/gpu/codegen/triton:support).
cc_library(
    name = "gpu_float_support",
    srcs = ["gpu_float_support.cc"],
    hdrs = ["gpu_float_support.h"],
    deps = [
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/backends/gpu/codegen/triton:support",
        "//xla/hlo/ir:hlo",
        "//xla/service:collective_ops_utils",
        "//xla/service:float_support",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
    ],
)

# C++ library built from compile_module_to_llvm_ir.{cc,h}.
# Depends on the IR emitter targets (:ir_emitter_context,
# :ir_emitter_unnested), :gpu_executable, buffer assignment, and LLVM/MLIR.
# NOTE(review): the "gpu" tag is presumably used to select or filter GPU-only
# targets in CI; confirm against the project's tag conventions.
cc_library(
    name = "compile_module_to_llvm_ir",
    srcs = [
        "compile_module_to_llvm_ir.cc",
    ],
    hdrs = [
        "compile_module_to_llvm_ir.h",
    ],
    tags = ["gpu"],
    deps = [
        ":alias_info",
        ":executable_proto_cc",
        ":execution_stream_assignment",
        ":gpu_constants",
        ":gpu_executable",
        ":gpu_memory_space_assignment",
        ":ir_emitter_context",
        ":ir_emitter_unnested",
        ":metrics",
        ":runtime_intrinsics",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/gpu/runtime:sequential_thunk",
        "//xla/hlo/analysis:hlo_ordering",
        "//xla/hlo/ir:hlo",
        "//xla/service:buffer_assignment",
        "//xla/service:buffer_value",
        "//xla/service:dump",
        "//xla/service:hlo_proto_cc",
        "//xla/service:logical_buffer",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:platform",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@llvm-project//llvm:AsmParser",
        "@llvm-project//llvm:TargetParser",
        "@llvm-project//llvm:TransformUtils",
        "@llvm-project//llvm:ir_headers",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:Support",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
    ],
)

# C++ library built from fusion_dispatch_pipeline.{cc,h}.
# Depends on the HLO pass pipeline and on two GPU transform targets:
# fusion_block_level_rewriter and fusion_dynamic_memcpy_rewriter.
cc_library(
    name = "fusion_dispatch_pipeline",
    srcs = ["fusion_dispatch_pipeline.cc"],
    hdrs = ["fusion_dispatch_pipeline.h"],
    deps = [
        "//xla:shape_util",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:pattern_matcher",
        "//xla/service/gpu/transforms:fusion_block_level_rewriter",
        "//xla/service/gpu/transforms:fusion_dynamic_memcpy_rewriter",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Support",
    ],
)

# C++ library built from fusion_pipeline.{cc,h}.
# Depends on the HLO pass pipeline and on the GPU fusion transform targets
# (priority_fusion, multi_output_fusion, horizontal_loop_fusion,
# variadic_op_splitter), plus the verifier and cost analysis targets.
cc_library(
    name = "fusion_pipeline",
    srcs = ["fusion_pipeline.cc"],
    hdrs = ["fusion_pipeline.h"],
    deps = [
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/service:cpu_gpu_shape_verifier",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_cse",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_assignment",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/service/gpu/transforms:horizontal_loop_fusion",
        "//xla/service/gpu/transforms:multi_output_fusion",
        "//xla/service/gpu/transforms:priority_fusion",
        "//xla/service/gpu/transforms:variadic_op_splitter",
        "//xla/stream_executor:device_description",
        "@local_tsl//tsl/platform:env",
    ],
)

# C++ library built from pre_scheduling_copy_insertion_pipeline.{cc,h}.
# Depends on //xla/service:copy_insertion, :alias_info, and the GPU transform
# targets alias_passthrough_params, copy_fusion, horizontal_loop_fusion and
# sanitize_constant_names.
cc_library(
    name = "pre_scheduling_copy_insertion_pipeline",
    srcs = ["pre_scheduling_copy_insertion_pipeline.cc"],
    hdrs = ["pre_scheduling_copy_insertion_pipeline.h"],
    deps = [
        ":alias_info",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/service:copy_insertion",
        "//xla/service:cpu_gpu_shape_verifier",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_assignment",
        "//xla/service:loop_schedule_linearizer",
        "//xla/service/gpu/transforms:alias_passthrough_params",
        "//xla/service/gpu/transforms:copy_fusion",
        "//xla/service/gpu/transforms:horizontal_loop_fusion",
        "//xla/service/gpu/transforms:sanitize_constant_names",
        "//xla/stream_executor:device_description",
    ],
)

# C++ library built from gpu_compiler.{cc,h}.
# The deps list is assembled from three parts:
#   1. the explicit label list below;
#   2. xla_internal(["service:export_hlo"]), a macro loaded from
#      //xla:xla.default.bzl;
#   3. if_google([...]), a macro loaded from //xla/tsl:tsl.bzl, which wraps the
#      experimental auto_sharding targets.
# NOTE(review): //xla/service:export_hlo is listed directly and also passed to
# xla_internal(); presumably the macro resolves to a different label or to an
# empty list depending on the build flavor. Confirm.
# NOTE(review): env, errors and statusor are each listed under both
# //xla/tsl/platform and @local_tsl//tsl/platform; confirm whether both
# variants are still required.
# NOTE(review): the "gpu" tag is presumably used to select or filter GPU-only
# targets; confirm against the project's tag conventions.
cc_library(
    name = "gpu_compiler",
    srcs = [
        "gpu_compiler.cc",
    ],
    hdrs = [
        "gpu_compiler.h",
    ],
    tags = ["gpu"],
    deps = [
        ":alias_info",
        ":compile_module_to_llvm_ir",
        ":conv_layout_normalization",
        ":cublas_cudnn",
        ":executable_proto_cc",
        ":execution_stream_assignment",
        ":flag_utils",
        ":fusion_dispatch_pipeline",
        ":fusion_pipeline",
        ":gpu_constants",
        ":gpu_executable",
        ":gpu_float_support",
        ":gpu_hlo_schedule",
        ":gpu_latency_hiding_scheduler",
        ":gpu_spmd_pipeline",
        ":hlo_fusion_stats",
        ":ir_emission_utils",
        ":ir_emitter",
        ":ir_emitter_context",
        ":ir_emitter_unnested",
        ":kernel_reuse_cache",
        ":matmul_utils",
        ":metrics",
        ":pre_scheduling_copy_insertion_pipeline",
        ":reduction_utils",
        ":runtime_intrinsics",
        ":stream_executor_util",
        "//xla:autotune_results_proto_cc",
        "//xla:debug_options_flags",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:types",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/gpu/codegen/triton:support",
        "//xla/backends/gpu/runtime:sequential_thunk",
        "//xla/backends/gpu/runtime:thunk",
        "//xla/hlo/analysis:alias_info",
        "//xla/hlo/analysis:hlo_dataflow_analysis",
        "//xla/hlo/analysis:hlo_ordering",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/ir:hlo_module_group",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms:convert_memory_placement_to_internal_annotations",
        "//xla/hlo/transforms:host_offload_legalize",
        "//xla/hlo/transforms:host_offloader",
        "//xla/hlo/transforms:operand_upcaster",
        "//xla/hlo/transforms:while_loop_trip_count_annotator",
        "//xla/hlo/transforms/collectives:all_gather_broadcast_reorder",
        "//xla/hlo/transforms/collectives:all_gather_combiner",
        "//xla/hlo/transforms/collectives:all_gather_remove_degenerate_dims",
        "//xla/hlo/transforms/collectives:all_reduce_combiner",
        "//xla/hlo/transforms/collectives:all_reduce_contiguous",
        "//xla/hlo/transforms/collectives:async_collective_creator",
        "//xla/hlo/transforms/collectives:collective_permute_combiner",
        "//xla/hlo/transforms/collectives:collective_quantizer",
        "//xla/hlo/transforms/collectives:collectives_schedule_linearizer",
        "//xla/hlo/transforms/collectives:convert_async_collectives_to_sync",
        "//xla/hlo/transforms/expanders:bitcast_dtypes_expander",
        "//xla/hlo/transforms/expanders:comparison_expander",
        "//xla/hlo/transforms/expanders:convolution_4d_expander",
        "//xla/hlo/transforms/expanders:convolution_pred_expander",
        "//xla/hlo/transforms/expanders:dot_decomposer",
        "//xla/hlo/transforms/expanders:dynamic_index_splitter",
        "//xla/hlo/transforms/expanders:eigh_expander",
        "//xla/hlo/transforms/expanders:logistic_expander",
        "//xla/hlo/transforms/expanders:optimization_barrier_expander",
        "//xla/hlo/transforms/expanders:qr_expander",
        "//xla/hlo/transforms/expanders:ragged_dot_rewriter",
        "//xla/hlo/transforms/expanders:real_imag_expander",
        "//xla/hlo/transforms/expanders:reduce_decomposer",
        "//xla/hlo/transforms/expanders:reshape_decomposer",
        "//xla/hlo/transforms/expanders:rng_bit_generator_expander",
        "//xla/hlo/transforms/expanders:rng_expander",
        "//xla/hlo/transforms/expanders:stable_sort_expander",
        "//xla/hlo/transforms/expanders:stochastic_convert_decomposer",
        "//xla/hlo/transforms/simplifiers:algebraic_simplifier",
        "//xla/hlo/transforms/simplifiers:all_reduce_folder",
        "//xla/hlo/transforms/simplifiers:broadcast_canonicalizer",
        "//xla/hlo/transforms/simplifiers:conditional_canonicalizer",
        "//xla/hlo/transforms/simplifiers:convert_mover",
        "//xla/hlo/transforms/simplifiers:dot_merger",
        "//xla/hlo/transforms/simplifiers:dynamic_dimension_simplifier",
        "//xla/hlo/transforms/simplifiers:flatten_call_graph",
        "//xla/hlo/transforms/simplifiers:float_normalization",
        "//xla/hlo/transforms/simplifiers:gather_simplifier",
        "//xla/hlo/transforms/simplifiers:hlo_computation_deduplicator",
        "//xla/hlo/transforms/simplifiers:hlo_constant_folding",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/hlo/transforms/simplifiers:hlo_rematerialization",
        "//xla/hlo/transforms/simplifiers:host_memory_transfer_asyncifier",
        "//xla/hlo/transforms/simplifiers:optimize_input_output_buffer_alias",
        "//xla/hlo/transforms/simplifiers:reduce_window_rewriter",
        "//xla/hlo/transforms/simplifiers:reshape_mover",
        "//xla/hlo/transforms/simplifiers:result_caster",
        "//xla/hlo/transforms/simplifiers:simplify_fp_conversions",
        "//xla/hlo/transforms/simplifiers:slice_sinker",
        "//xla/hlo/transforms/simplifiers:sort_simplifier",
        "//xla/hlo/transforms/simplifiers:sub_byte_normalization",
        "//xla/hlo/transforms/simplifiers:tuple_simplifier",
        "//xla/hlo/transforms/simplifiers:zero_sized_hlo_elimination",
        "//xla/hlo/utils:hlo_query",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:all_reduce_promotion",
        "//xla/service:all_reduce_reassociate",
        "//xla/service:all_reduce_simplifier",
        "//xla/service:batched_gather_scatter_normalizer",
        "//xla/service:batchnorm_expander",
        "//xla/service:buffer_assignment",
        "//xla/service:buffer_value",
        "//xla/service:call_inliner",
        "//xla/service:collective_ops_utils",
        "//xla/service:collective_permute_decomposer",
        "//xla/service:collective_pipeliner",
        "//xla/service:collective_pipeliner_utils",
        "//xla/service:collective_utils",
        "//xla/service:compiler",
        "//xla/service:conditional_simplifier",
        "//xla/service:copy_insertion",
        "//xla/service:cpu_gpu_shape_verifier",
        "//xla/service:dump",
        "//xla/service:dynamic_dimension_inference",
        "//xla/service:dynamic_padder",
        "//xla/service:executable",
        "//xla/service:export_hlo",
        "//xla/service:float_support",
        "//xla/service:gather_expander",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:hlo_cse",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_proto_cc",
        "//xla/service:hlo_value",
        "//xla/service:hlo_verifier",
        "//xla/service:layout_assignment",
        "//xla/service:layout_normalization",
        "//xla/service:llvm_compiler",
        "//xla/service:logical_buffer",
        "//xla/service:loop_schedule_linearizer",
        "//xla/service:reduce_scatter_combiner",
        "//xla/service:reduce_scatter_reassociate",
        "//xla/service:scatter_determinism_expander",
        "//xla/service:scatter_expander",
        "//xla/service:scatter_simplifier",
        "//xla/service:select_and_scatter_expander",
        "//xla/service:sharding_remover",
        "//xla/service:slow_operation_alarm",
        "//xla/service:topk_rewriter",
        "//xla/service:transpose_folding",
        "//xla/service:while_loop_all_reduce_code_motion",
        "//xla/service:while_loop_constant_sinking",
        "//xla/service:while_loop_simplifier",
        "//xla/service/gpu/autotuning:autotuner_util",
        "//xla/service/gpu/autotuning:custom_kernel_fusion_autotuner",
        "//xla/service/gpu/model:collective_ptable_stats_collection",
        "//xla/service/gpu/model:gpu_cost_model_stats_collection",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/service/gpu/model:matmul_ptable_stats_collection",
        "//xla/service/gpu/model:sol_gpu_cost_model_stats_collection",
        "//xla/service/gpu/transforms:add_tracking_suffix_to_instruction_names",
        "//xla/service/gpu/transforms:algebraic_simplifier",
        "//xla/service/gpu/transforms:algorithm_checker",
        "//xla/service/gpu/transforms:async_wrapper",
        "//xla/service/gpu/transforms:command_buffer_conversion_pass",
        "//xla/service/gpu/transforms:command_buffer_scheduling",
        "//xla/service/gpu/transforms:conv_rewriter",
        "//xla/service/gpu/transforms:cudnn_custom_call_converter",
        "//xla/service/gpu/transforms:custom_kernel_fusion_rewriter",
        "//xla/service/gpu/transforms:dot_algorithm_rewriter",
        "//xla/service/gpu/transforms:dot_dimension_sorter",
        "//xla/service/gpu/transforms:dot_normalizer",
        "//xla/service/gpu/transforms:dot_operand_converter",
        "//xla/service/gpu/transforms:double_buffer_loop_unrolling",
        "//xla/service/gpu/transforms:dynamic_slice_fusion_rewriter",
        "//xla/service/gpu/transforms:explicit_collectives_group_async_wrapper",
        "//xla/service/gpu/transforms:explicit_stream_annotation_async_wrapper",
        "//xla/service/gpu/transforms:fusion_block_level_rewriter",
        "//xla/service/gpu/transforms:fusion_wrapper",
        "//xla/service/gpu/transforms:gemm_broadcast_folding_rewriter",
        "//xla/service/gpu/transforms:gemm_fusion",
        "//xla/service/gpu/transforms:gemm_fusion_swap_operands",
        "//xla/service/gpu/transforms:gemm_rewriter",
        "//xla/service/gpu/transforms:gemv_rewriter",
        "//xla/service/gpu/transforms:layout_assignment",
        "//xla/service/gpu/transforms:move_copy_to_users",
        "//xla/service/gpu/transforms:nest_gemm_fusion",
        "//xla/service/gpu/transforms:ragged_all_to_all_canonicalizer",
        "//xla/service/gpu/transforms:ragged_all_to_all_decomposer",
        "//xla/service/gpu/transforms:reduce_scatter_creator",
        "//xla/service/gpu/transforms:reduction_degenerate_dim_remover",
        "//xla/service/gpu/transforms:reduction_dimension_grouper",
        "//xla/service/gpu/transforms:reduction_layout_normalizer",
        "//xla/service/gpu/transforms:reduction_splitter",
        "//xla/service/gpu/transforms:rename_fusions",
        "//xla/service/gpu/transforms:sanitize_constant_names",
        "//xla/service/gpu/transforms:scalar_constant_sinker",
        "//xla/service/gpu/transforms:scatter_expander",
        "//xla/service/gpu/transforms:scatter_slice_simplifier",
        "//xla/service/gpu/transforms:softmax_rewriter_triton",
        "//xla/service/gpu/transforms:sort_rewriter",
        "//xla/service/gpu/transforms:splitk_rewriter",
        "//xla/service/gpu/transforms:stream_attribute_annotator",
        "//xla/service/gpu/transforms:stream_attribute_async_wrapper",
        "//xla/service/gpu/transforms:thunk_pass_pipeline",
        "//xla/service/gpu/transforms:topk_specializer",
        "//xla/service/gpu/transforms:topk_splitter",
        "//xla/service/gpu/transforms:transpose_dimension_grouper",
        "//xla/service/gpu/transforms:tree_reduction_rewriter",
        "//xla/service/gpu/transforms:triton_fusion_numerics_verifier",
        "//xla/service/gpu/transforms:windowed_einsum_handler",
        "//xla/service/gpu/transforms/collectives:all_gather_dynamic_slice_simplifier",
        "//xla/service/gpu/transforms/collectives:all_gather_optimizer",
        "//xla/service/gpu/transforms/collectives:all_reduce_blueconnect",
        "//xla/service/gpu/transforms/collectives:all_reduce_decomposer",
        "//xla/service/gpu/transforms/collectives:all_reduce_splitter",
        "//xla/service/gpu/transforms/collectives:async_collective_annotator",
        "//xla/service/gpu/transforms/collectives:collective_backend_assigner",
        "//xla/service/gpu/transforms/collectives:collective_combiner_annotator",
        "//xla/service/gpu/transforms/collectives:collective_ops_utils",
        "//xla/service/gpu/transforms/collectives:collective_permute_cycle_decomposer",
        "//xla/service/gpu/transforms/collectives:collective_pipelining_analyzer",
        "//xla/service/gpu/transforms/collectives:collective_select_folder",
        "//xla/service/gpu/transforms/collectives:convert_async_collectives_to_sync",
        "//xla/service/gpu/transforms/collectives:gpu_all_gather_combiner",
        "//xla/service/gpu/transforms/collectives:gpu_all_reduce_combiner",
        "//xla/service/gpu/transforms/collectives:gpu_collective_combiner_utils",
        "//xla/service/gpu/transforms/collectives:gpu_reduce_scatter_combiner",
        "//xla/service/llvm_ir:llvm_command_line_options",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/service/spmd:collective_permute_motion",
        "//xla/service/spmd:schedule_aware_collective_ops_cse",
        "//xla/service/spmd/shardy:shardy_xla_pass",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/stream_executor:device_memory_allocator",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/integrations:device_mem_allocator",
        "//xla/tsl/lib/monitoring:counter",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/types:span",
        "@com_google_absl//absl/types:variant",
        "@llvm-project//llvm:AsmParser",
        "@llvm-project//llvm:BitReader",
        "@llvm-project//llvm:BitWriter",
        "@llvm-project//llvm:Core",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:TargetParser",
        "@llvm-project//llvm:TransformUtils",
        "@llvm-project//mlir:FuncDialect",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:Support",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:numbers",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:platform_port",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
    ] + xla_internal(["service:export_hlo"]) + if_google([
        "//xla/hlo/experimental/auto_sharding",
        "//xla/hlo/experimental/auto_sharding:auto_sharding_option",
        "//xla/hlo/experimental/auto_sharding:auto_sharding_stablehlo_pass",
    ]),
)

# Builds gpu_compiler_test.cc as an xla_test for the "gpu" backend.
# gpu_compiler_test_autotune_db.textproto is attached through `data`, so it is
# available to the test at runtime rather than being compiled in.
xla_test(
    name = "gpu_compiler_test",
    srcs = ["gpu_compiler_test.cc"],
    backends = ["gpu"],
    data = ["gpu_compiler_test_autotune_db.textproto"],
    deps = [
        ":alias_info",
        ":backend_configs_cc",
        ":gpu_compiler",
        ":gpu_executable",
        ":gpu_hlo_schedule",
        ":metrics",
        "//xla:autotune_results_proto_cc",
        "//xla:error_spec",
        "//xla:literal",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/backends/gpu/runtime:sequential_thunk",
        "//xla/backends/gpu/runtime:thunk",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/ir:hlo_module_group",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:pattern_matcher_gmock",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_runner_interface",
        "//xla/service:pattern_matcher",
        "//xla/service:xla_debug_info_manager",
        "//xla/service/gpu/autotuning:autotuner_util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tests:hlo_test_base",
        "//xla/tests:literal_test_util",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/lib/monitoring:collected_metrics",
        "//xla/tsl/lib/monitoring:collection_registry",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:logging",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/platform:test",
        "@com_google_absl//absl/base:log_severity",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/log:log_sink",
        "@com_google_absl//absl/log:scoped_mock_log",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform",
        "@local_tsl//tsl/platform:casts",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:regexp",
    ],
)

# Builds gpu_offloading_test.cc as an xla_test for the "gpu" backend.
xla_test(
    name = "gpu_offloading_test",
    srcs = ["gpu_offloading_test.cc"],
    backends = ["gpu"],
    deps = [
        ":backend_configs_cc",
        "//xla:autotune_results_proto_cc",
        "//xla:error_spec",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/analysis:alias_info",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/transforms/simplifiers:hlo_memory_scheduler",
        "//xla/hlo/transforms/simplifiers:hlo_rematerialization",
        "//xla/hlo/utils:hlo_matchers",
        "//xla/service:buffer_value",
        "//xla/service:hlo_cost_analysis",
        "//xla/service/gpu/transforms:stream_attribute_annotator",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Builds auto_sharding_gpu_compiler_test.cc as an xla_test for the "gpu"
# backend. It carries the "no_oss" tag; the TODO on that tag records why.
xla_test(
    name = "auto_sharding_gpu_compiler_test",
    srcs = ["auto_sharding_gpu_compiler_test.cc"],
    backends = ["gpu"],
    tags = ["no_oss"],  # TODO(b/277355322): Make autosharding work in OSS
    deps = [
        "//xla:xla_data_proto_cc",
        "//xla/hlo/experimental/auto_sharding:auto_sharding_option",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:pattern_matcher_gmock",
        "//xla/service:hlo_module_config",
        "//xla/service:pattern_matcher",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/log",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Registration library: compiles only nvptx_compiler_registration.cc and
# depends on :nvptx_compiler_impl for the implementation.
# The "manual" tag keeps this target out of Bazel wildcard patterns such as
# `//...`, so it is built only when named explicitly or depended upon.
# NOTE(review): "cuda-only" and "gpu" are presumably consumed by project
# tooling to select CUDA/GPU builds -- confirm against the CI configuration.
cc_library(
    name = "nvptx_compiler",
    srcs = [
        "nvptx_compiler_registration.cc",
    ],
    tags = [
        "cuda-only",
        "gpu",
        "manual",
    ],
    deps = [
        ":nvptx_compiler_impl",
        "//xla/service:compiler",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "@local_tsl//tsl/platform:path",
    ],
    alwayslink = True,  # Contains compiler registration
)

# Implementation library built from nvptx_compiler.cc / nvptx_compiler.h.
# Unlike :nvptx_compiler it does not set `alwayslink`.
# The "manual" tag keeps this target out of Bazel wildcard patterns such as
# `//...`, so it is built only when named explicitly or depended upon.
# NOTE(review): "cuda-only" and "gpu" are presumably consumed by project
# tooling to select CUDA/GPU builds -- confirm against the CI configuration.
cc_library(
    name = "nvptx_compiler_impl",
    srcs = [
        "nvptx_compiler.cc",
    ],
    hdrs = [
        "nvptx_compiler.h",
    ],
    tags = [
        "cuda-only",
        "gpu",
        "manual",
    ],
    deps = [
        ":alias_info",
        ":cublas_padding_requirements",
        ":gpu_compiler",
        ":ir_emission_utils",
        ":metrics",
        ":nvptx_alias_info",
        ":ptx_compile_options_from_debug_options",
        ":target_constants",
        "//xla:autotune_results_proto_cc",
        "//xla:debug_options_flags",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/backends/gpu/autotuner:factory",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:algebraic_simplifier",
        "//xla/hlo/transforms/simplifiers:convert_mover",
        "//xla/hlo/transforms/simplifiers:dot_dimension_merger",
        "//xla/hlo/transforms/simplifiers:float_normalization",
        "//xla/hlo/transforms/simplifiers:hlo_constant_folding",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/hlo/transforms/simplifiers:reshape_mover",
        "//xla/hlo/transforms/simplifiers:tuple_simplifier",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:call_inliner",
        "//xla/service:dump",
        "//xla/service:float_support",
        "//xla/service:hlo_cse",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_verifier",
        "//xla/service/gpu/autotuning:autotuner_pass",
        "//xla/service/gpu/autotuning:autotuner_util",
        "//xla/service/gpu/autotuning:conv_algorithm_picker",
        "//xla/service/gpu/autotuning:gemm_algorithm_picker",
        "//xla/service/gpu/autotuning:gemm_fusion_autotuner",
        "//xla/service/gpu/llvm_gpu_backend:nvptx_backend",
        "//xla/service/gpu/llvm_gpu_backend:nvptx_utils",
        "//xla/service/gpu/transforms:algebraic_simplifier",
        "//xla/service/gpu/transforms:block_scaling_rewriter",
        "//xla/service/gpu/transforms:conv_padding_legalization",
        "//xla/service/gpu/transforms:conv_rewriter",
        "//xla/service/gpu/transforms:cublas_pad_for_gemms",
        "//xla/service/gpu/transforms:cudnn_custom_call_compiler",
        "//xla/service/gpu/transforms:cudnn_fused_conv_rewriter",
        "//xla/service/gpu/transforms:cudnn_fusion_compiler",
        "//xla/service/gpu/transforms:cudnn_norm_rewriter",
        "//xla/service/gpu/transforms:cudnn_pad_for_convolutions",
        "//xla/service/gpu/transforms:cudnn_simplify_padding",
        "//xla/service/gpu/transforms:cudnn_vectorize_convolutions",
        "//xla/service/gpu/transforms:dot_sparsity_rewriter",
        "//xla/service/gpu/transforms:gpusolver_rewriter",
        "//xla/service/gpu/transforms:triangular_solve_rewriter",
        "//xla/service/llvm_ir:llvm_util",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/cuda:assemble_compilation_provider",
        "//xla/stream_executor/cuda:caching_compilation_provider",
        "//xla/stream_executor/cuda:compilation_options",
        "//xla/stream_executor/cuda:compilation_provider",
        "//xla/stream_executor/cuda:compilation_provider_options",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/stream_executor/cuda:cuda_diagnostics",
        "//xla/stream_executor/cuda:cuda_platform_id",
        "//xla/stream_executor/cuda:cuda_solver_context",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:IRReader",
        "@llvm-project//llvm:Support",
        "@llvm-project//llvm:ir_headers",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/profiler/lib:scoped_annotation",
        "@local_tsl//tsl/profiler/lib:traceme",
    ],
)

# Builds nvptx_compiler_test.cc as an xla_test for the "v100", "a100" and
# "b200" backends. It links :nvptx_compiler_impl directly and is tagged
# "cuda-only" and "nomsan" (reason for the latter is on the tag itself).
xla_test(
    name = "nvptx_compiler_test",
    srcs = [
        "nvptx_compiler_test.cc",
    ],
    backends = [
        "v100",
        "a100",
        "b200",
    ],
    tags = [
        "cuda-only",
        "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false positives in msan.
    ],
    deps = [
        ":alias_info",
        ":gpu_constants",
        ":gpu_hlo_schedule",
        ":gpu_latency_hiding_scheduler",
        ":nvptx_compiler_impl",
        "//xla:util",
        "//xla:xla_proto_cc",
        "//xla/hlo/analysis:hlo_ordering",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:backend",
        "//xla/service:buffer_assignment",
        "//xla/service:buffer_value",
        "//xla/service:logical_buffer",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Builds ptx_compilation_test.cc as an xla_test for the "gpu" backend.
# It links :nvptx_compiler_impl directly and is tagged "cuda-only" and
# "nomsan" (reason for the latter is on the tag itself).
xla_test(
    name = "ptx_compilation_test",
    srcs = [
        "ptx_compilation_test.cc",
    ],
    backends = [
        "gpu",
    ],
    tags = [
        "cuda-only",
        "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false positives in msan.
    ],
    deps = [
        ":gpu_executable",
        ":nvptx_compiler_impl",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:executable",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/cuda:nvjitlink_support",
        "//xla/stream_executor/cuda:ptx_compilation_method",
        "//xla/stream_executor/cuda:ptx_compiler_support",
        "//xla/stream_executor/cuda:ptx_linking_method",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:btree",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@llvm-project//llvm:Object",
        "@llvm-project//llvm:Support",
        "@local_tsl//tsl/platform:path",
    ],
)

# Builds gpu_aot_compilation_test.cc as an xla_cc_test (no `backends` list).
# The compiler implementation dependency is chosen by the
# if_cuda_is_configured / if_rocm_is_configured macros: going by their names,
# :nvptx_compiler_impl is added for CUDA builds and :amdgpu_compiler_impl for
# ROCm builds.
# NOTE(review): the "ignore_for_dep=..." tag is only appended through
# if_google(), so it presumably applies to internal builds only -- confirm.
xla_cc_test(
    name = "gpu_aot_compilation_test",
    srcs = [
        "gpu_aot_compilation_test.cc",
    ],
    tags = [
        "gpu",
        "no_oss",
        "nomsan",  # Pulls in precompiled NVIDIA libraries which cause false positives in msan.
        "requires-gpu-nvidia",
    ] + if_google([
        "ignore_for_dep=third_party/tensorflow/compiler/xla/service/gpu/amdgpu_compiler.h",
    ]),
    deps = if_cuda_is_configured([
        ":nvptx_compiler_impl",
    ]) + if_rocm_is_configured([
        ":amdgpu_compiler_impl",
    ]) + [
        ":gpu_transfer_manager",
        "//xla:literal_util",
        "//xla/backends/gpu/codegen/triton:support",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/ir:hlo_module_group",
        "//xla/service:compiler",
        "//xla/service:executable",
        "//xla/service:gpu_plugin",
        "//xla/service:hlo_runner_interface",
        "//xla/service:platform_util",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tests:hlo_test_base",
        "//xla/tests:literal_test_util",
        "//xla/tests:xla_internal_test_main",  # build_cleaner: keep
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@llvm-project//mlir:IR",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Registration library: compiles only amdgpu_compiler_registration.cc and
# depends on :amdgpu_compiler_impl for the implementation.
# The "manual" tag keeps this target out of Bazel wildcard patterns such as
# `//...`, so it is built only when named explicitly or depended upon.
# NOTE(review): "rocm-only" and "gpu" are presumably consumed by project
# tooling to select ROCm/GPU builds -- confirm against the CI configuration.
cc_library(
    name = "amdgpu_compiler",
    srcs = [
        "amdgpu_compiler_registration.cc",
    ],
    tags = [
        "gpu",
        "manual",
        "rocm-only",
    ],
    deps = [
        ":amdgpu_compiler_impl",
        "//xla/service:compiler",
        "//xla/stream_executor/rocm:rocm_platform_id",
    ],
    alwayslink = True,  # Contains compiler registration
)

# Implementation library built from amdgpu_compiler.cc / amdgpu_compiler.h.
# Unlike :amdgpu_compiler it does not set `alwayslink`.
# The "manual" tag keeps this target out of Bazel wildcard patterns such as
# `//...`, so it is built only when named explicitly or depended upon.
# NOTE(review): "rocm-only" and "gpu" are presumably consumed by project
# tooling to select ROCm/GPU builds -- confirm against the CI configuration.
cc_library(
    name = "amdgpu_compiler_impl",
    srcs = [
        "amdgpu_compiler.cc",
    ],
    hdrs = [
        "amdgpu_compiler.h",
    ],
    tags = [
        "gpu",
        "manual",
        "rocm-only",
    ],
    deps = [
        ":alias_info",
        ":cublas_padding_requirements",
        ":gpu_compiler",
        ":target_constants",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:algebraic_simplifier",
        "//xla/hlo/transforms/simplifiers:convert_mover",
        "//xla/hlo/transforms/simplifiers:dot_dimension_merger",
        "//xla/hlo/transforms/simplifiers:float_normalization",
        "//xla/hlo/transforms/simplifiers:hlo_constant_folding",
        "//xla/hlo/transforms/simplifiers:reshape_mover",
        "//xla/hlo/transforms/simplifiers:tuple_simplifier",
        "//xla/pjrt/distributed:key_value_store_interface",
        "//xla/service:call_inliner",
        "//xla/service:float_support",
        "//xla/service:hlo_module_config",
        "//xla/service:hlo_verifier",
        "//xla/service/gpu/autotuning:autotuner_util",
        "//xla/service/gpu/autotuning:conv_algorithm_picker",
        "//xla/service/gpu/autotuning:gemm_algorithm_picker",
        "//xla/service/gpu/autotuning:gemm_fusion_autotuner",
        "//xla/service/gpu/llvm_gpu_backend:amdgpu_backend",
        "//xla/service/gpu/transforms:algebraic_simplifier",
        "//xla/service/gpu/transforms:conv_padding_legalization",
        "//xla/service/gpu/transforms:conv_rewriter",
        "//xla/service/gpu/transforms:cublas_pad_for_gemms",
        "//xla/service/gpu/transforms:cudnn_fused_conv_rewriter",
        "//xla/service/gpu/transforms:gpusolver_rewriter",
        "//xla/service/gpu/transforms:triangular_solve_rewriter",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:semantic_version",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor/rocm:rocm_platform_id",
        "//xla/stream_executor/rocm:rocm_solver_context",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@llvm-project//llvm:ir_headers",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Header-only library: exposes xfeed_queue.h and has no `srcs`.
cc_library(
    name = "xfeed_queue",
    hdrs = ["xfeed_queue.h"],
    deps = [
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/synchronization",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Bundles the infeed and outfeed managers (infeed_manager.{cc,h} and
# outfeed_manager.{cc,h}) into a single library that depends on :xfeed_queue.
cc_library(
    name = "io_feed_manager",
    srcs = [
        "infeed_manager.cc",
        "outfeed_manager.cc",
    ],
    hdrs = [
        "infeed_manager.h",
        "outfeed_manager.h",
    ],
    deps = [
        ":xfeed_queue",
        "//xla:literal",
        "//xla:shape_tree",
        "//xla:shape_util",
        "//xla:util",
        "//xla/stream_executor:device_memory_handle",
        "//xla/stream_executor:stream_executor_h",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/synchronization",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:notification",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from gpu_hlo_schedule.cc / gpu_hlo_schedule.h.
# Among its dependencies are :gpu_latency_hiding_scheduler and the latency
# estimator targets under //xla/service/gpu/model.
cc_library(
    name = "gpu_hlo_schedule",
    srcs = ["gpu_hlo_schedule.cc"],
    hdrs = ["gpu_hlo_schedule.h"],
    deps = [
        ":alias_info",
        ":backend_configs_cc",
        ":flag_utils",
        ":gpu_latency_hiding_scheduler",
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/collectives:async_collective_creator",
        "//xla/hlo/transforms/simplifiers:hlo_memory_scheduler",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:buffer_value",
        "//xla/service:hlo_module_config",
        "//xla/service:latency_hiding_scheduler",
        "//xla/service:legalize_scheduling_annotations",
        "//xla/service:p2p_schedule_preparation",
        "//xla/service:profile_guided_latency_estimator",
        "//xla/service/gpu/model:analytical_latency_estimator",
        "//xla/service/gpu/model:gpu_hlo_cost_analysis",
        "//xla/service/gpu/model:sol_latency_estimator",
        "//xla/service/gpu/transforms:pgle_accuracy_checker",
        "//xla/service/gpu/transforms:scheduling_instruction_annotator",
        "//xla/service/gpu/transforms/collectives:async_collective_annotator",
        "//xla/service/gpu/transforms/collectives:collective_ops_utils",
        "//xla/stream_executor:device_description",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:errors",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:path",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/profiler/lib:traceme",
        "@local_tsl//tsl/profiler/protobuf:profiled_instructions_proto_cc",
    ],
)

# Builds gpu_hlo_schedule_test.cc as an xla_test for the "gpu" backend,
# linking :gpu_hlo_schedule and :gpu_compiler.
xla_test(
    name = "gpu_hlo_schedule_test",
    srcs = [
        "gpu_hlo_schedule_test.cc",
    ],
    backends = ["gpu"],
    deps = [
        ":alias_info",
        ":gpu_compiler",
        ":gpu_hlo_schedule",
        "//xla:shape_util",
        "//xla/hlo/analysis:hlo_ordering",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:backend",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tests:test_utils",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:logging",
        "//xla/tsl/platform:status",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:log_severity",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:log_sink",
        "@com_google_absl//absl/log:scoped_mock_log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/profiler/protobuf:profiled_instructions_proto_cc",
    ],
)

# Library built from gpu_spmd_pipeline.cc / gpu_spmd_pipeline.h.
# Its dependencies are mostly HLO pass targets (simplifiers, expanders,
# sharding propagation) plus the SPMD partitioner targets under
# //xla/service/spmd.
cc_library(
    name = "gpu_spmd_pipeline",
    srcs = ["gpu_spmd_pipeline.cc"],
    hdrs = ["gpu_spmd_pipeline.h"],
    deps = [
        ":runtime_intrinsics",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:algebraic_simplifier",
        "//xla/hlo/transforms/simplifiers:hlo_constant_folding",
        "//xla/hlo/transforms/simplifiers:hlo_constant_splitter",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/hlo/transforms/simplifiers:reshape_mover",
        "//xla/hlo/transforms/simplifiers:sort_simplifier",
        "//xla/hlo/transforms/simplifiers:tuple_simplifier",
        "//xla/service:conditional_simplifier",
        "//xla/service:gather_expander",
        "//xla/service:hlo_module_config",
        "//xla/service:scatter_expander",
        "//xla/service:sharding_propagation",
        "//xla/service:while_loop_constant_sinking",
        "//xla/service:while_loop_simplifier",
        "//xla/service/gpu/transforms:algebraic_simplifier",
        "//xla/service/spmd:collective_permute_motion",
        "//xla/service/spmd:stateful_rng_spmd_partitioner",
        "//xla/service/spmd/shardy:shardy_xla_pass",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/functional:function_ref",
        "@com_google_absl//absl/log:check",
    ],
)

# Builds gpu_spmd_pipeline_test.cc as an xla_cc_test for :gpu_spmd_pipeline.
# No `backends` list is set; the test links
# hlo_hardware_independent_test_base rather than hlo_test_base.
xla_cc_test(
    name = "gpu_spmd_pipeline_test",
    srcs = [
        "gpu_spmd_pipeline_test.cc",
    ],
    deps = [
        ":gpu_spmd_pipeline",
        "//xla:shape_util",
        "//xla:util",
        "//xla/client:executable_build_options",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/transforms/simplifiers:algebraic_simplifier",
        "//xla/service:hlo_module_config",
        "//xla/service/spmd/shardy:constants",
        "//xla/stream_executor:device_description",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Builds while_transformer_test.cc as an xla_cc_test. It depends on no target
# from this package; it links //xla/hlo/analysis:while_loop_analysis and
# hlo_hardware_independent_test_base.
# NOTE(review): the "nomsan" tag has no recorded reason here -- confirm it is
# still needed.
xla_cc_test(
    name = "while_transformer_test",
    srcs = ["while_transformer_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        "//xla:comparison_util",
        "//xla:literal_util",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/analysis:while_loop_analysis",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:test",
        "//xla/tests:xla_internal_test_main",
    ],
)

# Library built from stream_executor_util.cc / stream_executor_util.h.
# `compatible_with` and `copts` are supplied by the
# get_compatible_with_portable() and tsl_copts() macros loaded at the top of
# this file; their concrete values are not visible here.
cc_library(
    name = "stream_executor_util",
    srcs = ["stream_executor_util.cc"],
    hdrs = ["stream_executor_util.h"],
    compatible_with = get_compatible_with_portable(),
    copts = tsl_copts(),
    deps = [
        ":cublas_cudnn",
        ":launch_dimensions",
        "//xla:autotuning_proto_cc",
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor:data_type",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:dnn",
        "//xla/stream_executor:kernel",
        "//xla/stream_executor:kernel_spec",
        "//xla/stream_executor:launch_dim",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_executor_h",
        "//xla/stream_executor:typed_kernel_factory",
        "//xla/stream_executor/gpu:gpu_kernel_registry",
        "//xla/stream_executor/gpu:repeat_buffer_kernel",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/protobuf:dnn_proto_cc",
        "//xla/tsl/util/proto:proto_utils",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
        "@com_google_absl//absl/types:span",
        "@eigen_archive//:eigen3",
        "@local_tsl//tsl/platform:ml_dtypes",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Builds stream_executor_util_test.cc as an xla_cc_test for
# :stream_executor_util. It takes its test main from googletest's gtest_main
# instead of //xla/tests:xla_internal_test_main.
xla_cc_test(
    name = "stream_executor_util_test",
    srcs = ["stream_executor_util_test.cc"],
    deps = [
        ":stream_executor_util",
        "//xla:autotuning_proto_cc",
        "//xla/service:hlo_module_config",
        "//xla/tsl/util/proto:proto_utils",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/time",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library built from gpu_asm_opts_util.cc / gpu_asm_opts_util.h. Its only
# dependencies are the XLA proto target and
# //xla/stream_executor/gpu:gpu_asm_opts.
cc_library(
    name = "gpu_asm_opts_util",
    srcs = ["gpu_asm_opts_util.cc"],
    hdrs = ["gpu_asm_opts_util.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:xla_proto_cc",
        "//xla/stream_executor/gpu:gpu_asm_opts",
    ],
)

# Builds gpu_asm_opts_util_test.cc as an xla_cc_test for :gpu_asm_opts_util.
xla_cc_test(
    name = "gpu_asm_opts_util_test",
    srcs = ["gpu_asm_opts_util_test.cc"],
    deps = [
        ":gpu_asm_opts_util",
        "//xla:xla_proto_cc",
        "//xla/tests:xla_internal_test_main",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from hlo_fusion_analysis.cc / hlo_fusion_analysis.h.
# Within this package it depends on :backend_configs_cc, :ir_emission_utils
# and :reduction_utils.
cc_library(
    name = "hlo_fusion_analysis",
    srcs = ["hlo_fusion_analysis.cc"],
    hdrs = ["hlo_fusion_analysis.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":backend_configs_cc",
        ":ir_emission_utils",
        ":reduction_utils",
        "//xla:shape_util",
        "//xla/codegen:hlo_fusion_spec",
        "//xla/codegen:ir_emission_utils",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@llvm-project//llvm:Support",
    ],
)

# Builds hlo_fusion_analysis_test.cc as an xla_cc_test for
# :hlo_fusion_analysis. It links :gpu_device_info_for_tests and
# hlo_hardware_independent_test_base, and sets no `backends` list.
xla_cc_test(
    name = "hlo_fusion_analysis_test",
    srcs = ["hlo_fusion_analysis_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_device_info_for_tests",
        ":hlo_fusion_analysis",
        ":ir_emission_utils",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/util/proto:proto_matchers",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from alias_info.cc / alias_info.h. It depends on the
# identically named target //xla/hlo/analysis:alias_info as well as
# :hlo_fusion_analysis from this package.
cc_library(
    name = "alias_info",
    srcs = ["alias_info.cc"],
    hdrs = ["alias_info.h"],
    deps = [
        ":backend_configs_cc",
        ":hlo_fusion_analysis",
        "//xla:shape_util",
        "//xla/hlo/analysis:alias_info",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:device_description_proto_cc",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_set",
        "@llvm-project//llvm:Support",
    ],
)

# Builds alias_info_test.cc as an xla_cc_test for :alias_info. It links
# :gpu_device_info_for_tests and hlo_hardware_independent_test_base, and sets
# no `backends` list.
xla_cc_test(
    name = "alias_info_test",
    srcs = ["alias_info_test.cc"],
    deps = [
        ":alias_info",
        ":gpu_device_info_for_tests",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:test",
        "//xla/hlo/testlib:test_helpers",
        "//xla/service:copy_insertion",
        "//xla/stream_executor:device_description",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from nvptx_alias_info.cc / nvptx_alias_info.h, layered on
# :alias_info. Unlike :nvptx_compiler_impl (which depends on it), this target
# carries no tags.
cc_library(
    name = "nvptx_alias_info",
    srcs = ["nvptx_alias_info.cc"],
    hdrs = ["nvptx_alias_info.h"],
    deps = [
        ":alias_info",
        ":backend_configs_cc",
        ":cublas_cudnn",
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
    ],
)

# Library built from gpu_fusible.cc / gpu_fusible.h. Within this package it
# depends on :hlo_fusion_analysis, :ir_emission_utils, :launch_dimensions and
# :reduction_utils.
cc_library(
    name = "gpu_fusible",
    srcs = ["gpu_fusible.cc"],
    hdrs = ["gpu_fusible.h"],
    compatible_with = get_compatible_with_portable(),
    deps = [
        ":backend_configs_cc",
        ":hlo_fusion_analysis",
        ":ir_emission_utils",
        ":launch_dimensions",
        ":reduction_utils",
        "//xla:permutation_util",
        "//xla:shape_util",
        "//xla:side_effect_util",
        "//xla:util",
        "//xla/hlo/analysis:hlo_dataflow_analysis",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_traversal",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/container:inlined_vector",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/numeric:bits",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/synchronization",
    ],
)

# Builds gpu_fusible_test.cc as an xla_cc_test for :gpu_fusible. It links
# hlo_hardware_independent_test_base and sets no `backends` list.
# NOTE(review): the "nomsan" tag has no recorded reason here -- confirm it is
# still needed.
xla_cc_test(
    name = "gpu_fusible_test",
    srcs = ["gpu_fusible_test.cc"],
    tags = [
        "nomsan",
    ],
    deps = [
        ":gpu_fusible",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:instruction_fusion",
        "//xla/stream_executor:device_description",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/platform:test",
        "@com_google_absl//absl/strings",
    ],
)

# Builds float_support_test.cc as an xla_test for the "a100", "h100", "b200"
# and "amdgpu_any" backends. It depends on no target from this package.
xla_test(
    name = "float_support_test",
    srcs = ["float_support_test.cc"],
    backends = [
        "a100",
        "h100",
        "b200",
        "amdgpu_any",
    ],
    deps = [
        "//xla:error_spec",
        "//xla:xla_proto_cc",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/functional:overload",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Builds conv_layout_normalization_test.cc as an xla_test for the "gpu"
# backend. It takes its test main from googletest's gtest_main; the
# gpu_codegen_test dependency is pinned with a "fixdeps: keep" directive.
xla_test(
    name = "conv_layout_normalization_test",
    srcs = ["conv_layout_normalization_test.cc"],
    backends = ["gpu"],
    deps = [
        "//xla:error_spec",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/tests:gpu_codegen_test",  # fixdeps: keep
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from hlo_algorithm_denylist.cc / hlo_algorithm_denylist.h.
# NOTE(review): `env` and `status` are listed from both //xla/tsl/platform and
# @local_tsl//tsl/platform. One of each pair may be redundant — check which
# headers the sources include before removing either.
cc_library(
    name = "hlo_algorithm_denylist",
    srcs = ["hlo_algorithm_denylist.cc"],
    hdrs = ["hlo_algorithm_denylist.h"],
    deps = [
        ":backend_configs_cc",
        "//xla:autotuning_proto_cc",
        "//xla:debug_options_flags",
        "//xla/hlo/ir:backend_config",
        "//xla/hlo/ir:hlo",
        "//xla/service/gpu/autotuning:gpu_autotuning_proto_cc",
        "//xla/stream_executor:dnn",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:status",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:status",
    ],
)

# Test for :hlo_algorithm_denylist, built from hlo_algorithm_denylist_test.cc.
xla_cc_test(
    name = "hlo_algorithm_denylist_test",
    srcs = ["hlo_algorithm_denylist_test.cc"],
    # Made available to the test at run time through runfiles.
    data = ["data/hlo_algorithm_denylist.pbtxt"],
    deps = [
        ":hlo_algorithm_denylist",
        "//xla:debug_options_flags",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:verified_hlo_module",
        "//xla/service:hlo_proto_cc",
        "//xla/stream_executor:dnn",
        "//xla/tests:test_utils",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "//xla/tsl/platform:test",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:path",
    ],
)

# Test for :gpu_float_support, built from gpu_float_support_test.cc.
# NOTE(review): `statusor` is listed from both //xla/tsl/platform and
# @local_tsl//tsl/platform. One may be redundant — check the test's includes
# before removing either.
xla_cc_test(
    name = "gpu_float_support_test",
    srcs = ["gpu_float_support_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_float_support",
        ":ir_emission_utils",
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/hlo/testlib:test_helpers",
        "//xla/hlo/transforms/simplifiers:float_normalization",
        "//xla/service:hlo_verifier",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor/cuda:cuda_compute_capability",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from metrics.cc / metrics.h. Depends on the TSL monitoring
# counter, gauge and sampler targets.
cc_library(
    name = "metrics",
    srcs = ["metrics.cc"],
    hdrs = ["metrics.h"],
    deps = [
        "//xla/tsl/lib/monitoring:counter",
        "//xla/tsl/lib/monitoring:gauge",
        "//xla/tsl/lib/monitoring:sampler",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@local_tsl//tsl/platform",
        "@local_tsl//tsl/platform:stacktrace",
    ],
)

# Test for :metrics, built from metrics_test.cc. Tagged "no_oss" for the reason
# given on the tag below.
xla_cc_test(
    name = "metrics_test",
    srcs = ["metrics_test.cc"],
    tags = [
        # Streamz recording doesn't work in OSS.
        "no_oss",
    ],
    deps = [
        ":metrics",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/monitoring:collected_metrics",
        "//xla/tsl/lib/monitoring:collection_registry",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from runtime_intrinsics.cc / runtime_intrinsics.h with the
# tsl_gpu_library macro (loaded from //xla/tsl:tsl.bzl).
tsl_gpu_library(
    name = "runtime_intrinsics",
    srcs = ["runtime_intrinsics.cc"],
    hdrs = ["runtime_intrinsics.h"],
    deps = [
        "//xla:shape_util",
        "//xla:util",
        "//xla:xla_data_proto_cc",
        "//xla/service:collective_ops_utils",
        "//xla/service:custom_call_status",
        "//xla/service:custom_call_target_registry",
        "//xla/service:platform_util",
        "//xla/stream_executor:device_memory",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:platform_manager",
        "//xla/stream_executor:stream",
        "//xla/stream_executor:stream_finder",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:statusor",
    ],
    # Forces every object file of this library into binaries that depend on it,
    # even when no symbol is referenced.
    # NOTE(review): presumably needed because the .cc registers custom-call
    # targets from static initializers (see the custom_call_target_registry
    # dep) — confirm.
    alwayslink = 1,
)

# Test for :runtime_intrinsics, built from runtime_intrinsics_test.cc with the
# xla_test macro for the "gpu" backend.
xla_test(
    name = "runtime_intrinsics_test",
    srcs = ["runtime_intrinsics_test.cc"],
    backends = ["gpu"],
    deps = [
        ":runtime_intrinsics",
        "//xla/hlo/ir:hlo",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from hlo_fusion_stats.cc / hlo_fusion_stats.h. Its only XLA
# dependency is //xla/hlo/ir:hlo.
cc_library(
    name = "hlo_fusion_stats",
    srcs = ["hlo_fusion_stats.cc"],
    hdrs = ["hlo_fusion_stats.h"],
    deps = [
        "//xla/hlo/ir:hlo",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@local_tsl//tsl/platform:errors",
    ],
)

# Test for :hlo_fusion_stats, built from hlo_fusion_stats_test.cc.
xla_cc_test(
    name = "hlo_fusion_stats_test",
    srcs = ["hlo_fusion_stats_test.cc"],
    tags = [
        # NOTE(review): presumably opts this test out of MemorySanitizer runs;
        # the reason is not recorded here — TODO document or remove.
        "nomsan",
    ],
    deps = [
        ":hlo_fusion_stats",
        "//xla/hlo/parser:hlo_parser",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest",
    ],
)

# Library built from conv_layout_normalization.cc / conv_layout_normalization.h.
# Depends on the local :cublas_cudnn target.
cc_library(
    name = "conv_layout_normalization",
    srcs = ["conv_layout_normalization.cc"],
    hdrs = ["conv_layout_normalization.h"],
    deps = [
        ":cublas_cudnn",
        "//xla:shape_util",
        "//xla:status_macros",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/service:hlo_creation_utils",
        "@com_google_absl//absl/status:statusor",
        "@local_tsl//tsl/platform:protobuf",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Test built from dot_algorithm_support_test.cc with the xla_test macro,
# instantiated only for the backends listed below.
xla_test(
    name = "dot_algorithm_support_test",
    srcs = ["dot_algorithm_support_test.cc"],
    # NOTE(review): backend names look like specific NVIDIA GPU generations plus
    # any AMD GPU — confirm against //xla/tests:build_defs.bzl.
    backends = [
        "v100",
        "a100",
        "h100",
        "b200",
        "amdgpu_any",
    ],
    tags = [
        # NOTE(review): presumably excludes the test on macOS — confirm.
        "no_mac",
    ],
    deps = [
        "//xla:shape_util",
        "//xla:xla_data_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:semantic_version",
        "//xla/tests:hlo_test_base",
        "//xla/tests:xla_internal_test_main",  # fixdeps: keep
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_googletest//:gtest",
    ],
)

# Library built from kernel_reuse_cache.cc / kernel_reuse_cache.h. Depends on
# the local :executable_proto_cc and :launch_dimensions targets.
cc_library(
    name = "kernel_reuse_cache",
    srcs = ["kernel_reuse_cache.cc"],
    hdrs = ["kernel_reuse_cache.h"],
    deps = [
        ":executable_proto_cc",
        ":launch_dimensions",
        "//xla:status_macros",
        "//xla:util",
        "//xla/codegen/emitters:kernel_arguments",
        "//xla/hlo/ir:hlo",
        "//xla/stream_executor:launch_dim",
        "//xla/stream_executor/gpu:tma_metadata",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
        "@local_tsl//tsl/platform:env",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
    ],
)

# Test for :kernel_reuse_cache, built from kernel_reuse_cache_test.cc.
xla_cc_test(
    name = "kernel_reuse_cache_test",
    srcs = ["kernel_reuse_cache_test.cc"],
    deps = [
        ":executable_proto_cc",
        ":kernel_reuse_cache",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "@com_google_absl//absl/log:check",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:env",
    ],
)

# Test built from determinism_test.cc with the xla_test macro for the "gpu"
# backend. Uses gtest_main and depends on both the GPU codegen test base and
# //xla/stream_executor:mock_stream_executor.
xla_test(
    name = "determinism_test",
    srcs = ["determinism_test.cc"],
    backends = ["gpu"],
    deps = [
        "//xla:literal",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/service:backend",
        "//xla/service:platform_util",
        "//xla/service/gpu/autotuning:autotuner_util",
        "//xla/service/gpu/tests:gpu_codegen_test",
        "//xla/stream_executor:device_description",
        "//xla/stream_executor:mock_stream_executor",
        "//xla/stream_executor:platform",
        "//xla/stream_executor:stream_executor_h",
        "//xla/tests:hlo_test_base",
        "//xla/tests:literal_test_util",
        "//xla/tests:test_utils",
        "//xla/tsl/lib/core:status_test_util",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library exposing gpu_symbol_repository.h; it declares only hdrs (no srcs).
cc_library(
    name = "gpu_symbol_repository",
    hdrs = ["gpu_symbol_repository.h"],
    # Value supplied by the helper loaded from //xla/tsl:tsl.default.bzl.
    compatible_with = get_compatible_with_portable(),
    deps = [
        "//xla:autotune_results_proto_cc",
        "//xla:xla_proto_cc",
        "//xla/service:symbol_repository",
    ],
)

# Library built from execution_stream_assignment.cc /
# execution_stream_assignment.h. Depends on the GPU runtime thunk target and
# //xla/service:call_graph.
cc_library(
    name = "execution_stream_assignment",
    srcs = ["execution_stream_assignment.cc"],
    hdrs = ["execution_stream_assignment.h"],
    deps = [
        "//xla:side_effect_util",
        "//xla/backends/gpu/runtime:thunk",
        "//xla/hlo/ir:hlo",
        "//xla/service:call_graph",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Test for :execution_stream_assignment, built from
# execution_stream_assignment_test.cc; uses gtest_main.
xla_cc_test(
    name = "execution_stream_assignment_test",
    srcs = ["execution_stream_assignment_test.cc"],
    deps = [
        ":execution_stream_assignment",
        "//xla/backends/gpu/runtime:thunk",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library built from gpu_latency_hiding_scheduler.cc /
# gpu_latency_hiding_scheduler.h. Depends on
# //xla/service:latency_hiding_scheduler and
# //xla/service:profile_guided_latency_estimator.
cc_library(
    name = "gpu_latency_hiding_scheduler",
    srcs = ["gpu_latency_hiding_scheduler.cc"],
    hdrs = ["gpu_latency_hiding_scheduler.h"],
    deps = [
        ":backend_configs_cc",
        ":cublas_cudnn",
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:collective_ops_utils",
        "//xla/service:collective_permute_decomposer",
        "//xla/service:hlo_cost_analysis",
        "//xla/service:latency_hiding_scheduler",
        "//xla/service:profile_guided_latency_estimator",
        "//xla/service/gpu/transforms/collectives:collective_ops_utils",
        "//xla/stream_executor:stream_executor_h",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test for :gpu_latency_hiding_scheduler, built from
# gpu_latency_hiding_scheduler_test.cc. Also depends on :gpu_hlo_schedule,
# :alias_info and :gpu_device_info_for_tests.
xla_cc_test(
    name = "gpu_latency_hiding_scheduler_test",
    srcs = ["gpu_latency_hiding_scheduler_test.cc"],
    deps = [
        ":alias_info",
        ":gpu_device_info_for_tests",
        ":gpu_hlo_schedule",
        ":gpu_latency_hiding_scheduler",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "//xla/service:hlo_module_config",
        "//xla/service:profile_guided_latency_estimator",
        "//xla/tests:xla_internal_test_main",
        "//xla/tsl/lib/core:status_test_util",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@com_google_googletest//:gtest",
        "@local_tsl//tsl/platform:errors",
        "@local_tsl//tsl/platform:logging",
        "@local_tsl//tsl/platform:statusor",
    ],
)

# Library built from fusion_deduplication_cache.cc /
# fusion_deduplication_cache.h. Its XLA dependencies are //xla:shape_util and
# //xla/hlo/ir:hlo.
cc_library(
    name = "fusion_deduplication_cache",
    srcs = ["fusion_deduplication_cache.cc"],
    hdrs = ["fusion_deduplication_cache.h"],
    deps = [
        "//xla:shape_util",
        "//xla/hlo/ir:hlo",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/functional:function_ref",
        "@com_google_absl//absl/hash",
        "@com_google_absl//absl/log:check",
        "@com_google_absl//absl/strings",
    ],
)

# Test for :fusion_deduplication_cache, built from
# fusion_deduplication_cache_test.cc; uses gtest_main.
xla_cc_test(
    name = "fusion_deduplication_cache_test",
    srcs = ["fusion_deduplication_cache_test.cc"],
    deps = [
        ":fusion_deduplication_cache",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:hlo_hardware_independent_test_base",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:status",
        "@local_tsl//tsl/platform:statusor",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library built from ptx_compile_options_from_debug_options.cc / .h. Depends
# only on //xla:xla_proto_cc and
# //xla/stream_executor/cuda:compilation_options.
cc_library(
    name = "ptx_compile_options_from_debug_options",
    srcs = ["ptx_compile_options_from_debug_options.cc"],
    hdrs = ["ptx_compile_options_from_debug_options.h"],
    deps = [
        "//xla:xla_proto_cc",
        "//xla/stream_executor/cuda:compilation_options",
    ],
)

# Test for :ptx_compile_options_from_debug_options, built from
# ptx_compile_options_from_debug_options_test.cc; uses gtest_main.
xla_cc_test(
    name = "ptx_compile_options_from_debug_options_test",
    srcs = ["ptx_compile_options_from_debug_options_test.cc"],
    deps = [
        ":ptx_compile_options_from_debug_options",
        "//xla:xla_proto_cc",
        "//xla/stream_executor/cuda:compilation_options",
        "@com_google_googletest//:gtest_main",
        "@local_tsl//tsl/platform:test",
    ],
)

# Library exposing flag_utils.h; it declares only hdrs (no srcs).
cc_library(
    name = "flag_utils",
    hdrs = ["flag_utils.h"],
    deps = [
        "//xla/hlo/ir:hlo",
        "//xla/service:collective_pipeliner",
        "//xla/service:hlo_module_config",
        "//xla/service:latency_hiding_scheduler",
        "//xla/service/gpu/transforms:double_buffer_loop_unrolling",
    ],
)

# Test for :flag_utils, built from flag_utils_test.cc; uses gtest_main.
xla_cc_test(
    name = "flag_utils_test",
    srcs = ["flag_utils_test.cc"],
    deps = [
        ":flag_utils",
        "//xla:xla_proto_cc",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/service:collective_pipeliner",
        "//xla/service:hlo_module_config",
        "//xla/service:latency_hiding_scheduler",
        "//xla/service/gpu/transforms:double_buffer_loop_unrolling",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library built from resource_requests.cc / resource_requests.h. Depends on
# :gpu_executable_run_options, the //xla/backends/gpu/collectives clique
# targets and //xla/backends/gpu/runtime:thunk.
cc_library(
    name = "resource_requests",
    srcs = ["resource_requests.cc"],
    hdrs = ["resource_requests.h"],
    deps = [
        ":gpu_executable_run_options",
        "//xla/backends/gpu/collectives:gpu_clique",
        "//xla/backends/gpu/collectives:gpu_clique_key",
        "//xla/backends/gpu/collectives:gpu_cliques",
        "//xla/backends/gpu/runtime:thunk",
        "//xla/core/collectives:rank_id",
        "//xla/tsl/platform:env",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/container:flat_hash_map",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/synchronization",
        "@local_tsl//tsl/profiler/lib:traceme",
        "@local_tsl//tsl/profiler/lib:traceme_encode",
    ],
)
