o
    Ih                  	   @   s
  U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZ e jdddkZdZdee fd	d
Zdee fddZdee fddZdee fddZdee fddZdefddZe jdddkZdZdZdZeddddZ ee!d< e Z"ee e!d< e Z#ee e!d< dZ$ee!d< e Z%ee e!d< e Z&ee e!d< e jd dkZ'ee!d!< dZ(ee) e!d"< d#Z*ed$ e!d%< d#Z+ed$ e!d&< e jd'd(dkZ,ee!d)< e jd*ddkZ-dZ.dZ/e jd+ddkZ0e jd,dkZ1e jd-ddkZ2dZ3dZ4dZ5e jd.d(dkZ6e jd/dkZ7e jd0d1Z8ed2 e!d3< dZ9dZ:e Z;dZ<dZ=dZ>dZ?e	j@jAjBe!d4< dZCe	j@jAjBe!d5< dZDeee	jEjFgdf  e!d6< dZGeee	jEjFgdf  e!d7< dZHeee	jEjIjFgdf  e!d8< dZJeeeKd9 geKd9 f  e!d:< dZLdZMdZNdZOdZPi ZQeReSeReSef f e!d;< i ZTeReSeReSef f e!d<< dZUe jd=ddkZVdZWdZXdd>d?dd@ZYeReSef e!dA< dBZZedC e!dD< dZ[g dEZ\eKeeSeeKd9 geKd9 f f  e!dF< dZ]dGZ^dHZ_dIZ`eddJdKdLZaee!dM< e jdNdkZbe jdOdkZce jdPdkZddQZeee) e!dR< dZfe r.dne jdSdkZge jdTdUh Zie jdVdWh Zje jdXdYh ZkedZ e!d[< e jd\ddkZld]Zme jd^dkZne jd_dkZoe jd`dkZpdaZqdbZrdcZse jdddkZte jdedkZue jdfdkZve)e jdgdZwe jdhdiZxe jdjdkZydleSdefdmdnZzdleSdefdodpZ{dleSdefdqdrZ|e jdsdYZ}e	j~jsdnd(Ze jdtedkZe jdud(dkZe jdvddkZe jdwdkZdxZdyZdzZdZdZdZe jd{dkZee!d|< e jd}dkZee!d~< e jddiZe jdd(dkZee!d< dQZe jdddkZd?ZdZdzZdZdzZdZdZdZe jdd(dkZdZdZdZdZdZdZd?Zd?ZdZdZdZe jdd(dkZde	jv pde	jv Ze peZe jdddkZdZeeS e!d< dZeeS e!d< deSfddZe ZeSe!d< dZdIZddgZeKeed eSf  e!d< dZee!d< G dd dZdefddZde)fddZe rdne Zee) e!d< eeS e!d< e r'z d dlmZ eree jede jdZnedZW n eefy&   dZY nw dZdQZe jdddkZe jdddkZdZdZdZdZdZdZdZee!d< e jdd(dkZdZdZdZdZe jddiZedikZedkrrdineZe jddZeeS e!d< e jddkZdZe jdd(dkZee!d< dZee!d< dZee!d< dZee!d< dZee!d< dZee!d< dZee!d< e jdd(dkZe jdd(dkZee!d< G dd dZG dd dZG dd dZG dd dZG dd dZdZed e!d< dZed e!d< G dd dZG ddĄ dăZg dŢZeKeS e!d< g dǢZeKeS e!d< g ZeKee	je	je	jgdf  e!d< G dd˄ d˃ZerGd dlT eeje  dS )    N)AnyCallableLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_moduleTORCHINDUCTOR_INPLACE_PADDING1Freturnc                   C      t dS )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr
    r   r   J/var/www/vscode/kcb/lib/python3.10/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_default      r   c                   C   s,   t jddkr
dS t jddkrdS d S )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   r   vec_isa_ok_default   s
   r   c                   C   r   )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   autotune_remote_cache_default   r   r   c                   C   r   )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   %bundled_autotune_remote_cache_default   r   r   c                   C   s   t dt sdS d S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   r   )bundle_triton_into_fx_graph_cache_default#   s   r!   c                  C   s@   d} dt jv rt jddkS t rd}tj|}|| kS dS )Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionT)r   r   r   r   torch_utils_internaljustknobs_getval_int)ENABLE_PROLOGUE_FUSION_VERSIONjk_nameversionr   r   r   prologue_fusion_enabled*   s   
r)   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2Tz0pytorch/remote_cache:enable_local_fx_graph_cacheTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_forcedefaultfx_graph_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cacheautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cache"TORCHINDUCTOR_FORCE_DISABLE_CACHESforce_disable_cachessleep_sec_TESTING_ONLYneeds_fixed_stride_order)r8   flexible_layout#custom_op_default_layout_constraint'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapperTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)nonerE   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r.   tritonatenrY   mixed_mm_choice)reorder_compute_for_overlap
sink_waitsraise_comms'reorder_for_compute_comm_overlap_passesr.   i,     *TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r.   r-   r,   use_experimental_benchmarkerTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM
   autotune_num_choices_displayed"TORCHINDUCTOR_FORCE_SAME_PRECISION(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)rm   
EXHAUSTIVEmax_autotune_gemm_search_space'TORCHINDUCTOR_AUTOTUNE_FALLBACK_TO_ATENi    #TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHETORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g      ?g       @#TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmnamec                 C   s   t | pt| S )N)collect_autoheuristicuse_autoheuristicr|   r   r   r   run_autoheuristic  s   r   c                 C      | t jjjdv S N,)r#   	_inductorconfigautoheuristic_collectsplitr   r   r   r   r}        r}   c                 C   r   r   )r#   r   r   autoheuristic_user   r   r   r   r   r~     r   r~   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT         TORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_BENCHMARK_KERNEL%TORCHINDUCTOR_EMULATE_PRECISION_CASTSdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  C   s4   dt jv rt jd } nd} | dv sJ d|  | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methodr   r   r   decide_worker_start_methodo  s   

r   worker_start_methodfuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   @   s&   e Zd ZU dZeed< dZeed< dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r   bool__annotations__r   intr   r   r   r   r     s   
 r   c                  C   s   d} d}t j|}| |kS )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    rS   z0pytorch/inductor:enable_parallel_compile_version)r#   r$   r%   )ENABLE_PARALLEL_COMPILE_VERSIONr'   r(   r   r   r   #parallel_compile_enabled_internally  s   r   c                  C   s   ddl } | t}dtjv rttjd }|d| |S tjdkr+d}|d |S t	 r:t
 s:d}|d |S ttd	rFttdnt }|sNJ td
|}|d| |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rS   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr   r   r   r   decide_compile_threads  s,   





r   r   global_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_trainingc                   @   s  e Zd ZU dZejdddkZejdddkZdZ	e
e ed< eejdd	Zdejd
ejdkr6dndfZeed ef ed< ejdddkZejdddkZdZe
e ed< dZe
e ed< edZe
e ed< dZeeed f ed< eejddZejdddkZejdddkZejddZ ejdddkZ!dZ"eejd dZ#ejd!dZ$ejd"dZ%d#Z&dZ'dS )$cpp$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE4096CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr#   r   inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST)(r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr   r   r   r   min_chunk_sizer   r   r   tupler   strenable_kernel_profileweight_prepackr   r   r
   r   r   r   r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearr   r   r   r   r   g  sL   
 
r   c                   @   s  e Zd ZU ejddkZdZdZdZ	dZ
e rdndZdZdZee ed< dZdZdZdZdZdZdZdZdZeed	< dZdZd
Zee ed< dZeed< dZ dZ!ejdddkZ"ejdddkZ#dZ$e%ee&d f ed< ejdddkZ'ejdddkZ(dZ)eejddZ*e&d ed< dZ+dZ,dZ-dZ.eed< dZ/d
Z0ee1 ed< dZ2ejdddkZ3ejdddkZ4dZ5d
S )rZ   TORCHINDUCTOR_CUDAGRAPHSr   TFr   2   "cudagraph_dynamic_shape_warn_limit   prefer_nd_tilingNautotune_at_compile_timetile_reductions!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr   r   r   r   #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rS   r     multi_kernel      spill_thresholdr   ENABLE_PERSISTENT_TMA_MATMULTORCHINDUCTOR_SKIP_L1)6r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsslow_path_cudagraph_asserts!cudagraph_trees_history_recordingr    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr  r   r   r   force_cudagraph_syncforce_cudagraphs_warmupfast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexing	max_tilesr  r   autotune_pointwiseautotune_cublasLtr  r   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr   r   r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsr  divisible_by_16min_split_scan_rblockstore_cubinr  use_block_ptrr   r  codegen_upcast_to_fp32enable_persistent_tma_matmulskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYr   r   r   r   rZ     sf   
 

rZ   c                   @   s0  e Zd ZU dZejdddkZejdddkZejddZ	e
d ed< ejd	d
ZdZdZdZeed< dZeed< dZeed< dZeed< i Zeeef ed< ejdddkZeed< ejdddkZeed< eejddZeed< i Zeeef ed< dZeed< dZeed< dZeed< d
S )aot_inductorry   AOT_INDUCTOR_DEBUG_COMPILEr   r   $AOT_INDUCTOR_COMPILE_WRAPPER_WITH_O0-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightspackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr  repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interfaceTpackage_constants_in_so) r   r   r   output_pathr   r   r   debug_compilecompile_wrapper_with_O0rH  r   r   filtered_kernel_namesserialized_in_specserialized_out_specrJ  r   rK  rL  rM  rN  dictr  rP  rR  r   rT  rU  r   rV  rW  rX  r   r   r   r   rB    s4   
 	
rB  c                
   @   s  e Zd ZU dZee ed< dZee ed< dZe	d ed< dZ
dZdZdZejdejejejejd	ZdZee ed
< g dZee ed< dZee ed< dZeed< ejdddkZeed< ejdZ ee ed< ejdZ!ee ed< ejddZ"eed< dS )cudaNarchr(   -O1)-O0rb  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)rS   r  r   %cutlass_max_profiling_swizzle_optionscuda_cxxrS   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   generate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_level)#r   r   r   ra  r   r  r   r(   rf  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   r   r   pathabspathjoindirnamer#   __file__cutlass_dirrh  r   ri  listrj  rk  rm  r   ro  rq  rs  r   r   r   r   r`    s:   
 


r`  c                   @   s   e Zd ZU g Zee ed< ddgZee ed< dZe	d ed< dZ
dZd	Zd	ZdZd
Zee ed< ejdZejdddkZeed< d
Zee ed< dZeed< d
Zeee  ed< dZeed< d
S )rocmra  gfx90agfx942ck_supported_archrd  )	rc  rb  rd  re  z-Osz-Ozz-Ominz-Ofastz-Omaxrf  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   rm  n_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_threshold)r   r   r   ra  r~  r  r   r  rf  r   is_debug
save_tempsrw  flush_denormalsprint_kernel_resource_usager  r   r   r   r   ck_dirrm  r   r  r   r  r  r  r   r   r   r   r  ;  s(   
 
r  )r   rZ   halidecpu_backend)rZ   r  cuda_backendc                   @   sB   e Zd ZU dZdZdZed ed< dZed ed< dZ	dZ
dZd	S )
r  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr  r   r   r  assertsdebugscan_kernelsr   r   r   r   r  |  s   
 r  c                   @   s   e Zd ZU ejdddkZejdddkZdZe	e
 ed< dZdZdZdZdZdZdZejd	ddkZejd
ddkZejddZejddZdZdZe	ee
gdf  ed< dZeed< dZeed< dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarlog_autotuning_results1log_inductor_triton_kernel_to_post_grad_node_info)r   r   r   r   r   r   enabledsave_real_tensorsr  r   r  r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   r  r   r  r   r   r   r   r    s&   
 
r  )ztrace.upload_tarrL   rM   rN   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifier_save_config_ignore)r  zcuda.cutlass_dirr   r   rK   rJ   r   _cache_config_ignore_prefixexternal_matmulc                   @   sR   e Zd ZU dZeed< dZee ed< dZ	dZ
ee ed< dZee ed< dZdS )test_configsF%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regex)r   r   r   r  r   r   r  r   r   runtime_triton_dtype_assertr  r  r  *graphsafe_rng_func_ignores_fallback_randomr   r   r   r   r    s   
 r  )*)r   r   typingr   r   r   r   r   r   r#   !torch._inductor.custom_graph_passtorch._environmentr   torch.utils._config_moduler	   r
   r   r   r   inplace_paddingcan_inplace_pad_graph_inputr   r   r   r   r   r!   r)   enable_auto_functionalized_v2r  disable_progressverbose_progressr/   r   r0   r1   r2   r3   r4   r6   r7   r   r:   r;   r=   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsscalar_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningrw  rI   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passrJ   r   custom_graph_passCustomGraphPassTyperK   rL   fxGraphrM   rN   graphrO   r~  split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrP   r_  r  rQ   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmrX   r\    reorder_for_compute_comm_overlapr`   reorder_for_peak_memoryestimate_op_runtimeintra_node_bwinter_node_bwrc   max_autotunemax_autotune_pointwisemax_autotune_gemmrh   graph_partitionforce_same_precisionuppermax_autotune_gemm_backendsmax_autotune_conv_backendsro   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r}   r~   autoheuristic_log_pathr(   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdfallback_randomimplicit_fallbacksaggressive_fusionr   r   enabled_metric_tablesr   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_sizemax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r   r   r   libfb.pyr   __package__get_dir_pathrx  rz  replacesepr   
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastdisable_padding_cpupadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler   permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexr   /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr   r   r   r   r   r   r   enable_linear_binary_foldingr   r   rZ   rB  r`  r  r  r  r  r  r  r  r  Tensorr  torch.utils._config_typingmodulesr   r   r   r   r   <module>   s>  
  
	 

	%
m 4ZZ;C
$