
    Phf                   
   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZm Z m!Z! d dl"m"Z" d dl#m$Z$m%Z%m&Z& d dlm'Z' d dlm(Z( d d	lm)Z) d d
lm*Z* d dlm+Z+ d dl,m-Z-m,Z, d dl.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 d dl:Z:d dl;m<Z<m=Z= d dl>m?Z? d dl@mAZAmBZB d dlCmDZD d dlEmFZFmGZGmHZH d dlImJZJ d dlKmLZLmMZMmNZN e8rd dlOmPZP d dlQmRZR d dlSmTZTmUZU ej                  j                  eX      ZYej                  j                  ej                  j                  eY            Z[ eAj                         rd dl]m^Z^ d dl_m`Z` d dlambZbmcZcmdZdmeZe nd Zbd Zcd ZdddZed Zfd!agdahdd"Zidd#Zj e
j                  el      Zmdd$Znd% Zo G d& d'      Zp G d( d)ep      Zq G d* d+ep      Zrdd,Zsdd-Ztddd.Zu	 d	 	 	 	 	 	 	 dd/Zvddd0Zw	 	 	 d	 	 	 	 	 	 	 	 	 	 	 dd1Zxdd2Zyej                   G d3 d4             Z{ej                   G d5 d6             Z|dd7Z}dd8Z~d9 Zd: Zd; Z G d< d=ej                        Z ej                  d      dd>       Zej                   G d? d@             Z G dA dB      Z	 	 	 	 	 	 	 	 ddCZ G dD dE      Zej                   G dF dG             ZddHZddIZ ej                  dJ      ddK       ZddLZddMZddNZ ej                  d      ddO       Z G dP dQ      Zej                   G dR dSe             Zej                   G dT dUe             Zej                   G dV dWe             Z G dX dYe      Z e       Z e        e       gZ ej                  d      ddZ       Zdd[Zddd]Zddd^Zddd_Zdd`ZddaZddbZddcZdddZddeZddfZ ej                  d      ddg       Z ej                  d      ddh       Zdiedidif	 	 	 	 	 	 	 	 	 ddjZd\d\diedidididif	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddkZddlZ ej                  d      ddm       Z G dn do      Z G dp dq      Zej                  ddr       ZddsZ	 	 	 	 	 	 	 	 ddtZdaduedv<    G dw dx      Z G dy dz      Z G d{ d|      Z G d} d~      ZddZddZddZddZddZ	 	 	 	 	 	 	 	 ddZ G d d      Z G d d      Zd Z	 	 	 	 	 	 	 	 	 	 ddZddZ G d d      ZddZdaded<    G d d      ZeĐj                          y)    )annotationsN)bisect_right)FutureProcessPoolExecutorThreadPoolExecutor)copy)c_void_pcdllCDLL)field)partial)abc)Path)Thread)sleeptime)
ModuleType)	AnyCallableDictListOptionalSetTupleTYPE_CHECKINGUnion)get_interface_for_device get_registered_device_interfaces)counters)configexc)cuda_env)	cache_dirdeveloper_warningis_linux)suggest_memory_format)has_hinthint_intShapeEnv)GraphLowering)ChoiceCaller)	_Faketqdmtqdm)build_paths)_run_build_command)log_global_cache_errorslog_global_cache_statslog_global_cache_valsuse_global_cachec                      y N argskwargss     dC:\Users\daisl\Desktop\realtime-object-detection\venv\Lib\site-packages\torch/_inductor/codecache.pyr0   r0   L           c                      y r5   r6   r7   s     r:   r1   r1   O   r;   r<   c                      y r5   r6   r7   s     r:   r2   r2   R   r;   r<   c                      yNFr6   r6   r<   r:   r3   r3   U       r<   iX  g        c                 &    t         t               a y y r5   )_t0r   r6   r<   r:   _compile_startrD   `   s    
{f r<   c                 J    t         t               } t        | t         z
  z  ad a y y r5   )rC   r   _cumulative_compile_time)t1s    r:   _compile_endrH   f   s'    
V BH,  r<   c                   t         j                  j                  dn,dt         j                  j                  j                  dd       }dt        j
                  j                   t        j
                  j                   }| d| }t        j                  j                  t               |      }t        j                  j                  ||       }t        j                  |d       |S )	Ncpucu. py_Texist_ok)torchversioncudareplacesysversion_infomajorminorospathjoinr#   makedirs)namecu_strpython_versionbuild_foldercpp_wrapper_dircpp_wrapper_build_directorys         r:   cpp_wrapper_cache_dirrd   r   s     ==% 	%--$$,,S"567 
 #**001#2B2B2H2H1IJN$%Qvh/Lggll9;=O"$'',,"EKK+d;&&r<   c                 >    t         j                  j                  dS dS )N
cubin_path
hsaco_path)rR   rS   hipr6   r<   r:   get_cpp_wrapper_cubin_path_nameri      s     ==,,4<F,Fr<   c                      e Zd Ze ej
                  d      dd              Ze ej
                  d      d	d              Ze ej
                  d      d
d              ZddZ	ddZ
ddZy)	CacheBaseNc                 6   	 dd l } | j                  }	 dt        j                  j                  t        j                  j                               j                  it        j                  j                  |ddt        j                  j                  j                  j                  id}t        j                  t!        j"                  |d      j%                  d            j'                         |d	<   |S # t        $ r d }Y w xY w# t        t        f$ r i }Y pw xY w)
Nr   r^   )rT   triton
allow_tf32)devicerS   otherT)	sort_keysutf-8hash)rm   __version__ModuleNotFoundErrorrR   rT   get_device_propertiescurrent_devicer^   rS   backendsmatmulrn   AssertionErrorRuntimeErrorhashlibsha256jsondumpsencode	hexdigest)rm   triton_versionsystems      r:   
get_systemzCacheBase.get_system   s    	"#//N	 EJJ<<

113d "MM..,
 !%.."5"5"<"<"G"G&F$ !JJv.55g>

)+ 	v 5 # 	"!N	"$ - 	F	s#   C3 BD 3D DDDc                     t        t        j                  j                  t	               dt
        j                         d               S )Ncachers   )r   rZ   r[   r\   r#   rk   r   r6   r<   r:   get_local_cache_pathzCacheBase.get_local_cache_path   s0     BGGLLgy7K7K7Mf7UVWWr<   c                     t         j                  Lt        t        j                  j                  t         j                  t        j                         d               S d S )Nrs   )r    global_cache_dirr   rZ   r[   r\   rk   r   r6   r<   r:   get_global_cache_pathzCacheBase.get_global_cache_path   sL    
 &&2 f55y7K7K7Mf7UVW	
 	
r<   c                    t         j                  j                         sy t        j	                         | _        t        j                         | _        t        j                         | _	        y r5   )
rR   rT   is_availablerk   r   r   r   local_cache_pathr   global_cache_pathselfs    r:   __init__zCacheBase.__init__   sG    zz&&(**, ) > > @!*!@!@!Br<   c                    | j                   j                         si S t        | j                         5 }t        j                  |      }d d d        |d   S # 1 sw Y   d   S xY wNr   )r   is_fileopenr~   load)r   local_cache_fplocal_caches      r:   get_local_cachezCacheBase.get_local_cache   sX    $$,,.I$''(N))N3K )7## )7##s   AA"c                B   t         j                  j                  | j                  j                        s+t        j
                  | j                  j                  d       t        t        | j                        t        j                  | j                  |dd             y )NTrP   )r   r      )indent)rZ   r[   existsr   parentr]   write_atomicstrr~   r   r   )r   r   s     r:   update_local_cachezCacheBase.update_local_cache   se    ww~~d33::;KK--44tD%%&JJ$++DQO	
r<   )returnDict[str, Any])r   r   )r   zOptional[Path]r   None)r   r   r   r   )__name__
__module____qualname__staticmethod	functools	lru_cacher   r   r   r   r   r   r6   r<   r:   rk   rk      s    Y  B YX  X Y
  
C$
r<   rk   c                      e Zd ZddZddZy)
LocalCachec                N    | j                         }|}|D ]  }||v r||   } y  |S r5   )r   )r   keysr   	sub_cachekeys        r:   lookupzLocalCache.lookup   s:    $$&	Ce|!#J		  r<   c                   | j                         }|}|dd D ]  }|j                  |i        ||   } |||d   <   | j                  |       y )Nr   )r   
setdefaultr   )r   valuer   r   r   r   s         r:   	set_valuezLocalCache.set_value   s\    $$&	":C  b)!#I  $	$r(&r<   N)r   r   r   Optional[Dict[str, Any]])r   r   r   r   r   r   )r   r   r   r   r   r6   r<   r:   r   r      s    
	'r<   r   c                  X    e Zd Z ej                  d      d        Z	 	 	 	 	 	 	 	 	 	 ddZy)PersistentCacheNc                    | j                   | j                   j                         si S t        | j                         5 }t        j                  |      }d d d        |d   S # 1 sw Y   d   S xY wr   )r   r   r   r~   r   )r   global_cache_fpglobal_caches      r:   get_global_cachez PersistentCache.get_global_cache   sc    !!)1G1G1O1O1QI$(()_99_5L *G$$ *G$$s   A!!A.c                n   t        t        | j                        }t        t        | j                        }t        t        | j                        }i ddfd}t
        j                  st
        j                  r| j                         }	 ||	      st               r || j                         |      s	  |      t        fdD              sJ |	j                  i        |	   j                  i        j                         D ]  \  }
}||	      |
j                         <     	 | j!                  |	       D 
ci c]  }
|
j                         |
    }}
 ||       S t               r || j                         |       S # t        $ r} ||       |d}~ww xY wc c}
w )a  
        Check to see if we have benchmarked the given choice callers. For each
        choice caller:

            1. Check global_cache[name][inputs][choice], return benchmark if cached.
            2. Check local_cache[name][inputs][choice], return benchmark if cached.
            3.
                a. `max_autotune_gemm=True`: benchmark the choice, update
                    local_cache[name][inputs][choice], and return the benchmark.
                b. `max_autotune_gemm=False`: don't benchmark the choice, return nothing.
        Nc                    d}D ]H  }|j                         }|| j                  i       j                  i       v r|       |   |<   Fd} n |r	 ||       |S )z2Check if `cache` contains data for all the choicesTF)cached)hash_keyget)	r   callbackhitchoicechoice_hashchoicesinputsr^   timingss	        r:   check_cachez+PersistentCache.lookup.<locals>.check_cache
  sq    C!$oo/%))D""5"9"9&""EE&+Dk&&9+&FGFO  C " $Jr<   )r   c              3  &   K   | ]  }|v  
 y wr5   r6   ).0r   r   s     r:   	<genexpr>z)PersistentCache.lookup.<locals>.<genexpr>$  s     GwVv0ws   r5   r   bool)r   r1   r   r2   r0   r    max_autotunemax_autotune_gemmr   r3   r   allr   itemsr   r{   r   )r   r   r^   r   	benchmark	log_statslog_vals
log_errorsr   r   r   timingetimings_to_logr   s    ```          @r:   r   zPersistentCache.lookup   s   & 2DKKvN	0$++tVL4dkk4P
	 	  &":":..0K{+ " 5 5 7)L'0GGwGGGG**44%00<*1--/GMD)&1&//2CD +: ''4 FM"EL6FOO%wv6W  " (  --/)D ! $ qMG"s    A6F F2	F/ 
F**F/)
r   zList[ChoiceCaller]r^   r   r   r   r   z*Callable[[Any], Dict[ChoiceCaller, float]]r   zDict[ChoiceCaller, float])r   r   r   r   r   r   r   r6   r<   r:   r   r      s\    Y% %H#H H 	H
 >H 
#Hr<   r   c                     t         j                  j                  t               d      } t         j                  j	                  |       st        j
                  | d       | S )NlocksTrP   )rZ   r[   r\   r#   r   r]   )lock_dirs    r:   get_lock_dirr   =  s;    ww||IK1H77>>(#
Ht,Or<   c                    t        j                  t        j                  |       j	                               d d j                  d      j                         S )N3   rr   )base64	b32encoder|   r}   digestdecodelower)datas    r:   sha256_hashr   D  s@    GNN40779:3B?FFwOUUWWr<   c                    t        | t              r| n| j                  d      }|dk7  r|dz   |j                  d      z   }dt        |      z   S )Nrr   rM   s   ||c)
isinstancebytesr   r   )codeextrahashing_strs      r:   	code_hashr   I  sJ    $T51$t{{77KK{!E)ELL,AA[)))r<   c                F   |rKt         j                  j                  |      r|}nTt         j                  j                  t	               |      }n+t         j                  j                  t	               | dd       }t         j                  j                  ||  d|       }| ||fS )N      rL   )rZ   r[   isabsr\   r#   )basename	extensionspecified_dirsubdirr[   s        r:   get_pathr   P  sz     77=='"FWW\\)+}=Fik8Aa=977<<8*Ai[ 9:DVT!!r<   c                p    |dk(  rt        | |      S |dv rt        t        |             S t        d|       )Nr   )cubinhsacozUnknown hash type )r   reprrz   )contentr   	hash_types      r:   get_hashr   ^  sB    F%((&&g''
-i[9
::r<   c                $   t        | j                         ||      }t        |||      \  }}}t        j                  j                  |      st        j                  |d       t        j                  j                  |      st        ||        ||fS )NTrP   )r   stripr   rZ   r[   r   r]   r   )	r   r   r   r   r   r   r   r   r[   s	            r:   writer  f  so     	:C%c9mDHfd77>>&!
FT*77>>$T7#T>r<   c                   t        |t        t        f      sJ d       t        j                  |       } | j
                  dt        j                          dt        j                          dz  }t        |t              rdnd}|j                  |      5 }|j                  |       d d d        |j                  |        y # 1 sw Y   xY w)Nz6Only strings and byte arrays can be saved in the cacherL   z.tmpwwb)r   r   r   pathlibr   r   rZ   getpid	threading	get_identr   r  rename)r[   r   tmp_path
write_modefs        r:   r   r   y  s     #u @?@  <<D{{qQy/B/B/D.ETJJH"7C0dJ	z	"a	 
#OOD 
#	"s   B>>Cc                      e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   y)TensorMetadatazG
    The Tensor metadata relevant when hashing FxGraph cache keys.
    torch.dtypedtypez
torch.SizeshapezTuple[Any, ...]stridetorch.devicero   ztorch.layoutlayoutzOptional[torch.memory_format]memory_formatintstorage_offsetr   requires_gradis_quantizedis_conjis_negis_coalesced	dense_dim
sparse_dimNr   r   r   __doc____annotations__r6   r<   r:   r  r    sY     00MLNOr<   r  c                  &    e Zd ZU dZded<   ded<   y)TensorMetadataAndValueszk
    TensorMetadata plus the elements as a list of raw values.
    Used for hashing inlined constants.
    r  tensor_metadata	List[Any]valuesNr!  r6   r<   r:   r%  r%    s    
 $#r<   r%  c                J   t        |       }| j                  |      sd}t        | j                  | j                  | j
                  t        j                  k(  r| j                         nd| j                  | j
                  || j                         | j                  | j                  | j                         | j                         | j                  r| j!                         nd| j                  r| j#                         nd| j                  r| j%                               S d      S )z1
    Extract the TensorMetadata of a tensor.
    )r  Nr6   F)r  r  r  ro   r  r  r  r  r  r  r  r  r  r   )r&   is_contiguousr  r  r  r  rR   stridedr  ro   r  r  r  r  r  	is_sparser  r  r   )tr  s     r:   extract_tensor_metadatar.    s     4I3KM???7ggggXX6qxxzBxxxx#'')oo^^		xxz)*Q^^%%#$;;!++-E%&[[1<<>  7< r<   c                    | S r5   r6   )xs    r:   _identr1    s    Hr<   c                *    t        |       }t        |ffS )zH
    See FxGraphCachePickler. Custom reducer to pickle FakeTensors.
    )r.  r1  r-  metadatas     r:   _reduce_fake_tensorr5    s     'q)HXK  r<   c                   t        |       }t        | j                        dk(  s3t        j                  j
                  j                  j                  |       r!t        t        || j                               ffS t        |ffS )zD
    See FxGraphCachePickler. Custom reducer to pickle Tensors.
    r   )r.  lenr  rR   	_inductorgraphr*   can_inline_constantr1  r%  tolistr3  s     r:   _reduce_tensorr<    se     'q)H
177|qEOO11??SSTUV0188:FHII$$r<   c                &    t         t        |       ffS )zD
    See FxGraphCachePickler. Custom reducer to pickle SymInts.
    )r1  r   )ss    r:   _reduce_symintr?    s     SVIr<   c                      e Zd ZdZej
                  j                         Zeeej                  j                  j                  <   eeej                  <   eeej                  <   edd       Zedd       Zy)FxGraphCachePicklera:  
    Custom pickler to customize the pickling of some objects (Tensors), only for the
    purpose of computing a hash for keying into the FxGraphCache. Tensors contain
    objects that don't pickle and/or vary between runs, and we want to capture the
    data that allow us to compute a stable, but safe hash.
    c                    t        j                         5 }t        |      }|j                  |        |j	                         cddd       S # 1 sw Y   yxY w)zA
        Pickle an object using the FxGraphCachePickler.
        N)ioBytesIOrA  dumpgetvalue)objstreampicklers      r:   r   zFxGraphCachePickler.dumps  s9    
 ZZ\V)&1GLL??$ \\s   ,AAc                B    t         j                  |       }t        |      S )zt
        Serialize an object using the FxGraphCachePickler and return a hash
        of the pickled object.
        )rA  r   r   )rG  serialized_datas     r:   r   zFxGraphCachePickler.get_hash   s     .33C8?++r<   Nr   r   )rG  r   r   r   )r   r   r   r"  copyregdispatch_tabler   r5  rR   _subclassesfake_tensor
FakeTensorr<  Tensorr?  SymIntr   r   r   r6   r<   r:   rA  rA    s{     ++002N?RN5$$00;;<#1N5<< #1N5<< % % , ,r<   rA  c                    t         j                  j                  t              } i }t	        j
                  | g      D ]d  }|j                  j                  |j                  d      }|J |j                  }|J t        |d      5 }|j                         ||<   ddd       f t        j                  t        j                  |            j!                         S # 1 sw Y   xY w)z
    Compute a hash of all inductor code modules. Used by the FxGraph cache
    so any inductor code changes would result in new cache keys.
    Nrb)rZ   r[   dirname__file__pkgutiliter_modulesmodule_finder	find_specr^   originr   readr|   r}   pickler   r   )inductor_rootcontentslibspecmoduler  s         r:   get_inductor_code_hashrd  
  s     GGOOH-M!#H##]O4  **388T:!!!&$1 vvxHV   5 >>&,,x0188::  s   CC"	c                      e Zd ZU dZded<   y)OrderedSetHolderzb
    See FxGraphHashDetails. Holds a sorted list to support stable hashing
    of set kwargs.
    r'  r   Nr!  r6   r<   r:   rf  rf    s    
 r<   rf  c                  2    e Zd ZdZdgZ	 	 	 	 	 	 ddZddZy)FxGraphHashDetailszz
    Object to capture all the details for a compiled FX graph relevant to computing
    a safe and stable cache key.
    graph_idc                   || _         || _        i | _        t        |      D ]\  }|| j                  vst        ||         t        u r%t        t        ||               | j                  |<   K||   | j                  |<   ^ t        j                  | _
        t        j                         | _        t        j                         | _        t#               | _        y r5   )gmexample_inputs	fx_kwargssortedEXCLUDED_KWARGStypesetrf  rR   rt   torch_versionrk   r   system_infor    save_configinductor_configrd  inductor_code_hash)r   rk  rl  rm  ks        r:   r   zFxGraphHashDetails.__init__1  s     , 	"A,,,	!%, )9	!9M(NDNN1%(1!DNN1% # #..$//1%113"8":r<   c                r   dd}g }t        |       j                         D ]  \  }}t        |t              rXt	        t        |            D ]@  }t        j                  ||         }|j                  d| d| d| d |||                 B ot        |t              rQ|j                         D ]=  \  }}t        j                  |      }|j                  d| d| d| d ||              ? t        j                  |      }|j                  d| d| d ||               dj                  |      S )z
        Get a printable string describing in more detail all the attributes
        comprising this object. Useful for debugging when one graph hashes
        to a different value than another.
        c                    t        | t        j                        rt        t	        |             S t        | t
              ryt        |       S )Nz<bytes>)r   rR   rR  r   r.  r   )rG  s    r:   get_strz-FxGraphHashDetails.debug_str.<locals>.get_strT  s7    #u||,23788C' 3xr<   [z] z]: : 
r   r   )varsr   r   listranger7  rA  r   appenddictr\   )	r   rz  linesattrrG  iihrw  vs	            r:   	debug_strzFxGraphHashDetails.debug_strM  s'   	  d))+ID##t$C/B+44SW=ALL1QCr$qCB8H7I!JK * C&IIKDAq+44Q7ALL1QCr$q3wqzl!CD ( (005q2dV2gcl^<= , yyr<   N)rk  torch.fx.GraphModulerl  List[torch.Tensor]rm  r   r~  )r   r   r   r"  ro  r   r  r6   r<   r:   rh  rh  (  s6     "lO; ; +; "	;8 r<   rh  c                    t        | ||      }dt        j                  |      z   }t        j	                  d||j                                |S )z=
    Generate a unique hash of the FX graph for caching.
    r  z*FX graph cache hash details for key %s:
%s)rh  rA  r   logdebugr  )rk  rl  rm  detailsr   s        r:   compiled_fx_graph_hashr  l  sI     !^Y?G #,,W5
5CII;S'BSBSBUVJr<   c                      e Zd ZdZedd       Zedd       Zedd       Zedd       Ze	 	 	 	 	 	 dd       Z	e	 	 	 	 	 	 dd       Z
e	 	 	 	 	 	 	 	 dd       Zed	        Zy
)FxGraphCachea7  
    Supports caching and reusing compiled Fx graphs.

    The overall strategy is as follows:
    - This cache stores entries on disk. When saving an entry, we can't
      serialize callables (that could be C++, Triton, etc.), so we serialize
      their own disk cache location. We then recreate the compiled artifact
      after fetching from disk.
    - For indexing the cache, we gather the fields relevant to identifying an
      FxGraph (the graph module, graph inputs, system settings etc.) into an
      FxGraphCacheDetails object, pickle it, and compute a hash for the key.
      See FxGraphCachePickler.
    - Among the metadata we store, we also include a guards expression that's
      appropriate for validating any symbols for Tensor arguments that have
      symbolic bounds. On cache lookup then, we evaluate those guards in the
      current context to validate that a cached entry can be served.
    - A given graph could have multiple compiled versions, corresponding to
      different sets of guards. Therefore, we store cache entries in the form:
          <temp dir>/<fx graph hash>/<serialized metatdata>
    - On lookup, we compute the key from the graph details, iterate over all
      leaf files in the corresponding subdirectory, deserialize the entry, and
      evaluate its guards expression. If the evaluation succeeds, we have a
      cache hit. If it fails, we compile the graph and store a new entry.
    - Finally, on a cache hit, we need to make sure any guards that would
      have been created during compilation are added to the current context.
    c                 R    t         j                  j                  t               d      S )zS
        Get the toplevel temporary directory for storing compiled graphs.
        fxgraph)rZ   r[   r\   r#   r6   r<   r:   _get_tmp_dirzFxGraphCache._get_tmp_dir  s    
 ww||IK33r<   c                n    t         j                  j                  t        j	                         | dd |       S )zA
        Return the disk location for a given cache key.
        r   r   )rZ   r[   r\   r  r  )r   s    r:   _get_tmp_dir_for_keyz!FxGraphCache._get_tmp_dir_for_key  s*    
 ww||L557Qq3GGr<   c                b    | D cg c]  }t        |t        j                        s|! c}S c c}w )z=
        Get the SymInt objects from the input list.
        )r   rR   rS  )r   r>  s     r:   _filter_symintszFxGraphCache._filter_symints  s)    
 "A6aZ5<<%@6AAAs   ,,c                 z    t         j                  j                  j                         j                  j
                  S )zG
        Helper to get the shape env from the tracing context.
        )rR   _guardsTracingContextr   	fake_mode	shape_envr6   r<   r:   _get_shape_envzFxGraphCache._get_shape_env  s)    
 }}++//1;;EEEr<   c                .   t         j                  |       }t        j                  j	                  |      syt        t        j                  |            D ]-  }t        t        j                  j                  ||      d      5 }t        j                  |      }ddd       j                  }|s|c S t         j                         }t         j                  |      }t        d |D              sJ |D 	cg c]  }	t        |	       }
}	t!        |j#                  ||
            }t$        j'                  d| ||
|       |st!        |j#                  ||            }|du sJ t$        j'                  d| |j(                         |c S  y# 1 sw Y   xY wc c}	w )z
        Lookup a compiled graph in the cache by key. On a hit, return the
        deserialized CompiledFxGraph object. On a miss, return None.
        NrU  c              3  2   K   | ]  }t        |        y wr5   )r'   )r   r>  s     r:   r   z-FxGraphCache._lookup_graph.<locals>.<genexpr>  s     4Gqx{Gs   zCfx graph cache key %s evaluating guards for %s with values %s => %sTz*fx graph cache key %s post-load guards: %s)r  r  rZ   r[   r   rn  listdirr   r\   r^  r   guards_exprr  r  r   r(   r   evaluate_guards_expressionr  r  guards)r   rl  r   r[   r  r9  r  r  symintsr>  hintsr   checks                r:   _lookup_graphzFxGraphCache._lookup_graph  sb    2237ww~~f% 2::f-.Dbggll640$71)/Q 8  ++K %335I"22>BG
 4G4444*12'QXa['E2y;;KOPCIIU YAA+wWX}$}		@$$
 G /J I 87  3s   F3FF	c                   t        |      }d|_        t        j                         }t        j	                  |      }|j                  |      |_        t        j                  |      }t        j                  |       }t        j                  j                  |      st        j                  |d       t        j                  j                  |t        |            }t!        ||       y)z=
        Store a serialized CompiledFxGraph on disk.
        NTrP   )r   compiled_artifactr  r  r  produce_guards_expressionr  r^  r   r  rZ   r[   r   r]   r\   r   r   )	r   compiled_graphrl  disk_compiled_graphr  r  r   r   r[   s	            r:   _save_graphzFxGraphCache._save_graph  s     #>204- !//1	..~>*3*M*Mg*V',,232237ww~~f%KK.
 ww||FK$89T7#r<   c                   ddl m} t        |||      }t               } |t        j
                  j                  ||dz         t              }|5  t        j                  ||      }|Lt        j                  d|       t        d   dxx   d	z  cc<    | ||fi |}t        j                  |||       n*t        j                  d
|       t        d   dxx   d	z  cc<   |cddd       S # 1 sw Y   yxY w)z
        Load a compiled graph from the cache. If a cached entry does not exist,
        compile the graph and save it to the cache.
        r   FileLock.locktimeoutNzfx graph cache miss for key %sinductorfxgraph_cache_missr   zfx graph cache hit for key %sfxgraph_cache_hit)filelockr  r  r   rZ   r[   r\   LOCK_TIMEOUTr  r  r  r  r   r  )	compile_fx_fnrk  rl  rm  r  r   r   lockr  s	            r:   r   zFxGraphCache.load  s     	&$RC>XsW}=|T)77^LN%		:C@$%9:a?:!.r>!OY!O((nnM		93?$%89Q>9! TTs   BC))C2c                 R    t        j                  t        j                                y)z.
        Clear out the on-disk cache.
        N)shutilrmtreer  r  r6   r<   r:   clearzFxGraphCache.clear*  s    
 	l//12r<   Nr~  )r   r   r   r   )r   r'  r   zList[torch.SymInt])r   r)   )r   r   rl  r  r   zOptional[CompiledFxGraph])r   r   r  CompiledFxGraphrl  r  )r  Callable[..., Any]rk  r  rl  r  rm  r   )r   r   r   r"  r   r  r  r  r  r  r  r   r  r6   r<   r:   r  r  |  s   : 4 4 H H B B F F 33*3 
#3 3j $$"1$CU$ $> ")" " +" "	" ": 3 3r<   r  c                  2   e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	d	ed
<    e
e      Zded<    e
e      Zded<    e
e      Zded<    e
e      Zded<    e
e      Zded<   dZded<   dZded<   dZded<   	 	 	 	 	 	 ddZddZddZy)r  zr
    Class holding a compiled FX graph. This is the object serialized on disk
    to support FxGraph caching.
    NOptional[Callable[..., Any]]r  current_callableOptional[str]	cache_keyartifact_pathOptional[List[Tuple[int, str]]]cache_linemap)default_factoryzSet[str]device_typeszSet[int]device_idxsmutated_inputsmutated_input_idxszDict[str, torch.Tensor]	constantsz)Optional[List[Optional[Tuple[int, ...]]]]output_stridesr  zOptional[bool]_boxed_callc                P   || _         |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        t        |j                        | _	        |j                  | _
        || _        d | _        y r5   )r  r  
cache_pathr  r  r  r  r  rq  r  r  r  r  )r   r  r9  r  s       r:   r   zCompiledFxGraph.__init__M  s     "3"--"00!.. ,,#22"%e&>&>"?,r<   c                .     | j                         |      S r5   )get_current_callable)r   r   s     r:   __call__zCompiledFxGraph.__call___  s    *t((*622r<   c                    | j                   -t        j                  t        t	        j
                  |             S | j                   S r5   )r  r   r   _run_from_cacheweakrefproxyr   s    r:   r  z$CompiledFxGraph.get_current_callableb  s8      ( $$_gmmD6IJJ(((r<   )r  r  r9  r*   r  zList[Optional[Tuple[int, ...]]])r   r'  r   r   )r   r  )r   r   r   r"  r  r#  r  r  r  r  r   rq  r  r  r  r  r  r  r  r  r  r   r  r  r6   r<   r:   r  r  2  s    
 7;3:5929#I}##'M='59M29"37L(7!#6K6$S9NH9#(#==).t)DI&D@DN=D "&K%"&K& 7    8	 $3)r<   r  c                   | j                   mddlm} | j                  sJ | j                  sJ |j                  | j                  | j                  | j                  | j                        j                  | _         | j                  |      S )Nr   )PyCodeCache)	r  	codecacher  r  r  load_by_key_pathr  r  call)r  r   r  s      r:   r  r  k  s    
 ''/*''''+++++6+G+G$$(((($$	,

 $ 	( ++F33r<   c                 V   t        j                         rt        j                         S t	        t         j
                  j                  t        t        f      r.t        t         j
                  j                        } t        |       S t         j
                  j                  f} t        |       S r5   )
r    	is_fbcoder.   ccr   cppcxxr  tuplecpp_compiler_search)searchs    r:   cpp_compilerr    sm    ~~&**..4-0vzz~~& v&& **.."v&&r<   r   c                   | D ]  }	 |{t         j                  dk7  rt        j                  d      s0ddlm} t               } |t        j                  j                  |d      t              }|5  t               }d d d        t        j                  |dg       |c S  t!        j"                         # 1 sw Y   9xY w# t        j                  t        t        f$ r Y w xY w)NlinuxTORCH_INDUCTOR_INSTALL_GXXr   r  zg++.lockr  	--version)rV   platformrZ   getenvr  r  r   r[   r\   r  install_gcc_via_conda
subprocesscheck_outputSubprocessErrorFileNotFoundErrorImportErrorr!   InvalidCxxCompiler)r  r  r  r   r  s        r:   r  r    s    	{ <<7*yy!=>-'>GGLL:6 /1C ##S+$67J' , 
 
 
"" T **,={K 		s3   C C >C 1B4< C 4B=	9C  C! C!c            
        t         j                  j                  t               d      } t         j                  j                  | dd      }t         j                  j	                  |      s~t
        j                  d       t         j                  j                  dd      }|t        j                  d      }|0t        j                  |dd|  d	d
dddgt        j                         |S )z>On older systems, this is a quick way to get a modern compilergccbinzg++zDownloading GCC via conda	CONDA_EXEcondacreatez	--prefix=z--channel=conda-forgez--quietz-yz
python=3.8gxx)stdout)rZ   r[   r\   r#   r   r  infoenvironr   r  whichr  
check_callPIPE)prefixcxx_pathr  s      r:   r  r    s    WW\\)+u-Fww||FE51H77>>(#,-

{G4=LL)E!!x(+ 	 " Or<   c                 P    t        t        j                  dt                           S )Nz(gcc|g\+\+)r   rer  r  r6   r<   r:   is_gccr	    s    		.,.9::r<   c                 P    t        t        j                  dt                           S )Nz(clang|clang\+\+)r  r6   r<   r:   is_clangr    s    		.?@@r<   c                     t               } t        j                  | dg      j                  d      }d|j	                         d   v S )Nr  utf8Appler   )r  r  r  r   
splitlines)r  version_strings     r:   is_apple_clangr    sB    
.C,,c;-?@GGONn//1!444r<   c                      e Zd ZU ded<   ded<   ded<   ded<   dZd	Zdd
Zej                  fddZ	ddZ
ddZddZ ej                  d      dd       Zy)VecISAr  
_bit_widthr   _macro_arch_flagszDict[torch.dtype, int]_dtype_nelementsa~  
#if defined(CPU_CAPABILITY_AVX512) || defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_ZVECTOR)
#include <ATen/cpu/vec/functional.h>
#include <ATen/cpu/vec/vec.h>
#endif

__attribute__((aligned(64))) float in_out_ptr0[16] = {0.0};

extern "C" void __avx_chk_kernel() {
    auto tmp0 = at::vec::Vectorized<float>(1);
    auto tmp1 = tmp0.exp();
    tmp1.store(in_out_ptr0);
}
zG
import torch
from ctypes import cdll
cdll.LoadLibrary("__lib_path__")
c                    | j                   S r5   )r  r   s    r:   	bit_widthzVecISA.bit_width  s    r<   c                     | j                   |   S r5   )r  )r   r  s     r:   	nelementszVecISA.nelements  s    $$U++r<   c                    | j                   S r5   )r  r   s    r:   build_macrozVecISA.build_macro  s    {{r<   c                    | j                   S r5   )r  r   s    r:   build_arch_flagszVecISA.build_arch_flags   s    r<   c                *    t        t        |             S r5   )rs   r   r   s    r:   __hash__zVecISA.__hash__  s    CIr<   Nc           
     6   t         j                  j                  t         j                  j                  S t        j                         ryt	        t
        j                  d      \  }}ddlm} t               } |t        j                  j                  ||dz         t              }|5  |d d dz   }t        j                  t!        ||d	| 
            }	 t#        |||       t%        j&                  t(        j*                  dt
        j,                  j/                  d|      gt$        j0                  i t        j2                  ddj                  t(        j                        i       	 d d d        y# t4        $ r}Y d }~d d d        y	d }~ww xY w# 1 sw Y   y xY w)NTr  r   r  r  r  soF)warning_allvec_isa-c__lib_path__
PYTHONPATH:stderrenv)r    r  
vec_isa_okr  r  r  	_avx_coder  r  r   rZ   r[   r\   r  shlexsplitcpp_compile_commandcompile_filer  r  rV   
executable_avx_py_loadrU   DEVNULLr   	Exception)	r   r   
input_pathr  r   r  output_path	build_cmdr   s	            r:   __bool__zVecISA.__bool__  sA   ::  ,::((( 0 0%8Z%>XsW}=|T$Sb/D0K#I
Zi@%%++33NKP
 &--H2::H|SXXchh5GH - T&  ) T&' Ts1   (+FBE1'F1	F:FFFF)r   r  )r  r  r   r  r~  r   )r   r   r   r#  r/  r5  r  rR   floatr  r  r  r!  r   r   r;  r6   r<   r:   r  r    si    OK,,"IL .3[[ ,  Y" "r<   r  c                      e Zd ZU dZdZdZej                  dej                  dej                  diZ
d
dZej                  Zded<   y	)	VecAVX512i   z-DCPU_CAPABILITY_AVX512z0-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma       c                     y)Navx512r6   r   s    r:   __str__zVecAVX512.__str__3  s    r<   Callable[[VecISA], Any]r!  Nr~  r   r   r   r  r  r  rR   r<  bfloat16float16r  rC  r  r!  r#  r6   r<   r:   r>  r>  ,  sF    J&FDKRU]]BO )/H%7r<   r>  c                      e Zd ZU dZdZdZej                  dej                  dej                  diZ
d
dZej                  Zded<   y	)VecAVX2   z-DCPU_CAPABILITY_AVX2z-mavx2 -mfma   r?  c                     y)Navx2r6   r   s    r:   rC  zVecAVX2.__str__@  s    r<   rD  r!  Nr~  rE  r6   r<   r:   rI  rI  9  sF    J$F KQEMM2N )/H%7r<   rI  c                      e Zd ZU dZdZdZej                  dej                  dej                  diZ
d
dZej                  Zded<   y	)
VecZVECTORrJ  zO-DCPU_CAPABILITY_ZVECTOR -DCPU_CAPABILITY=ZVECTOR -DHAVE_ZVECTOR_CPU_DEFINITIONz-mvx -mzvectorrK  r?  c                     y)Nzvectorr6   r   s    r:   rC  zVecZVECTOR.__str__M  s    r<   rD  r!  Nr~  rE  r6   r<   r:   rO  rO  F  sF    J^F"KQEMM2N )/H%7r<   rO  c                  P    e Zd ZU dZdZdZi ZddZd	dZe	j                  Z
ded<   y)
InvalidVecISAr   rM   c                     y)NINVALID_VEC_ISAr6   r   s    r:   rC  zInvalidVecISA.__str__Y  s     r<   c                     yr@   r6   r   s    r:   r;  zInvalidVecISA.__bool__\  rA   r<   rD  r!  Nr~  r   )r   r   r   r  r  r  r  rC  r;  r  r!  r#  r6   r<   r:   rS  rS  S  s2    JFK! )/H%7r<   rS  c                 4   t         j                  dk7  rg S t        j                         dk(  rt               gS g } t	        d      5 }|j                         }t        D ]$  }t        |      |v s|s| j                  |       & | cd d d        S # 1 sw Y   y xY w)Nr  s390xz/proc/cpuinfo)	rV   r  machinerO  r   r]  supported_vec_isa_listr   r  )isa_list	_cpu_info_cpu_info_contentisas       r:   valid_vec_isa_listr_  i  s    
||w	W$~H	o	)%NN,)C3x,,$ *  
		s   'B-B0BBc                     t        j                         r
t               S t               } | st        S t         j
                  j                  	| sJ | d   S | D ]1  }t         j
                  j                  |j                         k(  s/|c S  t        S Nr   )r    r  rI  r_  invalid_vec_isar  simdlenr  )_valid_vec_isa_listr^  s     r:   pick_vec_isare  z  s}    y(:(< zz!""""1%%"::0J # r<   Tc                    | rdS dS )Nr'  rM   r6   )compile_onlys    r:   get_compile_onlyrh    s    4'R'r<   c                    | rdS dS )Nz-shared -fPICrM   r6   )shareds    r:   
get_sharedrk    s    $?,",r<   c                    | rdS dS )Nz-WallrM   r6   )r%  s    r:   get_warning_all_flagrm    s    !7)r)r<   c                 `    dt        t        t        j                  j                              z   S )Nz-D_GLIBCXX_USE_CXX11_ABI=)r   r  rR   _C_GLIBCXX_USE_CXX11_ABIr6   r<   r:   get_glibcxx_abi_build_flagsrq    s!    &S1P1P-Q)RRRr<   c                 b    g d} t               r| j                  d       dj                  |       S )N)
-std=c++17z-Wno-unused-variablez-Wno-unknown-pragmasz%-Werror=ignored-optimization-argument )r  r  r\   )flagss    r:   	cpp_flagsrv    s'    JEz<=88E?r<   c                      y)Nz-DTORCH_INDUCTOR_CPP_WRAPPERr6   r6   r<   r:   cpp_wrapper_flagsrx    s    )r<   c                 ^   t         j                  j                  rdnd} | dz  } t         j                  j                  s| dz  } t        j
                         r| S t        j                  dk(  r| dz  } n"t        j                         dk(  r| dz  } n| d	z  } t        j
                         s| d
z  } | S )Nz-O0 -gz-O3 -DNDEBUGz" -ffast-math -fno-finite-math-onlyz -fno-unsafe-math-optimizationsdarwinz -Xclangppc64lez -mcpu=nativez -march=nativez	 -fopenmp)	r    aot_inductordebug_compiler  enable_unsafe_math_opt_flagr  rV   r  rY  )
base_flagss    r:   optimization_flagsr    s    #00>>NJ66J::1177
 
||x 	j 
*/)J**J k!
r<   c                      y)Nz$-D C10_USING_CUSTOM_GENERATED_MACROSr6   r6   r<   r:   use_custom_generated_macrosr    s    1r<   c                     t        j                         r-t        j                         } dj	                  d      }d|  d| S y)Nrt  )z-D C10_USE_GLOGz-D C10_USE_MINIMAL_GLOGz'-D C10_DISABLE_TENSORIMPL_EXTENSIBILITYz-Wp,-fopenmp rM   )r    r  r.   
openmp_libr\   )r  preprocessor_flagss     r:   use_fb_internal_macrosr    sJ     ++-
 XX
 zl!,>+?@@r<   c                 .    t        j                         ryy)Nz	-nostdincrM   )r    r  r6   r<   r:   use_standard_sys_dir_headersr    s    r<   c                     	 d} t        j                  | j                               j                  d      }t	        t        j                  |            dkD  S # t         j                  $ r Y yw xY w)Nzconda list llvm-openmp --jsonr  r   F)r  r  r1  r   r7  r~   loadsr  )commandoutputs     r:   is_conda_llvm_openmp_installedr    s^    1((9@@H4::f%&**%% s   AA A-,A-c                    	 t        j                  ddg       t        j                  g d      j                  d      j                         } t        j
                  j                  |       }|| fS # t         j                  $ r Y yw xY w)Nr  brew)r  z--prefixlibompr  )FrM   )r  r  r   r  rZ   r[   r   r  )libomp_pathomp_availables     r:   homebrew_libompr    sw    & 12
 ##$BCVF^UW 	 {3k))%% s   A-A0 0BBFc                   t        j                         rfdt        j                  vrTdt        j                  vrBt        j                  j                  t        j                               t        j                  d<   ddlm	} d}d}t        j                  dk(  r| s&|t        k7  s|st         j                  j                  r_|j                  |      t!        j"                  d      gz   }|j%                  |      t!        j&                  d      gz   }g }	t        j                         s|	d	d
gz  }	|	dgz  }	|s|	dgz  }	n|	dgz  }	|r|t        j                  j                  t)                     gz  }|rt+        |      D ]  \  }
}|j-                  t        j                  d         s)t        j                  j/                  | d      rLt        j0                  |      D ]a  \  }}}d|v st        j                  j3                  ||      ||
<   |j5                  t        j                  j3                  ||
   d                |j7                         }|rct        j                         rO|t        k7  rFt9        |      j;                         }dj3                  |j=                         d| d| d| dg      }|r|r	|d}|dz  }|rDt>        j@                  jB                  |	ddgz  }	n"t        j                         r|	dgz  }	n|	g dz  }	|j=                         }n|j                  |      t!        j"                  d      gz   }|r+|t        j                  j                  t)                     gz  }g }t        j                  dk(  rtE                }t        jF                  d      t        j                  j3                  t        jF                  d      dd      }t        j                  j/                  |      }|r|j5                  t        j                  j3                  t        jF                  d      d             |j5                  t        j                  j3                  t        jF                  d      d             ntI        jJ                  d       |xs |}|rg ndg}	|st        jF                  d       tM               }|rt        j                  j3                  t        jF                  d       d      }|j5                  t        j                  j3                  t        jF                  d       d             |j5                  |       t        jN                         jP                  d!k(  r@t        j                  j/                  t        j                  j3                  |d"            rd#g}	|stS               \  }}|ry|j5                  t        j                  j3                  |d             |j5                  t        j                  j3                  |d             nt        j                         rdgndg}	t         jT                  jV                  s|	d$gz  }	||jX                  gz  }t        j                         rL|j5                  t        jZ                                |j5                  t        j\                                |j5                  t        j^                                |j5                  t        j`                                |j5                  t        jb                                |j5                  t        jd                                |j5                  t        jf                                |j5                  t        jh                                |j5                  t        j                                |j5                  d       g }|r|rt        j                         rg d%}dj3                  |D cg c]  }d&|z   	 c}      }dj3                  ||	D cg c]  }d'|z   	 c}z         }|||||fS c c}w c c}w )(N	CUDA_HOME	CUDA_PATHr   cpp_extensionrM   r  includeLIBDIRrR   	torch_cpugomptorch_pythonompz/libcudart_static.azlibcudart_static.astubsrt  z-D CPU_CAPABILITY=z-D CPU_CAPABILITY_z-D HAVE__CPU_DEFINITIONz -D USE_CUDAc10_hip	torch_hiprT   )c10_cudarT   
torch_cudarz  
OMP_PREFIXzomp.hra  z-environment variable `OMP_PREFIX` is invalid.CONDA_PREFIXx86_64zlibiomp5.dylibiomp5c10)z-Wl,-Bstaticz-lcudart_staticz-Wl,-Bdynamic-Lz-l)5r    r  rZ   r   r[   rV  r.   rT   torch.utilsr  rV   r  rb  r  enable_kernel_profileinclude_paths	sysconfigr   library_pathsget_config_varcpp_prefix_path	enumerate
startswithr   walkr\   r  r  r   upperr  rR   rS   rh   r  r  warningswarnr  unamerY  r  r|  abi_compatibleTORCH_LIB_PATHsleefopenmp
cc_includelibgcclibgcc_archlibgcc_backwardglibclinux_kernel)include_pytorchr&  rT   aot_moder  macrosr  ipathslpathslibsir[   rootdirsfilescapr  header_path	valid_envconda_lib_pathr  static_link_libsp
lpaths_strlibs_strs                            r:   get_include_and_linking_pathsr    sh    	rzz)rzz)"$''//+2B2B2D"E

;)F
||wo%::++
 ,,T2i6H6H6S5TT,,T2$$X.6
 
  !Wk**DVHD(( UGOD277???+<=>> $-V#44??JJ{3"$''..D69L1M"N57WWT] 1dE#75#@02T40HF1I$*MM"'',,vay'2R$S$)	 6C	 $5 $$&!g&@'l((*002,SE2,SE2"3%7	 ~n$F}}  ,K00##%VH$D>>D"335 ,,T2i6H6H6S5TTrww'89::F<<8# . 00M yy&2 ggll299\+BIwWGGNN;7	MM"'',,ryy/F	"RSMM"'',,ryy/F"NOMM"QR - :&2UGD !RYY~%>%J > @ %'WW\\"))N2KU%SNMM"'',,ryy/H)"TUMM.1xxz))X5"''..^5EF; !(y !-<->*{ MM"'',,{I"FGMM"'',,{E"BC
 %..0E7vhD --=//00 k'')*k((*+k,,./k((*+k--/0k1134k'')*k..01k&&() 	i DV--/OV4V4!8V45Jxx(d+CdD1Hd+CCDH:x1AAA 5+Cs   :`9`>
c
           
     
   t        ||||      \  }
}}}}t        | t              r| g} dj                  |
D cg c]  }d|z   	 c}      }d}t	        j
                         r|r|	s| }|}nK| D cg c]!  }t        j                  j                  |      # }}t        j                  j                  |      }t               sJ |dz  }|dz  }dt        j                         z   }|dt        j                         z   z  }n| }|}d}dj                  |      }t        j                  dddj                  g d	t                d| dt        |       d	t!        |       dt#                d	t%                d	| d| d| d| d	| d| d| d	t'                d	t)                d	t+                d	t-                d	t/        |       d
| d            j1                         S c c}w c c}w )Nrt  -IrM   z --rtlib=compiler-rtz -fuse-ld=lldz-Bz -Lz[ \n]+z
            z
            -o z	
        )r  r   r   r\   r    r  rZ   r[   r   r  r.   	glibc_libr  subr  rk  rm  rv  rq  r  r  r  r  rh  r  )inputr  r%  rj  r  r&  rT   r  rg  use_absolute_pathr  r  r  r  r  r  
ipaths_strclang_flagsinp_nameout_namer  linker_pathsinp_name_strs                          r:   r2  r2    s    6S$62FFD&"2 %V4V4!8V45JK-HH 6;;U((+UH;ww''/Hzz--&k3355 5 5 77788H%L66	 	  	^		*^	+,	-7-?,@	A	!+./	/0	1:	>	 )*+	,	 L		 		 !		 "#		 $(&		 )*		 +;);		<		
 H	
 	
 #^	
 $%	
 &1M	
2	  !"	#	 )*+	,	 $%&	'	 *+,	-	 l+,	-	 z			  eg!+ 5 <s   G;-&H c                    t        j                  |       } 	 t        j                  |        y # t        j                  $ r&}t        j                  | |j                        |d }~ww xY wr5   )r0  r1  r  r  CalledProcessErrorr!   CppCompileErrorr  )cmdr   s     r:   run_command_and_checkr    sR    
++c
C8c"(( 8!!#qxx0a78s   - A& !A!!A&c                j    | j                  d      rt        j                  j                  |       S | dfS )zDReturns the path where the AOT Inductor compiled kernels are stored..sorM   )endswithrZ   r[   r1  )r[   s    r:   split_aot_inductor_output_pathr    s,     }}Uww}}T""Rxr<   c                  n    e Zd ZU  e       Zded<    eej                        Zedd       Z	edd       Z
y)CudaKernelParamCachezDict[str, Dict[str, str]]r   c                    t         j                  j                  dnd}t        |||t	        t
        j                  j                        d         \  }}||t               <   || j                  |<   y )Nr   r   r   )r   r   )
rR   rS   rh   r  r  r    r|  r9  ri   r   )clsr   paramsr   bin_typerO   r[   s          r:   rq  zCudaKernelParamCache.set  sk    #mm//77W8##//	
4 59.01		#r<   c                :    | j                   j                  |d       S r5   )r   r   )r  r   s     r:   r   zCudaKernelParamCache.get  s    yy}}S$''r<   N)r   r   r  Dict[str, str]r   r   r   r   )r   r   r   zOptional[Dict[str, str]])r   r   r   r  r   r#  r   r  classmethodrq  r   r6   r<   r:   r  r    sC    '+vE$-%E    ( (r<   r  c                  p    e Zd ZU  e       Zded<    eej                        Ze	 	 	 	 	 	 	 	 	 	 dd       Z	y)AotCodeCacher  r   c           
        t               }t        t        dd|||j                              }d}d}t	        j
                         rPt        j                         }	|s%|j                  rt        j                         }
d}d}nt        j                         }
nd}	d}
t        t        j                  j                        \  }}t        |d||	      \  }}|| j                  vsd|r/t        j                   j#                  | j                  |         |k7  s3|rt        j                   j%                  | j                  |         |k7  rnd
dlm} t+               } |t        j                   j-                  ||dz         t.              }|5  t	        j
                         rM|rKt        j                   j1                  |      d
   dz   }t3        |d      5 }|j                  |       d d d        |rt        j                  j                  n$t        j                   j1                  |      d
   dz   }t        j                   j5                  |      s=t        j                   j1                  |      d
   dz   }t        |||||j                  d|      }t6        j9                  d|       |r2t;        |||j=                                t        j>                  |d       ntA        |       d,ddj-                  fd|jB                  jE                         D              }t        |d|      \  }}t        j                   j1                  |      d
   dz   }|rv|	 dt        j                   j%                  |       dt        j                   j%                  |       }t;        |||j=                                t        j>                  |d       n|	 d| d| }tA        |       t6        j9                  d|       |
 d| d| }t6        j9                  d|       tA        |       d| }t6        j9                  d |       tA        |       |r5tG        jH                  d!d"t        j                   j%                  |            }ntG        jH                  d!d"|      }g }|jK                  |
 d#| d$|        |jK                  |
 d#| d%|        |jK                  |
 d#| d&|        t6        j9                  d'dj-                  |             |D ]  }tA        |        t        ||g||||j                  |(      }t6        j9                  d)|       |r4t;        ||g||j=                                t        j>                  |d*       n"tA        |       nt6        j9                  d+|       || j                  |<   d d d        | j                  |   S # 1 sw Y   xY w# 1 sw Y   %xY w)-Nr  o)r&  rT   r  FTldobjcopyr  )r   r   r   r  r  r  z.jsonr  r  z.o)r  r  r&  rT   r  rg  r  zaot compilation command: %si  c                ,   dd l }| j                         dk(  ry| j                         j                         }|j	                  |j                         |j                  |j                  |j                         z              }t        |j                        S )Nr   r<   )ctypesnumeluntyped_storagerJ   castdata_ptrPOINTERc_ubytenbytesr   r`  )r-  r  t_cpu	raw_arrays       r:   	_to_bytesz'AotCodeCache.compile.<locals>._to_bytesX  st     &779>#& ! 1 1 3 7 7 9$*KK!NN,"NN6>>ELLN+JK%	
  %Y%7%788r<   r<   c              3  .   K   | ]  } |        y wr5   r6   )r   tensorr  s     r:   r   z'AotCodeCache.compile.<locals>.<genexpr>h  s      -8Pf	&)8Ps   r  )r   z -r -b binary -o rt  zaot constant binary command: %szC --rename-section .data=.lrodata,alloc,load,readonly,data,contents zaot constant obj command: %szrm z$aot constant bin removal command: %sz[\W]rO   z --redefine-sym _binary_z#_start=_binary_constants_bin_start z!_size=_binary_constants_bin_size z_end=_binary_constants_bin_end z'aot constant binary redefine symbol: %s)r  r  r&  rT   r  r  zaot linkage command: %si  z.aot_inductor dynamic library already exist: %s)r-  torch.Tensorr   r   )&re  r   r2  r  r    r  r.   r  objcopy_fallbackr  r  r|  r9  r  r   rZ   r[   rV  r   r  r  r   r\   r  splitextr   r   r  r  r3  r1  chmodr  r  r(  r  r  r  )r  r9  source_codeserialized_extern_kernel_nodesrT   picked_vec_isacpp_commandfbcode_aot_cpu_rer  
ld_commandobjcopy_commandspecified_output_pathspecified_so_namer   r8  r  r   r  output_jsonr  	output_sooutput_or  aot_constants
consts_keyconsts_pathconsts_obodysymbol_listr  s                                @r:   compilezAotCodeCache.compile  sC    &S.tenn

 "!$)JENN"-">">"@$(!$(!"-"5"5"7J'O
 +6+>+>+J+JK	
!/	
Z cii!		#/3HH   304EE)#~HBGGLL3=A<XD ##%*H"$''"2"2:">q"AG"KKk3/1 >? 0
 ) ''33))*5a85@  ww~~i0!ww//
;A>EH-(' .!!&%)*;C II;SA($Z399;G51-c29  %(HH -8=8N8N8P- %M /4%&;/+J  "ww//<Q?$FH(!+,=bgg>N>Nx>X=YYZ[][b[b[k[klw[xZyz$[(CIIKH51!+,=hZqV-c2II?E ++ ,$:Qxj2 
 II<cB)#.}-CIIDcJ)#.(!vvgsBGG4D4D[4QR!vvgsK@"$K&&*++CD6Ilmulvw  &&*++CD6Ijksjtu  &&*++CD6Ihiqhrs IIA388KCX  +-c2  + .'2( .!!&*;C II7=($h%99ciikRE2-c2IIH) "+		#q t yy~k 0/ s&   AWW OWW	WWN)
r9  r*   r  r   r  r  rT   r   r   r   )
r   r   r   r  r   r#  r   r  r  r  r6   r<   r:   r  r    se     FE>"%Ejj j )6	j
 j 
j jr<   r  c                     t        t              j                  dz  } | j                         5 }|j	                         }t        |d      \  }}d d d        |S # 1 sw Y   S xY w)Nzcodegen/cpp_prefix.hr  )r   rW  r   r   r]  r  )r[   r  r   rO   filenames        r:   r  r    sZ    >  #99D	&&(
8 
 O 
 Os    AA!c                     t               } t        j                         r#dt        j                  j                  |        dS d|  dS )Nz
#include "")r  r    r  rZ   r[   r   )r   s    r:   
cpp_prefixr#    sF     H BGG,,X67q99H:Q''r<   c           	        t        | t              r| gn| }|D cg c]7  }t        j                         rt        j
                  j                  |      n|9 }}	 t        j                         rt               }t        j
                  j                  |      }t        j
                  j                  |      }t        j
                  j                  t        j                  j                  j                  d      }	t        j                         5 }
t        j                   |t        j
                  j                  |
|             t#        ||      D ]9  \  }}t        j                   |t        j
                  j                  |
|             ; t        j
                  j                  |
d      }t        j$                  |	|       t'        ||
|      }t        j
                  j)                  |      rt	        j*                  |       t        j                   ||       d d d        y t-        j.                  |t,        j0                         y c c}w # 1 sw Y   y xY w# t,        j2                  $ r]}|j4                  j7                  d      }d|v xs d|v }|rt8        j:                  dk(  rd}||z  }t=        j>                  ||      |d }~ww xY w)Nr  )r,  rr   z'omp.h' file not foundr  rz  a  

OpenMP support not found. Please try one of the following solutions:
(1) Set the `CXX` environment variable to a compiler other than Apple clang++/g++ that has builtin OpenMP support;
(2) install OpenMP via conda: `conda install llvm-openmp`;
(3) install libomp via brew: `brew install libomp`;
(4) manually setup OpenMP and set the `OMP_PREFIX` environment variable to point to a path with `include/omp.h` under it.) r   r   r    r  rZ   r[   r   r  r\   rR   utilsr  _TORCH_PATHtempfileTemporaryDirectoryr  r   zipcopytreer/   r   remover  r  STDOUTr  r  r   rV   r  r!   r  )r8  r9  r  input_pathsipinput_filesr  header_nameoutput_nametorch_includes_pathtmp_dirr  r  dest_include_pathoutput_file_pathr   r  openmp_probleminstructions                      r:   r3  r3    s    #-Z"=:,:KEPEPr 0 0 2:[  )6)+K''**;7K''**;7K #%'',,))55y# ,,.'Kg{)KL[9DAqKK277<<#;< :$&GGLL)$D! 35FG#5c7K#P 77>>+.IIk*,k: /. ##C
0A0AB; /. (( 6)1V;Qx6?Qcllh62  k!F!!#v.A56sD   <IB3I D
I
I %I 
II I K)AKKzOptional[CDLL]_libgompc                  n    e Zd ZU  e       Zded<    eej                        Zedd       Ze	dd       Z
y)CppCodeCacheDict[str, CDLL]r   c           	        	 t        j                  |       S # t        $ r}dt        |      v rNt        j
                  j                  d      r/t        j                  d      at        j                  |       cY d }~S dt        |      v r9t        | dt        j                          dt        j                          d      | d }~ww xY w)Nr  z/usr/lib64/libgomp.so.1z(failed to map segment from shared objectz3.  The most common reason this may occur is if the zl folder is mounted with noexec (e.g., by default Docker mounts tmp file systems as noexec).  Please remount zi with exec enabled, or set another temporary directory with TORCHINDUCTOR_CACHE_DIR environment variable.)
r
   LoadLibraryOSErrorr   rZ   r[   r   r8  r'  
gettempdir)r[   r   s     r:   _load_libraryzCppCodeCache._load_library  s    	##D)) 	QBGGNN3L$M  ++,EF''--9SVCcLXM`M`MbLc d33;3F3F3H2I J]]
  	s"    	CAC5C;ACCc                Z   t               }t        t        dd|            }t        |d|      \  }}|| j                  vrddlm} t               } |t        j                  j                  ||dz         t        	      }|5  |d d
 dz   }	t        j                  j                  |	      s.t        j                  t        ||	|            }
t        ||	|
       | j!                  |	      | j                  |<   || j                  |   _        d d d        | j                  |   S # 1 sw Y   xY w)Nr  r  )r&  r  r   r   r  r  r  r#  r$  )r  r  r&  )re  r   r2  r  r   r  r  r   rZ   r[   r\   r  r   r0  r1  r3  r@  r   )r  r  r  r  r   r8  r  r   r  r9  r  s              r:   r   zCppCodeCache.load"  s    %.sCPQU+FZcii)#~HBGGLL3=A<XD("o4ww~~k2+++",[.C
 ![#>!$!2!2;!?		#%(		#"  yy~ s   BD!!D*N)r[   r   r   r   )r  r   r   r   )r   r   r   r  r   r#  r   r  r@  r  r   r6   r<   r:   r:  r:    sB    !VE?#%E $  r<   r:  c                     e Zd ZU  e       Zded<    e       Zded<    eej                        Ze	d
dd       Z
e		 	 	 d	 	 	 	 	 	 	 	 	 dd       Ze		 	 d	 	 	 	 	 	 	 	 	 dd       Ze	 ej                  d      	 	 	 	 	 	 dd	              Zy)r  zDict[str, ModuleType]r   z Dict[str, List[Tuple[Any, ...]]]linemapsc                    t        |d|      S NrN   rB  )r  )r  r  r   s      r:   r  zPyCodeCache.write@  s    [$e44r<   Nc                L    t        |d|      \  }}| j                  ||||      S rF  )r  r  )r  r  r   linemapattrsr   r[   s          r:   r   zPyCodeCache.loadD  s-     +t59	T##Cw>>r<   c           
        |g }|| j                   vrt        |      5 }	 t        |j                         |d      }t        t         d|       }||_	        ||_
        t        ||j                  |j                         |t        j                  |j                  <   | j                   j                  ||       t!        t#        |       | j$                  |<   |%|j'                         D ]  \  }	}
t)        ||	|
        d d d        | j                   |   S # t        $ r-}t        d| dt        |      j                   d|       d d }~ww xY w# 1 sw Y   QxY w)NexeczFailed to import r}  r|  rL   )r   r   r  r]  r7  r{   rp  r   r   rW  r   rK  __dict__rV   modulesr   r  r)  rD  r   setattr)r  r   r[   rH  rI  r  r   r   modrw  r  s              r:   r  zPyCodeCache.load_by_key_pathO  s8    ?Gciidq "1668T6:D
 !H:Qse!45#T3<<6,/CLL)		$$S#.%)#w-%8T"$ %1Q* !.% * yy~% !  &+D6DG4D4D3ERsK   s.   EDCE	E	(EE		EEc                    || j                   vry | j                   |   \  }}t        ||      }|dk(  ry ||dz
     }|sy dd} ||      S )Nr   r   c           	         d}t        j                  ||       }t        |      D cg c]  \  }}}|t        |      |d c}}}S c c}}}w )Nz"File "(.+)", line (\d+), in (.+)\n)r   liner^   )r  findallreversedr  )stack_traceregexmatchesr  lns         r:   parse_stack_tracez<PyCodeCache.stack_frames_for_code.<locals>.parse_stack_trace  sV     :Ejj4G  (00GAq! A:0  s   A)rU  r   r   zList[Dict[str, Any]])rD  r   )r  r[   linenor  nodesr  entryrZ  s           r:   stack_frames_for_codez!PyCodeCache.stack_frames_for_codeq  s`    
 s||#||D)u'6a!e	 !''r<   rM   )r  r   r   r   r   Tuple[str, str])rM   NN)
r  r   r   r   rH  r  rI  r   r   r   )NN)
r   r   r[   r   rH  r  rI  r   r   r   )r[   r   r[  r  r   zOptional[List[Dict[str, Any]]])r   r   r   r  r   r#  rD  r   r  r  r  r   r  r   r   r^  r6   r<   r:   r  r  ;  s   #'6E )15H.7%E5 5  37*.?? ? 1	?
 (? 
? ? 
 48*.  1	
 ( 
 B Y(( #(	'(  (r<   r  c                  \    e Zd ZU  e       Zded<    eej                        Zedd       Z	y)CppWrapperCodeCacher;  r   c                   d| }t        |      }t        j                  j                  |      st        j                  |       d}t        j                  j                  || d|       }t        j                  d|       || j                  vrt        j                  d|       ddl	m
}	 t               }
 |	t        j                  j                  |
|dz         t        	      }|5  t        j                  j                  |      st        j                  d
|       t               }t               }t               }t!               }t#        t%               |      \  }}}}}t'               }t)               }| d| d| d| d| d| d| }| d| d| d}t*        j,                  j.                  j1                  |||g|g|g|g|d      }t        j                  d|       nt        j                  d|       t2        j4                  j7                  ||      }|J t2        j4                  j9                  |      }t;        |j<                  t>        j@                        sJ |j<                  jC                  |       t        j                  d|       || j                  |<   d d d        | j                  |   S # 1 sw Y   xY w)Ninline_extension_r$  rL   zCpp wrapper code path %szCpp wrapper cache miss for %sr   r  r  r  zCpp wrapper building %s)r&  rT   rt  z                     z -ffast-mathT)r^   build_directorycpp_sources	functionsextra_cflagsextra_ldflagsextra_include_pathsuse_pchzCpp wrapper done building %sz(Found target .so, cpp wrapper loading %szCpp wrapper done loading %s)"rd   rZ   r[   r   r]   r\   r  r  r   r  r  r   r  rv  r  rk  rm  r  re  r  rx  rR   r%  r  load_inline	importlibutilspec_from_file_locationmodule_from_specr   loaderr   Loaderexec_module)r  r  	func_namer   rT   r^   rb   extfilepathr  r   r  
_cpp_flags
_opt_flags_shared_warning_all_flag_ipaths_lpaths_libs_macros_build_arch_flags_use_custom_generated_macros_cpp_wrapper_flagsrh  ri  rO  rb  s                              r:   r   zCppWrapperCodeCache.load  s}   "3%(/5ww~~o.KK(77<<D63%A		,h7ciiII5x@)#~HBGGLL3=A<XDww~~h/II7B!*J!3!5J(lG(<(>% 6 ,!)
 4O3P0):)<&&0\:,a@Q?RRSTeSffghogp q'(*F)G$IL (/iq	5'$NM++33??!(7%0M#,+&2^'4o,3 $ @ 	C II<hGIIH(S$>>AA$QD+++#..99$?C%dkk3::>>>KK++C0II;XF!$		#e h yy~i s   (F9J88KN)
r  r   rt  r   r   r   rT   r   r   r   )
r   r   r   r  r   r#  r   r  r  r   r6   r<   r:   rb  rb    s0    !VE?#%ED Dr<   rb  c                      e Zd Zedd       Zy)TritonCodeCachec                D    t         j                  |      }t        ||      S r5   )r  r   getattr)r  kernel_namer  rO  s       r:   r   zTritonCodeCache.load  s    {+sK((r<   Nr  r   r  r   r   r   )r   r   r   r  r   r6   r<   r:   r  r    s    ) )r<   r  c                    t        j                  t        j                  j                        rt        j                  j                  S t        j                  t        j                  d            rt        j                  dd      S t        j                  t        j                  d            r4t
        j                  j                  t        j                  dd      d      S y)NCUDACXXrM   r  zbin/nvccnvcc)	r"   
nvcc_existr    rT   cuda_cxxrZ   r  r[   r\   r6   r<   r:   _cuda_compilerr    s    6;;//0{{###299Y/0yyB''299[12ww||BIIk26
CCr<   c                 2   t         j                  j                  } t        j                  j                  | d      t        j                  j                  | d      t        j                  j                  | d      t        j                  j                  | d      gS )Nr  ztools/library/includeztools/library/srcztools/util/include)r    rT   cutlass_dirrZ   r[   r\   )cutlass_paths    r:   _cutlass_include_pathsr    sf    ;;**L
\9-
\#:;
\#67
\#78	 r<   c                    ddl m}  g }t               rd}t        j                  j                  | j                  |            s0t        j                  j                  | j                  d            rd}|j                  d| j                  |              |j                  d| j                  |d              |j                  d       |j                  d       |S t        d	      )
Nr   r  lib64ra  r  r  z-lcudaz-lcudartzMUnsupported env, failed to find cuda libs! Currently only Linux is supported.)	r  r  r%   rZ   r[   r   _join_cuda_homer  NotImplementedError)r  ri  extra_lib_dirs      r:   _cuda_lib_optionsr    s    )!Mzww~~))-8
ggnn]::5AB "Mr-"?"?"N!OPQ..}gFGH	
 	X&Z(
  "[
 	
r<   c                 
    g dS )N)z-fPICz-fno-strict-aliasingz-fvisibility=hiddenz-Wconversionr6   r6   r<   r:   _nvcc_host_compiler_optionsr    s     r<   c            	        t        j                         } | dk(  rd} d|  d|  g}t        j                  j                  r	|d|  gz  }dddd	|  d
dj                  |       dt        j                  j                  ddg}t        j                  j                  r|j                  g d       t        j                  j                  r|j                  g d       t        j                  j                  r|j                  ddg       |S )N9090asm_compute_lto_z-t=0z"-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1z-wz-gencode=arch=compute_z,code=[,]rs  z--expt-relaxed-constexpr)z	-lineinfoz-gz-DCUTLASS_DEBUG_TRACE_LEVEL=1)z--keepz,--ptxas-options=--warn-on-local-memory-usagez --ptxas-options=--warn-on-spillsz--resource-usagez--source-in-ptxz--use_fast_mathz -DCUTLASS_USE_TANH_FOR_SIGMOID=1)r"   get_cuda_archr    rT   enable_cuda_ltor\   compile_opt_levelenable_debug_infoextendenable_ptxas_infouse_fast_math)archr   optionss      r:   _nvcc_compiler_optionsr    s    !!#Dt|$LHTF+,D{{""4v,
 gchhtn-=Q?%%"G {{$$KL{{$$	
 {{  !2	
 Nr<   c                   t               }t               }t               }t               }||D cg c]  }d|v rd| nd|  c}z   |D cg c]  }d|z   	 c}z   |z   }	dj	                  |       }
d}|dk(  r%t                ddj	                  |	       d| d|
 }nJ|d	k(  r6|	j                  d
       t                ddj	                  |	       d| d|
 }nt        d| d      t        j                  d|       |S c c}w c c}w )N=z-Xcompiler z-Xcompiler=r  rt  rM   r  z -c -o r$  z-sharedz -o zUnsupported output file suffix !zCUDA command: %s)
r  r  r  r  r\   r  r  r  r  r  )	src_filesdst_filedst_file_extr  cuda_lib_optionsnvcc_host_compiler_optionsnvcc_compiler_optionsoptr[   r  src_fileress               r:   cuda_compile_commandr  >  sJ   
 +,M(*!<!>24 2
1 $'#:k#[3FF1
	

 $1
1=44$;=
1	2 	  xx	"H
Cs!"!CHHW$5#6ghZq
S		y!!"!CHHW$5#6d8*AhZP!$CL>QR"STTII #&J#
 2s   C>Dc                  @    e Zd ZdZ	 	 d
dZd Zd Zd Zd Zd Z	d Z
y	)
DLLWrapperz A wrapper for a dynamic library.c                T    || _         t        j                  |      | _        d| _        y )NT)lib_pathr
   r=  DLLis_open)r   r  s     r:   r   zDLLWrapper.__init__`  s$     !##H-r<   c                L    | j                   r| j                          d| _         y y r@   )r  _dlcloser   s    r:   closezDLLWrapper.closeh  s    <<MMO DL r<   c                (   d }t               r;t        d       }t        |d      st        d      }t        |d      r|j                  }nt	        d      |)t
        g|_         || j                  j                         y t        j                  d       y )Ndlclosezlibc.soz&Unsupported env, failed to do dlclose!zKdll unloading function was not found, library may not be unloaded properly!)r%   r   hasattrr  r  r	   argtypesr  _handler  warning)r   	f_dlclosesymss      r:   r  zDLLWrapper._dlclosem  sx    	::D4+ItY' LL	%&NOO "*Idhh&&'KK]r<   c                    | j                   st        d| j                         t        | j                  |      fd}|S )NzCannot use closed DLL library: c                 D     |  }|rt        dj                         y )NzError in function: )r{   r   )r8   errmethods     r:   _wrapped_funcz-DLLWrapper.__getattr__.<locals>._wrapped_func  s,    $-C"%88I#JKK r<   )r  r{   r  r  r  )r   r^   r  r  s      @r:   __getattr__zDLLWrapper.__getattr__  s?    ||!@PQQ4(	L
 r<   c                    | S r5   r6   r   s    r:   	__enter__zDLLWrapper.__enter__  s    r<   c                $    | j                          y r5   r  )r   r8   s     r:   __exit__zDLLWrapper.__exit__      

r<   c                $    | j                          y r5   r  r   s    r:   __del__zDLLWrapper.__del__  r  r<   N)r  r   )r   r   r   r"  r   r  r  r  r  r  r  r6   r<   r:   r  r  ]  s0    *!
,r<   r  c                      e Zd ZU ej                   G d d             Z e       Zded<    e	ej                        Z
dZed
d       Zedd       Zedd       Zy	)CUDACodeCachec                  "    e Zd ZU ded<   ded<   y)CUDACodeCache.CacheEntryr   r8  r9  N)r   r   r   r#  r6   r<   r:   
CacheEntryr    s    r<   r  zDict[str, CacheEntry]r   rK   c                n    t        t        dgd|            }t        || j                  |      \  }}||fS )z
        Writes source code into a file with dst_file_ext as the file extension.
        Returns the hash key of source code, and the path to the file.
        dummy_inputdummy_outputrB  )r   r  r  _SOURCE_CODE_SUFFIX)r  r  r  cuda_commandr   r8  s         r:   r  zCUDACodeCache.write  sE      -.,O
  00
Z Jr<   c                   | j                  ||      \  }}|| j                  vrddlm} t	               } |t
        j                  j                  ||dz         t              }|5  |dt        | j                          |z   }t
        j                  j                  |      sRt        |g||      j                  d      }		 t        j                  |	t        j                   t
        j"                         t,        j/                  ||      | j                  |<   ddd       | j                  |   j0                  ||fS # t        j$                  $ r&}
t'        j(                  |	|
j*                        |
d}
~
ww xY w# 1 sw Y   axY w)z
        Compiles CUDA source_code into a file with dst_file_ext extension.
        Returns a tuple of dst_file_path, hash_key, source_code_path
        r   r  r  r  Nrt  r+  )r  r   r  r  r   rZ   r[   r\   r  r7  r  r   r  r1  r  r  r,  r   r  r!   CUDACompileErrorr  r  r  r9  )r  r  r  r   r8  r  r   r  r9  r  errors              r:   r  zCUDACodeCache.compile  s=    ))K>Zcii)#~HBGGLL3=A<XD()HC0G0G,H+HILXww~~k2.#k<eCj Q"//
(9(9rzz
 "/!9!9*k!R		#  		#**C<<	 &88 Q!223E5PQ s1   %AE6?4D:3#E6:E3!E..E33E66E?c                v    |dk7  rt        d| d|       | j                  ||      \  }}}t        |      ||fS )z
        Compiles source code and loads the generated .so file.
        Returns a tuple of DLLWrapper, hash_key, source_code_path
        r$  zCOnly support loading a .so file for now. Requested file extension: z. Source code: )r{   r  r  )r  r  r  dst_file_pathr   source_code_paths         r:   r   zCUDACodeCache.load  sa     4--9N/+X  58KK5
1x!1 =)85EFFr<   N)r   r`  )r   Tuple[str, str, str])r   zTuple[DLLWrapper, str, str])r   r   r   dataclasses	dataclassr  r  r   r#  r   r  r  r  r  r  r   r6   r<   r:   r  r    s~       $(6E )%E  = =8 G Gr<   r  c                 ~    t               D ]0  \  } }|j                         s|j                  j                          2 y r5   )r   r   Workerrv   )rO   device_interfaces     r:   caching_device_propertiesr    s4    ?A((*##99;  Br<   c                    t        |j                        }|j                  j                  |j                         t
        j                  | |      }|j                  |       y )N)warm_cache_only_with_cc)r   rp  r  
set_deviceindexr  r   
precompile)r  r  r  ro   r  kernels         r:   _worker_compiler    sN     0<&&v||4!!+{;F
b1r<   c                R    t         j                  | |      }|j                          |S r5   )r  r   r  )r  r  r  s      r:   _load_kernelr    s%    !!+{;F
Mr<   c                  8    e Zd ZU ded<   	 	 	 	 	 	 	 	 ddZddZy)TritonFuturer   r  c                .    || _         || _        || _        y r5   )r  r  future)r   r  r  r  s       r:   r   zTritonFuture.__init__  s     '&r<   c                \   t               }t        | d      r| j                  S | j                  j	                          t        | j                  | j                        x}| _        t               |z
  }|dkD  r0t        d| d| j                          t        | j                         | `| `| `|S )Nr  2   z"Detected long compilation time of z seconds for kernel name )	r   r  r  r  resultr  r  r  r$   )r   t0r  latencys       r:   r  zTritonFuture.result	  s    V4";;+D,<,<d>N>NOO&2+R<4WI=VW[WgWgVhi d../d.r<   N)r  r   r  r   r  zFuture[Any]r   r   )r   r   )r   r   r   r#  r   r  r6   r<   r:   r  r    s8      	
 
r<   r  c                T     d fd}t        |d      at        j                          y )Nc                     	 t        d        t        j                         k7  r6t        j                  t        j                         t
        j                         YNr   )r   rZ   getppidkillr  signalSIGKILL)	orig_ppids   r:   runz'_async_compile_initializer.<locals>.run 	  s8    !HBJJL(		V^^4 r<   T)targetdaemonr   )r   _watchdog_threadstart)r  r	  s   ` r:   _async_compile_initializerr  	  s#    5 S6r<   zOptional[Thread]r  c                      e Zd ZddZe ej                  d      dd              Ze ej                  d      dd              Ze	dd       Z
e	dd       Ze	dd       Z	 d	 	 	 	 	 	 	 ddZdd	Zd
 ZddZy)AsyncCompilec                     y r5   r6   r   s    r:   r   zAsyncCompile.__init__/	  r;   r<   r   c                 ^    t         j                  dkD  sJ t        t         j                        S r  )r    compile_threadsr   r6   r<   r:   poolzAsyncCompile.pool2	  s)     %%)))!&"8"899r<   c                 z   t                t        j                  dkD  sJ t        j                         } t        j                  t        j                        }t        t        j                  |t        t        |             }t
        j                  j                  d |j                  t        j                         |S )Nr   )
mp_contextinitializer)exitpriority)r  r    r  rZ   r  multiprocessingget_contextworker_start_methodr   r   r  rn  FinalizeshutdownrV   maxsize)r  ctxr  s      r:   process_poolzAsyncCompile.process_pool8	  s    
 	"#%%)))IIK	))&*D*DE""" :IF
 	%%dDMM%Tr<   c                V   t         j                  dk  ry t                | j                         }t	        |d      r|j                          t                y t        t         j                        D ]  }|j                           t	        |d      r|j                          t                y )Nr   _start_queue_management_thread_start_executor_manager_thread)
r    r  rD   r   r  r"  r  _adjust_process_countr#  rH   )r  r  rO   s      r:   	warm_poolzAsyncCompile.warm_poolN	  s    !!Q&!  49://1 		 6112**, 3t=>335r<   c                t    t         j                  dk  r |       S | j                         j                  |      S r  )r    r  r  submit)r  tasks     r:   r'  zAsyncCompile.submitl	  s.    !!Q&6Mxxz  &&r<   c                   t         j                  dk  st        |      dk  rt        t	        ||            S |D cg c]"  }| j                         j                  ||      $ c}D cg c]  }|j                          c}S c c}w c c}w r  )r    r  r7  r  mapr  r'  r  )r  fnseqr0  r-  s        r:   r*  zAsyncCompile.mapr	  sp    !!Q&#c(a-B%%GJ$Ks!SXXZ%6%6r1%=s$KL$Kq
$KLL$KLs   'B'Bc                <   t                t        j                  dkD  rtt        |      }t	        j
                  ||j                               }|j                  |      }| j                         j                  t        ||||      }t        |||      S t        ||      S r  )rD   r    r  r   rR   ro   rw   get_compute_capabilityr   r'  r  r  r  )r   r  r  
device_strr  ro   r  r  s           r:   rm   zAsyncCompile.tritonx	  s     	!!A%7
C\\*.>.M.M.OPF!88@B&&(//k2vF  [&AA[99r<   c                0    fd}| j                  |      S )Nc                 B    t         j                         j                  S r5   )r:  r   r  )r  s   r:   r(  zAsyncCompile.cpp.<locals>.task	  s    $$[1888r<   r'  )r   r  r(  s    ` r:   r  zAsyncCompile.cpp	  s    	9 {{4  r<   c                4    fd}| j                  |      S )Nc                 6    t         j                         d   S ra  )r  r   )r  r  s   r:   r(  zAsyncCompile.cuda.<locals>.task	  s     %%k<@CCr<   r2  )r   r  r  r(  s    `` r:   rT   zAsyncCompile.cuda	  s    	D {{4  r<   c                   t        |j                         D cg c]  \  }}t        |t        t        f      r| c}}      }t        |dt        j                  d      }t        j                  dkD  r|j                         D ]q  \  }}t        j                  r!t        |t              s|j                  |       t        |t        t        f      sN|j                         ||<   |j                  d       s t                y c c}}w )NzInductor Compilationr   )totaldescdisabledelayr   )r7  r   r   r   r  r-   r    disable_progressr  verbose_progressr,   set_postfix_strr  updaterH   )r   scoper   r   num_kernelspbarr  s          r:   waitzAsyncCompile.wait	  s     #(++-"/JCefl%;< "/
 '++	
 !!A%${{}V**:dI3N((-fv|&<=!'E#JKKN  - 	)s   "D
Nr   )r   r   )r   r   )r(  r  r   r   )r+  r  r,  r'  r   r'  )rT   )r  r   r  r   r/  r   r   zUnion[TritonFuture, ModuleType])r  r   r   r   )r>  r   r   r   )r   r   r   r   r   r   r   r  r   r  r%  r'  r*  rm   r  rT   rA  r6   r<   r:   r  r  .	  s     Y:  : Y  (  : ' '
 M M EK::-0:>A:	(: !!r<   r  r   r   )r^   r   r   r   r~  )r   r   r   r   r_  )r   Union[str, bytes]r   r   )r   r   r   r   r   r   r   r  )rM   r   )r   rB  r   r   r   r   )rM   r   rM   )r   rB  r   r   r   r   r   r   r   r   r   r`  )r[   r   r   rB  r   r   )r-  r  r   r  )r0  r   r   r   rL  )rk  r  rl  r  rm  r   r   r   )r  r  r   r'  r   r   )r  r   r   r   )r   zList[VecISA])r   r  )T)rg  r   r   r   )rj  r   r   r   )r%  r   r   r   )r   zTuple[bool, str])
r  r   r&  r  rT   r   r  r   r   z$Tuple[List[str], str, str, str, str])r  Union[str, List[str]]r  r   r%  r   rj  r   r  r   r&  r  rT   r   r  r   rg  r   r  r   r   r   )r  r   )r[   r   r   r`  )r8  rC  r9  r   r  	List[str]r   r   )r   r  )r   rD  )r  rD  r  r   r  r   r   r   )
r  r   r  r   r  r  ro   r  r   r   r  )
__future__r   r   rM  r  r   r|   rm  rC  r~   loggingr  rZ   r  r^  rX  r  r  r0  r  r  r  rV   r  r'  r	  r  r  bisectr   concurrent.futuresr   r   r   r   r  r	   r
   r   r   r   r   r   r   r   r   typesr   typingr   r   r   r   r   r   r   r   r   rR   torch._dynamo.device_interfacer   r   torch._dynamo.utilsr   torch._inductorr    r!   torch._inductor.codegen.cudar"   torch._inductor.utilsr#   r$   r%   torch._prims_commonr&   %torch.fx.experimental.symbolic_shapesr'   r(   r)   torch._inductor.graphr*    torch._inductor.select_algorithmr+   	torch.hubr,   r-   r[   abspathrW  _HERErV  r&  r  	triton.fbr.   triton.fb.buildr/   torch._inductor.fb.utilsr0   r1   r2   r3   r  rF   rC   rD   rH   	getLoggerr   r  rd   ri   rk   r   r   r   r   r   r   r   r  r   r  r  r%  r.  r1  r5  r<  r?  PicklerrA  r   rd  rf  rh  r  r  r  r  r  r  r  r	  r  r  r  r>  rI  rO  rS  rb  rZ  r_  re  rh  rk  rm  rq  rv  rx  r  r  r  r  r  r  r  r2  r  r  r  r  r  r#  r3  r8  r#  r:  r  rb  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r%  r6   r<   r:   <module>r\     s    "       	    	     	     
       N N  ' '        X X X  ) ' 1 H H 5 N N3= %
!ggoobggooe456%2    
 g!'GH
 H
V' '2Qi QhX
* 9;""!"25"";   	
  &   *   4!%",&.. ,B T; ;&   A  A H&  		 s3 s3l 5) 5) 5)p4(' Q# #44;A T5 5\ \~ 	8 	8 	8 	8f 	8 	8 	8 	8 	88F 8  /#+wy1  T  ((-*S*82 T  T ( "%	`B`B`B `B 	`B
 *`BL !%#6 66 6 	6
 6 6 6 6 6 6 	6r8 T ( (2o ot  (06%064706>G06	06f  . - -`P( P(fI IX) )2$N  		>: :zEG EGP<22#&2,/29E2	2 P	 &* " )| |~    r<   