
    /i$                     F   d dl mZ d dlmZ d dlZd dlZd dlZddl d dlZd dl	m
Z
 ej                  j                  ej                  j                   ej                         ej                  j                  e                  Z e       d        Z e       d        Z e       d	        Zd
 Z e       d        Z e       d        Z e       ddefd       Z e       d        Z e       d        Z e       d        Z e       d        ZddZd Ze dk(  r e        yy)    )	lru_cache)PathN   )*)MODEL_REPO_BRANCHc                     dd l }| j                  dk(  r%|j                  j                  |       j                  S | j                  dk(  ryy)Nr   cudampsl       0 )torchtyper	   get_device_propertiestotal_memory)devicer   s     Q/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/crawl4ai/model_loader.pyget_available_memoryr      s>    {{fzz//7DDD		    c                 |    t        |       }| j                  dk(  ry| j                  dv r|dk\  ry|dk\  ry|dk\  ry	y
y)Ncpu   )r	   r
   l           l           l        @       )r   r   )r   available_memorys     r   calculate_batch_sizer      sO    +F3{{e		'|+-,r   c                      dd l } | j                  j                         r| j                  d      }|S | j                  j
                  j                         r| j                  d      }|S | j                  d      }|S )Nr   r	   r
   r   )r   r	   is_availabler   backendsr
   )r   r   s     r   
get_devicer   ,   sl    zz f%
 M	 
			(	(	*e$ M e$Mr   c                 @    t               }| j                  |       | |fS )N)r   to)modelr   s     r   set_model_devicer#   9   s    \F	HHV&=r   c                  (   t         j                  j                  t        j                  dt	        j
                               d      } t        j                  | d       t        j                  |  dd       t        j                  |  dd       | S )NCRAWL4_AI_BASE_DIRECTORYz	.crawl4aiT)exist_okz/cachez/models)ospathjoingetenvr   homemakedirs)home_folders    r   get_home_folderr.   ?   sk    '',,
		,diik:KK KKd+KK;-v&6KK;-w'$7r   c                      ddl m} m} | j                  dd       }|j                  dd       }|j	                          t        |      \  }}||fS )Nr   )BertTokenizer	BertModelzbert-base-uncasedresume_download)transformersr0   r1   from_pretrainedevalr#   )r0   r1   	tokenizerr"   r   s        r   load_bert_base_uncasedr8   J   sT    5--.ASW-XI%%&94%PE	JJL$U+ME6er   returnc                     ddl m}m} |j                  | d      }|j                  | d      }|j	                          t        |      \  }}||fS )zLoad the Hugging Face model for embedding.

    Args:
        model_name (str, optional): The model name to load. Defaults to "BAAI/bge-small-en-v1.5".

    Returns:
        tuple: The tokenizer and model.
    r   )AutoTokenizer	AutoModelNr2   )r4   r;   r<   r5   r6   r#   )
model_namer;   r<   r7   r"   r   s         r   load_HF_embedding_modelr>   U   sS     6--j$-OI%%j$%GE	JJL$U+ME6er   c                      ddl m} m} ddl m} | j	                  d      }|j	                  d      }|j                          t        |      \  }} |d||      }|S )Nr   )r;   "AutoModelForSequenceClassification)pipelinez1dstefa/roberta-base_topic_classification_nyt_newsztext-classification)r"   r7   )r4   r;   r@   rA   r5   r6   r#   )r;   r@   rA   r7   r"   r   pipes          r   load_text_classifierrC   h   s]    N%--;I />>;E 
JJL$U+ME6))LDKr   c                    	 ddl m} m} ddlm dd l	d}|j                  |d       | j                  |d       j                          t              \  j                  j                  d	fd	}|fS )Nr   )r@   r;   )expitzcardiffnlp/tweet-topic-21-multir2   c                     | ddd|      }|j                         D ci c]  \  }}||j                         }}}j                         5   di |}d d d        j                  j	                         j                         j                         } |      }||k\  dz  }g }	|D ]9  }
t        |
      D cg c]  \  }}|dk(  s|    }}}|	j                  |       ; |	S c c}}w # 1 sw Y   xY wc c}}w )NptT)return_tensorspadding
truncation
max_lengthr    )	itemsr!   no_gradlogitsdetachr   numpy	enumerateappend)texts	thresholdrK   tokenskeyvaloutputscorespredictionsbatch_labels
predictionivaluelabelsclass_mappingr   rE   r"   r7   r   s                 r   _classifierz4load_text_multilabel_classifier.<locals>._classifier   s   !
 17
$,CC
 
 ]]_ 	%_V_F	% %%'++-335v*a/% 	(J1::1F%-Q%ST*a F  '		( %
	% 	%s   C*	C0=C<C<0C9)g      ?r   )r4   r@   r;   scipy.specialrE   r   r5   r6   r#   configid2label)
r@   r;   MODELrb   ra   r   rE   r"   r7   r   s
       @@@@@@r   load_text_multilabel_classifierrg   y   s    N# .E--eT-JI.>>t ? E 
JJL$U+ME6LL))M 8 r   c                      dd l } 	 | j                  j                  d       | j                  j                  d      S # t        $ r | j	                  d       Y 7w xY w)Nr   ztokenizers/punktpunkt)nltkdatafindLookupErrordownload)rj   s    r   load_nltk_punktro      sN    		)* 99>>,--  gs   < AAc            
      6   dd l } d}t               }t        |      |z  }|j                         rt	        |j                               sd}t        }t        |      dz  }t        d       |j                         r;	 t        j                  |       |j                         rt        j                  |       	 t        j                  dd	d
||t        |      gt        j                  t        j                  d       t        |      dz  }|j                  dd       |dz  dz  }t        j                   ||       t        j                  |       t        d       	 | j'                  t        |            S # t        $ r* t        d       t        d|        t        d|        Y y w xY w# t        j"                  $ r}	t        d|	        Y d }	~	y d }	~	wt$        $ r}	t        d|	        Y d }	~	y d }	~	ww xY w# t$        $ r}	t        d|	        Y d }	~	y d }	~	ww xY w)Nr   models/reutersz)https://github.com/unclecode/crawl4ai.gitcrawl4aiu7   [LOG] ⏬ Downloading Spacy model for the first time...zh[WARNING] Unable to remove existing folders. Please manually delete the following folders and try again:z- gitclonez-bT)stdoutstderrcheckmodels)parentsr&   reutersu-   [LOG] ✅ Spacy Model downloaded successfullyz0An error occurred while cloning the repository: zAn error occurred: zError loading spacy model: )spacyr.   r   existsanyiterdirr   printshutilrmtreePermissionError
subprocessrunstrDEVNULLmkdircopytreeCalledProcessError	Exceptionload)
r{   namer-   model_folderrepo_urlbranchrepo_foldermodels_foldersource_folderes
             r   load_spacy_modelr      s   D!#K$t+L !c,*>*>*@&A>";'*4GH 
k*&&(MM,/	NNvx[9IJ!))!))	 !-8Mt< ((2Y>MOOM<8 MM+&ABzz#l+,,K # ~ ;-()<.)*: ,, 	DQCHI 	's+,	  +A3/0sO   ;:E4 6B#F* G7 40F'&F'*G4=GG4G//G47	H HHc                    | rt        d       t               }t        j                  j	                  |d      t        j                  j	                  |d      g}|D ]1  }t        |      j                         st        j                  |       3 t        d       t        d       t               \  }}t        d|        t        d       t                t        d       y	)
z*Download all models required for Crawl4AI.z![LOG] Removing existing models...rq   rx   z[LOG] Existing models removed.z$[LOG] Downloading text classifier...z [LOG] Text classifier loaded on z,[LOG] Downloading custom NLTK Punkt model...u-   [LOG] ✅ All models downloaded successfully.N)r   r.   r'   r(   r)   r   r|   r   r   rg   ro   )remove_existingr-   model_foldersfolder_r   s         r   download_all_modelsr      s    12%'GGLL&67GGLLh/
 $ 	&FF|""$f%	& 	./ 

01/1IAv	,VH
56	
89	
9:r   c                      t        d       t        d       t        j                  d      } | j                  ddd       | j	                         }t        |j                  	       y )
Nz/[LOG] Welcome to the Crawl4AI Model Downloader!zE[LOG] This script will download all the models required for Crawl4AI.zCrawl4AI Model Downloader)descriptionz--remove-existing
store_truez)Remove existing models before downloading)actionhelp)r   )r   argparseArgumentParseradd_argument
parse_argsr   r   )parserargss     r   mainr     s_    	
;<	
QR$$1LMF
8  
 D(<(<=r   __main__)zBAAI/bge-small-en-v1.5)F)!	functoolsr   pathlibr   r   r'   r   model_loaderr   crawl4ai.configr   r(   realpathr)   getcwddirname__file____location__r   r   r   r#   r.   r8   tupler>   rC   rg   ro   r   r   r   __name__rL   r   r   <module>r      sZ         -wwYRYY["''//(:S TU    & 	 	     E  $    3 3l . . = =@;:> zF r   