
    /ix             	       @   d dl Z d dlZd dlZd dlZddlmZmZmZmZm	Z	m
Z
mZmZmZmZ ddlmZmZ ddlmZmZ ddlmZmZ ddlmZmZ ddlmZmZ dd	lmZ dd
l m!Z!m"Z" ddl#m$Z$ ddl%m&Z& d dl'Z'd dl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. d dl/m0Z0 e.e1e*e1ge2f   e,e.e1e*e1ge2f   f      f   Z3 G d de0      Z4d+de)de2fdZ5de)de)fdZ6de)de2fdZ7 G d d      Z8 G d d      Z9 G d d      Z: G d d       Z; G d! d"      Z< G d# d$      Z= G d% d&      Z> G d' d(      Z? G d) d*      Z@y),    N   )
DEFAULT_PROVIDERDEFAULT_PROVIDER_API_KEYMIN_WORD_THRESHOLD$IMAGE_DESCRIPTION_MIN_WORD_THRESHOLDPROVIDER_MODELSPROVIDER_MODELS_PREFIXESSCREENSHOT_HEIGHT_TRESHOLDPAGE_TIMEOUTIMAGE_SCORE_THRESHOLDSOCIAL_MEDIA_DOMAINS)UAGenValidUAGenerator)ExtractionStrategyLLMExtractionStrategy)ChunkingStrategyRegexChunking)MarkdownGenerationStrategyDefaultMarkdownGenerator)ContentScrapingStrategyLXMLWebScrapingStrategy)DeepCrawlStrategy)TableExtractionStrategyDefaultTableExtraction)	CacheMode)ProxyRotationStrategy)AnyCallableDictListOptionalUnion)Enumc                       e Zd ZdZdZy)	MatchModeorandN)__name__
__module____qualname__ORAND     R/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/crawl4ai/async_configs.pyr%   r%   &   s    	B
Cr.   r%   objignore_default_valuec           	         | yt        | t        t        t        t        f      r| S t        | t
              r#| j                  j                  | j                  dS t        | d      r| j                         S t        | t        t        t        f      st        | d      r)t        | t              s| D cg c]  }t        |       c}S t        | t               r"t        |       D cg c]  }t        |       c}S t        | t              r9d| j#                         D ci c]  \  }}t        |      t        |       c}}dS | j                  j                  }t        | d      rt%        j&                  | j                  j(                        }|j*                  }i }|j#                         D ]d  \  }	}
|	dk(  rt-        | |	|
j.                        }t1        |      rt1        |
j.                        rD||
j.                  k7  sT|rWt        |      ||	<   f | j                  j                  |dS t        |       S c c}w c c}w c c}}w )	z|
    Recursively convert an object to a serializable dictionary using {type, params} structure
    for complex objects.
    N)typeparams	isoformat__iter__dict)r3   value	__class__self)
isinstancestrintfloatboolr#   r9   r(   r8   hasattrr5   listtuplesetr7   to_serializable_dict	frozensetitemsinspect	signature__init__
parametersgetattrdefaultis_empty_value)r0   r1   itemkv_typesigr4   current_valuesnameparamr8   s               r/   rD   rD   -   s   
 { #S%./
 #t..#))DD sK }} #eS)*gc:.FzZ]_cOd7:;t$T*;; #y!7;CyAt$T*AA #tBE))+N$!Qc!f2155N
 	

 MM""E sK  6 67 !<<> 		GKD%v~Cu}}5E #5)nU]].KEMM)2F+?+FN4(		G. MM**$
 	

 s8Oe < B Os   0H4"H9H>datareturnc                 J   | yt        | t        t        t        t        f      r| S t        | t
              rd| v r| d   dk(  r4d| v r0| d   j                         D ci c]  \  }}|t        |       c}}S d}dg}|D ]8  }	 t        j                  |      }t        || d         rt        || d         } n: |Vt        |t              r || d         S d| v r7| d   j                         D ci c]  \  }}|t        |       }}} |di |S t        | t               r| D cg c]  }t        |       c}S t        | t
              r-| j                         D ci c]  \  }}|t        |       c}}S | S c c}}w # t        t        f$ r Y w xY wc c}}w c c}w c c}}w )zS
    Recursively convert a serializable dictionary back to an object instance.
    Nr3   r7   r8   crawl4air4   r-   )r;   r<   r=   r>   r?   r7   rF   from_serializable_dict	importlibimport_moduler@   rK   ImportErrorAttributeError
issubclassr#   rA   )	rV   rO   rP   clsmodule_pathsmodule_pathmodconstructor_argsrN   s	            r/   rZ   rZ   x   s    | $c5$/0 $&D.<6!go=A']=P=P=RSTQA-a00SS #|' 	K--k:3V-!#tF|4C .	 ?#t$4>**4 >B(^=Q=Q=S$59QA-a00$  $ .-.. $9=>&t,>> $9=FA)!,,FFKG T  0 $ ? Gs*   E83E>/F$FF>FFr8   c                 r    | yt        | t        t        t        t        t
        f      rt        |       dk(  ryy)z+Check if a value is effectively empty/null.Tr   F)r;   rA   rB   rC   r7   r<   len)r8   s    r/   rM   rM      s/    }%$sD#67CJ!Or.   c                   X    e Zd Z	 ddededee   fdZededd fd       ZdefdZ	dd	Z
y
)GeolocationConfiglatitude	longitudeaccuracyc                 .    || _         || _        || _        y)zConfiguration class for geolocation settings.
        
        Args:
            latitude: Latitude coordinate (e.g., 37.7749)
            longitude: Longitude coordinate (e.g., -122.4194)
            accuracy: Accuracy in meters. Default: 0.0
        Nri   rj   rk   )r:   ri   rj   rk   s       r/   rI   zGeolocationConfig.__init__   s     !" r.   geo_dictrW   c                 z    t        | j                  d      | j                  d      | j                  dd            S )z-Create a GeolocationConfig from a dictionary.ri   rj   rk           rm   )rh   get)rn   s    r/   	from_dictzGeolocationConfig.from_dict   s7     !\\*-ll;/\\*c2
 	
r.   c                 J    | j                   | j                  | j                  dS )%Convert to dictionary representation.rm   rm   r:   s    r/   to_dictzGeolocationConfig.to_dict   s#     
 	
r.   c                 n    | j                         }|j                  |       t        j                  |      S )zCreate a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            GeolocationConfig: A new instance with the specified updates
        )rv   updaterh   rr   r:   kwargsconfig_dicts      r/   clonezGeolocationConfig.clone   s.     lln6" **;77r.   N)rp   )rW   rh   )r(   r)   r*   r>   r!   rI   staticmethodr   rr   rv   r|   r-   r.   r/   rh   rh      s`    
 %(	!! ! 5/	!" 
D 
%8 
 

 
8r.   rh   c            
           e Zd Z	 	 	 ddedee   dee   dee   fdZdee   fdZed	edd fd
       Zede	dd fd       Z
eddeded    fd       Zde	fdZddZy)ProxyConfigNserverusernamepasswordipc                 `    || _         || _        || _        |xs | j                         | _        y)aL  Configuration class for a single proxy.
        
        Args:
            server: Proxy server URL (e.g., "http://127.0.0.1:8080")
            username: Optional username for proxy authentication
            password: Optional password for proxy authentication
            ip: Optional IP address for verification purposes
        N)r   r   r   _extract_ip_from_serverr   )r:   r   r   r   r   s        r/   rI   zProxyConfig.__init__   s0        6446r.   rW   c                     	 d| j                   v r2| j                   j                  d      d   j                  d      }|d   S | j                   j                  d      }|d   S # t        $ r Y yw xY w)z#Extract IP address from server URL.://r   :r   N)r   split	Exception)r:   partss     r/   r   z#ProxyConfig._extract_ip_from_server   so    		#))%0399#>Qx))#.Qx 		s   ?A" A" "	A.-A.	proxy_strc                    | xs dj                         }d|v rZd|v rV|j                  dd      \  }}|j                  dd      \  }}d|v r(|j                  dd      \  }}t        | d| ||      S d|v rd|vrt        |      S |j                  d      }t        |      dk(  r|\  }	}
}}t        d	|	 d|
 ||      S t        |      d
k(  r|\  }	}
t        d	|	 d|
       S t	        d|        )zCreate a ProxyConfig from a string.

        Supported formats:
        - 'http://username:password@ip:port'
        - 'http://ip:port'
        - 'socks5://ip:port'
        - 'ip:port:username:password'
        - 'ip:port'
         @r   r   r   r   r   r   )r      zhttp://   zInvalid proxy string format: )stripr   r   rf   
ValueError)r   s	auth_partserver_partprotocolcredentialsr   r   r   r   ports              r/   from_stringzProxyConfig.from_string	  s(    _"##%!8
%&WWS!_"I{$-OOE1$=!Hkk!%0%6%6sA%>"("&Zs;-8%%  A:#Q,a((u:?+0(Bht1TF&;hYabbu:?HBt1TF&;<<8DEEr.   
proxy_dictc                     t        | j                  d      | j                  d      | j                  d      | j                  d            S )z'Create a ProxyConfig from a dictionary.r   r   r   r   r   r   r   r   )r   rq   )r   s    r/   rr   zProxyConfig.from_dict-  sA     >>(+^^J/^^J/~~d#	
 	
r.   env_varc                     g }	 t        j                  | d      j                  d      }|D ])  }|s|j                  t        j                  |             + 	 |S # t        $ r}t        d|        Y d}~|S d}~ww xY w)zLoad proxies from environment variable.
        
        Args:
            env_var: Name of environment variable containing comma-separated proxy strings
            
        Returns:
            List of ProxyConfig objects
        r   ,z(Error loading proxies from environment: N)osgetenvr   appendr   r   r   print)r   proxies
proxy_listproxyes        r/   from_envzProxyConfig.from_env7  s     	B7B/55c:J# ?{66u=>?   	B<QC@AA	Bs   AA 	A<#A77A<c                 `    | j                   | j                  | j                  | j                  dS )rt   r   r   ru   s    r/   rv   zProxyConfig.to_dictL  s*     kk''	
 	
r.   c                 n    | j                         }|j                  |       t        j                  |      S )zCreate a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            ProxyConfig: A new instance with the specified updates
        )rv   rx   r   rr   ry   s      r/   r|   zProxyConfig.cloneU  s.     lln6"$$[11r.   )NNN)PROXIES)rW   r   )r(   r)   r*   r<   r!   rI   r   r}   r   r   rr   r    r   rv   r|   r-   r.   r/   r   r      s     #'"& 77 3-7 3-	7
 SM7,#  !Fs !F} !F !FF 
d 
} 
 
 # d=.A  (
 
2r.   r   c            L          e Zd ZdZdddddddddddddddddddddddddddd	d
i dddddddf%dededededededededededededededeeedf   de	de	deded ed!eeedf   d"ed#ed$ed%ed&e
d'ed(ed)ed*ed+ed,ed-e
d.e	d/ed0ed1ee   fJd2Zed3ed4d fd5       Zd6 Zd7 Zd4efd8Zed9ed4d fd:       Z	 	 	 	 	 dCd;ed<ed=ed>ed?ed@edAe	fdBZy)DBrowserConfigaI  
    Configuration class for setting up a browser instance and its context in AsyncPlaywrightCrawlerStrategy.

    This class centralizes all parameters that affect browser and context creation. Instead of passing
    scattered keyword arguments, users can instantiate and modify this configuration object. The crawler
    code will then reference these settings to initialize the browser in a consistent, documented manner.

    Attributes:
        browser_type (str): The type of browser to launch. Supported values: "chromium", "firefox", "webkit".
                            Default: "chromium".
        headless (bool): Whether to run the browser in headless mode (no visible GUI).
                         Default: True.
        browser_mode (str): Determines how the browser should be initialized:
                           "builtin" - use the builtin CDP browser running in background
                           "dedicated" - create a new dedicated browser instance each time
                           "cdp" - use explicit CDP settings provided in cdp_url
                           "docker" - run browser in Docker container with isolation
                           Default: "dedicated"
        use_managed_browser (bool): Launch the browser using a managed approach (e.g., via CDP), allowing
                                    advanced manipulation. Default: False.
        cdp_url (str): URL for the Chrome DevTools Protocol (CDP) endpoint. Default: "ws://localhost:9222/devtools/browser/".
        browser_context_id (str or None): Pre-existing CDP browser context ID to use. When provided along with
                                          cdp_url, the crawler will reuse this context instead of creating a new one.
                                          Useful for cloud browser services that pre-create isolated contexts.
                                          Default: None.
        target_id (str or None): Pre-existing CDP target ID (page) to use. When provided along with
                                 browser_context_id, the crawler will reuse this target instead of creating
                                 a new page. Default: None.
        cdp_cleanup_on_close (bool): When True and using cdp_url, the close() method will still clean up
                                     the local Playwright client resources. Useful for cloud/server scenarios
                                     where you don't own the remote browser but need to prevent memory leaks
                                     from accumulated Playwright instances. Default: False.
        create_isolated_context (bool): When True and using cdp_url, forces creation of a new browser context
                                        instead of reusing the default context. Essential for concurrent crawls
                                        on the same browser to prevent navigation conflicts. Default: False.
        debugging_port (int): Port for the browser debugging protocol. Default: 9222.
        use_persistent_context (bool): Use a persistent browser context (like a persistent profile).
                                       Automatically sets use_managed_browser=True. Default: False.
        user_data_dir (str or None): Path to a user data directory for persistent sessions. If None, a
                                     temporary directory may be used. Default: None.
        chrome_channel (str): The Chrome channel to launch (e.g., "chrome", "msedge"). Only applies if browser_type
                              is "chromium". Default: "chromium".
        channel (str): The channel to launch (e.g., "chromium", "chrome", "msedge"). Only applies if browser_type
                              is "chromium". Default: "chromium".
        proxy (Optional[str]): Proxy server URL (e.g., "http://username:password@proxy:port"). If None, no proxy is used.
                             Default: None.
        proxy_config (ProxyConfig or dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
                                     If None, no additional proxy config. Default: None.
        viewport_width (int): Default viewport width for pages. Default: 1080.
        viewport_height (int): Default viewport height for pages. Default: 600.
        viewport (dict): Default viewport dimensions for pages. If set, overrides viewport_width and viewport_height.
                         Default: None.
        verbose (bool): Enable verbose logging.
                        Default: True.
        accept_downloads (bool): Whether to allow file downloads. If True, requires a downloads_path.
                                 Default: False.
        downloads_path (str or None): Directory to store downloaded files. If None and accept_downloads is True,
                                      a default path will be created. Default: None.
        storage_state (str or dict or None): An in-memory storage state (cookies, localStorage).
                                             Default: None.
        ignore_https_errors (bool): Ignore HTTPS certificate errors. Default: True.
        java_script_enabled (bool): Enable JavaScript execution in pages. Default: True.
        cookies (list): List of cookies to add to the browser context. Each cookie is a dict with fields like
                        {"name": "...", "value": "...", "url": "..."}.
                        Default: [].
        headers (dict): Extra HTTP headers to apply to all requests in this context.
                        Default: {}.
        user_agent (str): Custom User-Agent string to use. Default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36".
        user_agent_mode (str or None): Mode for generating the user agent (e.g., "random"). If None, use the provided
                                       user_agent as-is. Default: None.
        user_agent_generator_config (dict or None): Configuration for user agent generation if user_agent_mode is set.
                                                    Default: None.
        text_mode (bool): If True, disables images and other rich content for potentially faster load times.
                          Default: False.
        light_mode (bool): Disables certain background features for performance gains. Default: False.
        extra_args (list): Additional command-line arguments passed to the browser.
                           Default: [].
        enable_stealth (bool): If True, applies playwright-stealth to bypass basic bot detection.
                              Cannot be used with use_undetected browser mode. Default: False.
    chromiumT	dedicatedFN8  X  zQMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/116.0.0.0 Safari/537.36r   $  	localhostbrowser_typeheadlessbrowser_modeuse_managed_browsercdp_urlbrowser_context_id	target_idcdp_cleanup_on_closecreate_isolated_contextuse_persistent_contextuser_data_dirchrome_channelchannelr   proxy_configviewport_widthviewport_heightviewportaccept_downloadsdownloads_pathstorage_stateignore_https_errorsjava_script_enabledsleep_on_closeverbosecookiesheaders
user_agentuser_agent_modeuser_agent_generator_config	text_mode
light_mode
extra_argsdebugging_porthostenable_stealthinit_scriptsc&                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        |xs | j                   xs d| _        |xs | j                   xs d| _        | j                   dv rd| _        d| _        |rt        j                  dt               || _        || _        t%        | j"                  t&              r$t(        j+                  | j"                        | _        t%        | j"                  t,              r$t(        j/                  | j"                        | _        | j                   r.| j"                  r"t        j                  dt               d | _        n7| j                   r+t(        j/                  | j                         | _        d | _        || _        || _        || _        | j4                  B| j4                  j7                  dd      | _        | j4                  j7                  dd	      | _        || _        || _        || _        || _        || _         ||ng | _!        ||ni | _"        || _#        || _$        || _%        || _&        | | _'        |!|!ng | _(        || _)        || _*        |"| _+        |#| _,        |$| _-        |%|%ng | _.        t_               }&| jH                  d
k(  r& |&j`                  di | jJ                  xs i | _#        n	 tc        jd                  | jF                        | _3        | jD                  ji                  d| jf                         | j                  dk(  rd| _        nJ| j                  dk(  rd| _        n3| j                  dk(  r| j                  rd| _        n| j                  dk(  r	 | j                  rd| _        | jZ                  r(| j                  r| j                  dk(  rtk        d      y y y )Nr   )firefoxwebkitr   zhThe 'proxy' parameter is deprecated and will be removed in a future release. Use 'proxy_config' instead.zRBoth 'proxy' and 'proxy_config' are provided. 'proxy_config' will take precedence.widthr   heightr   randomz	sec-ch-uabuiltinTdockercustomr   znenable_stealth cannot be used with browser_mode='builtin'. Stealth mode requires a dedicated browser instance.r-   )6r   r   r   r   r   r   r   r   r   r   r   r   r   warningswarnUserWarningr   r   r;   r7   r   rr   r<   r   r   r   r   rq   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   generater   generate_client_hintsbrowser_hint
setdefaultr   )'r:   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   fa_user_agenr_generators'                                          r/   rI   zBrowserConfig.__init__  s   \ ) (#6 "4"$8!'>$&<#*,O0A0AOZA$"3"3Az 55DL"$DMM  E  GR  S
(d''. + 5 5d6G6G HDd''- + 7 78I8I JD::$++MMnp{|DJZZ + 7 7

 CDDJ,. ==$"&--"3"3GT"BD#'==#4#4Xs#CD  0,*#6 #6 ")"5w2")"5w2$.+F("$(2(>*B,,	,,8,DL""2"48+>5>> 339rDO !77HT->->? 	)'+D$(*'+D$(*t||'+D$+- &&'+D$ 4#;#;@Q@QU^@^F  A_#;r.   rz   rW   c           	         t        d,i d| j                  dd      d| j                  dd      d| j                  dd      d| j                  dd      d	| j                  d	      d
| j                  d
      d| j                  d      d| j                  dd      d| j                  dd      d| j                  dd      d| j                  d      d| j                  dd      d| j                  dd      d| j                  d      d| j                  dd       d| j                  dd      d| j                  dd      d| j                  dd      d| j                  d      d| j                  d      d| j                  dd      d| j                  dd      d| j                  dg       d| j                  di       d| j                  dd       d!| j                  d!      d"| j                  d"      d#| j                  d#d      d$| j                  d$d      d%| j                  d%g       d&| j                  d&d'      d(| j                  d(d)      d*| j                  d*d      d+| j                  d+g       S )-Nr   r   r   Tr   r   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36r   r   r   r   r   r   r   r   r   r   r   r-   )r   rq   rz   s    r/   from_kwargszBrowserConfig.from_kwargsD  s    '
NJ?'
ZZ
D1'
  NK@'
 !'

+@% H	'

 JJy)'
  &zz*>?'
 jj-'
 "(,BE!J'
 %+JJ/H%$P'
 $*::.F#N'
 !**_5'
 "::&6
C'
 JJy*5'
 **W%'
  ND9'
  "::&6=!'
" #JJ'8#>#'
$ $ZZ(:EB%'
& "::&67''
( !**_5)'
* !'

+@$ G+'
, !'

+@$ G-'
. JJy"-/'
0 JJy"-1'
2 zzX3'
< #JJ'89='
> )/

3P(Q?'
@ jje4A'
B zz,6C'
D zz,3E'
F "::&6=G'
H FK0I'
J "::&6>K'
L  NB7M'
 '	
r.   c                    i d| j                   d| j                  d| j                  d| j                  d| j                  d| j
                  d| j                  d| j                  d	| j                  d
| j                  d| j                  d| j                  d| j                  d| j                  d| j                  r| j                  j                         nd d| j                   d| j"                  i d| j$                  d| j&                  d| j(                  d| j*                  d| j,                  d| j.                  d| j0                  d| j2                  d| j4                  d| j6                  d| j8                  d| j:                  d| j<                  d| j>                  d | j@                  d!| jB                  d"| jD                  | jF                  | jH                  d#}|S )$Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   )%r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r:   results     r/   rv   zBrowserConfig.to_dicto  s   %
D--%
%
 D--%
 "4#;#;	%

 t||%
 !$"9"9%
 %
 #D$=$=%
 &t'C'C%
 %d&A&A%
 T//%
 d11%
 t||%
 TZZ%
 4;L;LD--557RV%
  d11!%
" t33#%
$  5 5%%
& d11'%
( T//)%
* "4#;#;+%
, "4#;#;-%
. t||/%
0 t||1%
2 $//3%
4 t335%
6 *4+K+K7%
8 9%
: $//;%
< $//=%
> d11?%
@ t||A%
B d11C%
D DIIE%
F #11 --I%
P r.   c                 n    | j                         }|j                  |       t        j                  |      S )zCreate a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            BrowserConfig: A new instance with the specified updates
        )rv   rx   r   r   ry   s      r/   r|   zBrowserConfig.clone  s.     lln6"((55r.   c                     t        |       S NrD   ru   s    r/   dumpzBrowserConfig.dump      #D))r.   rV   c                 f    t        |       }t        |t              r|S t        j                  |      S r   )rZ   r;   r   r   rV   configs     r/   loadzBrowserConfig.load  s.     (-fm,M((00r.   token
channel_idcountrystatecityr   session_durationc                 <   |r|st        d      |dvrt        d|       dd|||||d}|r||d<   |r||d<   d	}		 t        j                  |	|d
      }
|
j                          |
j	                         }t        |t              r/|j                  d      rt        d|j                  dd             t        |t              r|st        d      |d   }t        | d|d    d|d    |d   |d         | _
        y# t        $ r}t        d|         d}~ww xY w)a7  
        Fetch a proxy from NSTProxy API and automatically assign it to proxy_config.

        Get your NSTProxy token from: https://app.nstproxy.com/profile

        Args:
            token (str): NSTProxy API token.
            channel_id (str): NSTProxy channel ID.
            country (str, optional): Country code (default: "ANY").
            state (str, optional): State code (default: "").
            city (str, optional): City name (default: "").
            protocol (str, optional): Proxy protocol ("http" or "socks5"). Defaults to "http".
            session_duration (int, optional): Session duration in minutes (0 = rotate each request). Defaults to 10.

        Raises:
            ValueError: If the API response format is invalid.
            PermissionError: If the API returns an error message.
        z,[NSTProxy] token and channel_id are required)httpsocks5z[NSTProxy] Invalid protocol: r   r   )fTypecount	channelIdr   r   sessionDurationr   r   r   z3https://api.nstproxy.com/api/v1/generate/apiproxies
   )r4   timeouterrz[NSTProxy] API Error: msgzUnknown erroru=   [NSTProxy] Invalid API response — expected a non-empty listr   r   r   r   r   r   r   r   u$   [NSTProxy] ❌ Failed to set proxy: N)r   requestsrq   raise_for_statusjsonr;   r7   PermissionErrorrA   r   r   r   r   )r:   r   r   r   r   r   r   r   r4   urlresponserV   
proxy_infor   s                 r/   set_nstproxyzBrowserConfig.set_nstproxy  sV   < JKLL--<XJGHH # /
 #F7O!F6NC	||CCH%%'==?D $%$((5/%(>txx?_>`&abbdD) !`aaaJ !,"3z$'7&8*V:L9MN#J/#J/!D  	8<=	s   C C> >	DDD)ANYr   r   r  r  )r(   r)   r*   __doc__r<   r?   r"   r   r7   r=   rA   r    rI   r}   r   rv   r|   r   r   r  r-   r.   r/   r   r   b  s   Ph ''$)"&%*(-',!(!7;""!&"04$($($
 `!,. "$"&WMM M 	M
 "M M  M M #M "&M !%M M M M M  Kt34!M" #M$ %M& 'M( )M* +M, S$_--M. "/M0 "1M2 3M4 5M6 7M8 9M: ;MF GMH &*IMJ KML MMN OMP QMR SMT UMV 3iWM^ (
D (
_ (
 (
T)V6*d * 14 1O 1 1  "MM M 	M
 M M M Mr.   r   c            	       `    e Zd ZdZ	 	 	 ddededeeef   defdZde	fdZ
ed	e	dd fd
       Zy)VirtualScrollConfigzConfiguration for virtual scroll handling.
    
    This config enables capturing content from pages with virtualized scrolling
    (like Twitter, Instagram feeds) where DOM elements are recycled as user scrolls.
    container_selectorscroll_count	scroll_bywait_after_scrollc                 <    || _         || _        || _        || _        y)a  
        Initialize virtual scroll configuration.
        
        Args:
            container_selector: CSS selector for the scrollable container
            scroll_count: Maximum number of scrolls to perform
            scroll_by: Amount to scroll - can be:
                - "container_height": scroll by container's height
                - "page_height": scroll by viewport height  
                - int: fixed pixel amount
            wait_after_scroll: Seconds to wait after each scroll for content to load
        Nr  r  r  r  )r:   r  r  r  r  s        r/   rI   zVirtualScrollConfig.__init__
  s#    & #5("!2r.   rW   c                 `    | j                   | j                  | j                  | j                  dS )z(Convert to dictionary for serialization.r  r  ru   s    r/   rv   zVirtualScrollConfig.to_dict"  s0     #'"9"9 --!%!7!7	
 	
r.   rV   c                      | di |S )z Create instance from dictionary.r-   r-   )r`   rV   s     r/   rr   zVirtualScrollConfig.from_dict+  s     {T{r.   N)r  container_heightg      ?)r(   r)   r*   r  r<   r=   r"   r>   rI   r7   rv   classmethodrr   r-   r.   r/   r  r    st     %7#&33 3 c?	3
 !30
 
 T &;  r.   r  c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 ddededeee      deee      deded	ed
ee   dee	   defdZ
edeeef   dd fd       Zdeeef   fdZddZy)LinkPreviewConfigz3Configuration for link head extraction and scoring.Ninclude_internalinclude_externalinclude_patternsexclude_patternsconcurrencyr  	max_linksqueryscore_thresholdr   c                 X   || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        |dk  rt        d      |dk  rt        d      |dk  rt        d      |	"d|	cxk  rdk  st        d       t        d      |s|st        d	      yy)
aV  
        Initialize link extraction configuration.
        
        Args:
            include_internal: Whether to include same-domain links
            include_external: Whether to include different-domain links  
            include_patterns: List of glob patterns to include (e.g., ["*/docs/*", "*/api/*"])
            exclude_patterns: List of glob patterns to exclude (e.g., ["*/login*", "*/admin*"])
            concurrency: Number of links to process simultaneously
            timeout: Timeout in seconds for each link's head extraction
            max_links: Maximum number of links to process (prevents overload)
            query: Query string for BM25 contextual scoring (optional)
            score_threshold: Minimum relevance score to include links (0.0-1.0, optional)
            verbose: Show detailed progress during extraction
        r   zconcurrency must be positiveztimeout must be positivezmax_links must be positiveNrp   g      ?z+score_threshold must be between 0.0 and 1.0zAAt least one of include_internal or include_external must be True)r#  r$  r%  r&  r'  r  r(  r)  r*  r   r   )r:   r#  r$  r%  r&  r'  r  r(  r)  r*  r   s              r/   rI   zLinkPreviewConfig.__init__3  s    8 !1 0 0 0&"
. !;<<a<788>9::&0M#0MJKK 1NJKK(8`aa )9r.   r{   rW   c                 j   | syt        | j                  dd      | j                  dd      | j                  d      | j                  d      | j                  dd	      | j                  d
d      | j                  dd      | j                  d      | j                  d      | j                  dd      
      S )zFCreate LinkPreviewConfig from dictionary (for backward compatibility).Nr#  Tr$  Fr%  r&  r'  r  r     r(  d   r)  r*  r   
r#  r$  r%  r&  r'  r  r(  r)  r*  r   )r"  rq   )r{   s    r/   rr   zLinkPreviewConfig.from_dictf  s      (__-?F(__-?G(__-?@(__-?@#r:OOIq1!ook37//'*'OO,=>OOIu5
 	
r.   c                     | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  d
S )zConvert to dictionary format.r/  r/  ru   s    r/   rv   zLinkPreviewConfig.to_dicty  s`     !% 5 5 $ 5 5 $ 5 5 $ 5 5++||ZZ#33||
 	
r.   c                 n    | j                         }|j                  |       t        j                  |      S )z"Create a copy with updated values.)rv   rx   r"  rr   ry   s      r/   r|   zLinkPreviewConfig.clone  s,    lln6" **;77r.   )
TFNNr  r-  r.  NNF)rW   r"  )r(   r)   r*   r  r?   r!   r    r<   r=   r>   rI   r}   r   r   rr   rv   r|   r-   r.   r/   r"  r"  0  s    = "&!&0404#+/1b1b 1b #49-	1b
 #49-1b 1b 1b 1b }1b "%1b 1bf 
tCH~ 
2E 
 
$
c3h 
8r.   r"  c                   @   e Zd ZU dZdZeed<   dZee	eef      ed<   dZ
ee	eef      ed<   dZee	eef      ed<   dZeed	<   dZeed
<   	 	 	 	 	 	 ddedee	eef      dee	eef      dee	eef      d	ed
efdZededd fd       Zd Zd ZdefdZededd fd       Zy)HTTPCrawlerConfigz#HTTP-specific crawler configurationGETmethodNr   rV   r  Tfollow_redirects
verify_sslc                 X    || _         || _        || _        || _        || _        || _        y r   r5  r   rV   r  r6  r7  )r:   r5  r   rV   r  r6  r7  s          r/   rI   zHTTPCrawlerConfig.__init__  s/     		 0$r.   rz   rW   c                     t        | j                  dd      | j                  d      | j                  d      | j                  d      | j                  dd      | j                  dd      	      S )
Nr5  r4  r   rV   r  r6  Tr7  r9  )r3  rq   r   s    r/   r   zHTTPCrawlerConfig.from_kwargs  s^     ::h.JJy)F#F##ZZ(:DAzz,5
 	
r.   c                     | j                   | j                  | j                  | j                  | j                  | j
                  dS )Nr9  r9  ru   s    r/   rv   zHTTPCrawlerConfig.to_dict  s8    kk||IIII $ 5 5//
 	
r.   c                 n    | j                         }|j                  |       t        j                  |      S )zCreate a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            HTTPCrawlerConfig: A new instance with the specified updates
        )rv   rx   r3  r   ry   s      r/   r|   zHTTPCrawlerConfig.clone  s.     lln6" ,,[99r.   c                     t        |       S r   r   ru   s    r/   r   zHTTPCrawlerConfig.dump  s    #D))r.   c                 f    t        |       }t        |t              r|S t        j                  |      S r   )rZ   r;   r3  r   r   s     r/   r   zHTTPCrawlerConfig.load  s-    '-f/0M ,,V44r.   )r4  NNNTT)r(   r)   r*   r  r5  r<   __annotations__r   r!   r   rV   r   r  r6  r?   r7  rI   r}   r7   r   rv   r|   r   r   r-   r.   r/   r3  r3    s?   -FC(,GXd38n%,%)D(4S>
")%)D(4S>
")!d!J ,0)-)-!%%% $sCx.)% tCH~&	%
 tCH~&% % %  
D 
%8 
 

:*d * 54 5/ 5 5r.   r3  c                       e Zd ZdZdddddZed e        e       dddddddddd	ddddddddddej                  dddddddd
de
ddddddddddddddddddddddeddeeddddddddddddddddddddddddi ddddej                  df]dededededededee   dedededededed ed!ed"eeedf   d#ee   d$ee   d%ee   d&ed'ee   d(ee   d)ee   d*ed+ed,ed-ed.ed/ed0ed1ed2ed3ed4ed5ed6ed7ed8ed9ed:ed;ed<ed=eeee   f   d>eeee   f   d?ed@edAedBedCee   dDedEedFedGedHedIedJedKedLedMedNedOedPedQedRe dSedTedUedVedWedXedYedZed[ed\ed]ed^ed_ed`edaedbedceddedeedfedgedhediedjee!   dkee"e#ee$f   f   dlee%e#ee$f   f   dmee&   dnedoe#ee$f   fdpZ'dq Z(ddedrefdsZ)dt Z* fduZ+e,dvedrd fdw       Z-drefdxZ.e,dyedrd fdz       Z/d{ Z0d| Z1 xZ2S )}CrawlerRunConfiga>  
    Configuration class for controlling how the crawler runs each crawl operation.
    This includes parameters for content extraction, page manipulation, waiting conditions,
    caching, and other runtime behaviors.

    This centralizes parameters that were previously scattered as kwargs to `arun()` and related methods.
    By using this class, you have a single place to understand and adjust the crawling options.

    Attributes:
        # Deep Crawl Parameters
        deep_crawl_strategy (DeepCrawlStrategy or None): Strategy to use for deep crawling.

        # Content Processing Parameters
        word_count_threshold (int): Minimum word count threshold before processing content.
                                    Default: MIN_WORD_THRESHOLD (typically 200).
        extraction_strategy (ExtractionStrategy or None): Strategy to extract structured data from crawled pages.
                                                          Default: None (NoExtractionStrategy is used if None).
        chunking_strategy (ChunkingStrategy): Strategy to chunk content before extraction.
                                              Default: RegexChunking().
        markdown_generator (MarkdownGenerationStrategy): Strategy for generating markdown.
                                                         Default: None.
        only_text (bool): If True, attempt to extract text-only content where applicable.
                          Default: False.
        css_selector (str or None): CSS selector to extract a specific portion of the page.
                                    Default: None.
        
        target_elements (list of str or None): List of CSS selectors for specific elements for Markdown generation 
                                                and structured data extraction. When you set this, only the contents 
                                                of these elements are processed for extraction and Markdown generation. 
                                                If you do not set any value, the entire page is processed. 
                                                The difference between this and css_selector is that this will shrink 
                                                the initial raw HTML to the selected element, while this will only affect 
                                                the extraction and Markdown generation.
                                    Default: None
        excluded_tags (list of str or None): List of HTML tags to exclude from processing.
                                             Default: None.
        excluded_selector (str or None): CSS selector to exclude from processing.
                                         Default: None.
        keep_data_attributes (bool): If True, retain `data-*` attributes while removing unwanted attributes.
                                     Default: False.
        keep_attrs (list of str): List of HTML attributes to keep during processing.
                                      Default: [].
        remove_forms (bool): If True, remove all `<form>` elements from the HTML.
                             Default: False.
        prettiify (bool): If True, apply `fast_format_html` to produce prettified HTML output.
                          Default: False.
        parser_type (str): Type of parser to use for HTML parsing.
                           Default: "lxml".
        scraping_strategy (ContentScrapingStrategy): Scraping strategy to use.
                           Default: LXMLWebScrapingStrategy.
        proxy_config (ProxyConfig or dict or None): Detailed proxy configuration, e.g. {"server": "...", "username": "..."}.
                                     If None, no additional proxy config. Default: None.

        # Sticky Proxy Session Parameters
        proxy_session_id (str or None): When set, maintains the same proxy for all requests sharing this session ID.
                                        The proxy is acquired on first request and reused for subsequent requests.
                                        Session expires when explicitly released or crawler context is closed.
                                        Default: None.
        proxy_session_ttl (int or None): Time-to-live for sticky session in seconds.
                                         After TTL expires, a new proxy is acquired on next request.
                                         Default: None (session lasts until explicitly released or crawler closes).
        proxy_session_auto_release (bool): If True, automatically release the proxy session after a batch operation.
                                           Useful for arun_many() to clean up sessions automatically.
                                           Default: False.

        # Browser Location and Identity Parameters
        locale (str or None): Locale to use for the browser context (e.g., "en-US").
                             Default: None.
        timezone_id (str or None): Timezone identifier to use for the browser context (e.g., "America/New_York").
                                  Default: None.
        geolocation (GeolocationConfig or None): Geolocation configuration for the browser.
                                                Default: None.

        # SSL Parameters
        fetch_ssl_certificate: bool = False,
        # Caching Parameters
        cache_mode (CacheMode or None): Defines how caching is handled.
                                        If None, defaults to CacheMode.ENABLED internally.
                                        Default: CacheMode.BYPASS.
        session_id (str or None): Optional session ID to persist the browser context and the created
                                  page instance. If the ID already exists, the crawler does not
                                  create a new page and uses the current page to preserve the state.
        bypass_cache (bool): Legacy parameter, if True acts like CacheMode.BYPASS.
                             Default: False.
        disable_cache (bool): Legacy parameter, if True acts like CacheMode.DISABLED.
                              Default: False.
        no_cache_read (bool): Legacy parameter, if True acts like CacheMode.WRITE_ONLY.
                              Default: False.
        no_cache_write (bool): Legacy parameter, if True acts like CacheMode.READ_ONLY.
                               Default: False.
        shared_data (dict or None): Shared data to be passed between hooks.
                                     Default: None.

        # Cache Validation Parameters (Smart Cache)
        check_cache_freshness (bool): If True, validates cached content freshness using HTTP
                                      conditional requests (ETag/Last-Modified) and head fingerprinting
                                      before returning cached results. Avoids full browser crawls when
                                      content hasn't changed. Only applies when cache_mode allows reads.
                                      Default: False.
        cache_validation_timeout (float): Timeout in seconds for cache validation HTTP requests.
                                          Default: 10.0.

        # Page Navigation and Timing Parameters
        wait_until (str): The condition to wait for when navigating, e.g. "domcontentloaded".
                          Default: "domcontentloaded".
        page_timeout (int): Timeout in ms for page operations like navigation.
                            Default: 60000 (60 seconds).
        wait_for (str or None): A CSS selector or JS condition to wait for before extracting content.
                                Default: None.
        wait_for_timeout (int or None): Specific timeout in ms for the wait_for condition.
                                       If None, uses page_timeout instead.
                                       Default: None.
        wait_for_images (bool): If True, wait for images to load before extracting content.
                                Default: False.
        delay_before_return_html (float): Delay in seconds before retrieving final HTML.
                                          Default: 0.1.
        mean_delay (float): Mean base delay between requests when calling arun_many.
                            Default: 0.1.
        max_range (float): Max random additional delay range for requests in arun_many.
                           Default: 0.3.
        semaphore_count (int): Number of concurrent operations allowed.
                               Default: 5.

        # Page Interaction Parameters
        js_code (str or list of str or None): JavaScript code/snippets to run on the page.
                                              Default: None.
        js_only (bool): If True, indicates subsequent calls are JS-driven updates, not full page loads.
                        Default: False.
        ignore_body_visibility (bool): If True, ignore whether the body is visible before proceeding.
                                       Default: True.
        scan_full_page (bool): If True, scroll through the entire page to load all content.
                               Default: False.
        scroll_delay (float): Delay in seconds between scroll steps if scan_full_page is True.
                              Default: 0.2.
        max_scroll_steps (Optional[int]): Maximum number of scroll steps to perform during full page scan.
                                         If None, scrolls until the entire page is loaded. Default: None.
        process_iframes (bool): If True, attempts to process and inline iframe content.
                                Default: False.
        remove_overlay_elements (bool): If True, remove overlays/popups before extracting HTML.
                                        Default: False.
        simulate_user (bool): If True, simulate user interactions (mouse moves, clicks) for anti-bot measures.
                              Default: False.
        override_navigator (bool): If True, overrides navigator properties for more human-like behavior.
                                   Default: False.
        magic (bool): If True, attempts automatic handling of overlays/popups.
                      Default: False.
        adjust_viewport_to_content (bool): If True, adjust viewport according to the page content dimensions.
                                           Default: False.

        # Media Handling Parameters
        screenshot (bool): Whether to take a screenshot after crawling.
                           Default: False.
        screenshot_wait_for (float or None): Additional wait time before taking a screenshot.
                                             Default: None.
        screenshot_height_threshold (int): Threshold for page height to decide screenshot strategy.
                                           Default: SCREENSHOT_HEIGHT_TRESHOLD (from config, e.g. 20000).
        pdf (bool): Whether to generate a PDF of the page.
                    Default: False.
        image_description_min_word_threshold (int): Minimum words for image description extraction.
                                                    Default: IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD (e.g., 50).
        image_score_threshold (int): Minimum score threshold for processing an image.
                                     Default: IMAGE_SCORE_THRESHOLD (e.g., 3).
        exclude_external_images (bool): If True, exclude all external images from processing.
                                         Default: False.
        table_score_threshold (int): Minimum score threshold for processing a table.
                                     Default: 7.
        table_extraction (TableExtractionStrategy): Strategy to use for table extraction.
                                     Default: DefaultTableExtraction with table_score_threshold.

        # Virtual Scroll Parameters
        virtual_scroll_config (VirtualScrollConfig or dict or None): Configuration for handling virtual scroll containers.
                                                                     Used for capturing content from pages with virtualized 
                                                                     scrolling (e.g., Twitter, Instagram feeds).
                                                                     Default: None.

        # Link and Domain Handling Parameters
        exclude_social_media_domains (list of str): List of domains to exclude for social media links.
                                                    Default: SOCIAL_MEDIA_DOMAINS (from config).
        exclude_external_links (bool): If True, exclude all external links from the results.
                                       Default: False.
        exclude_internal_links (bool): If True, exclude internal links from the results.
                                       Default: False.
        exclude_social_media_links (bool): If True, exclude links pointing to social media domains.
                                           Default: False.
        exclude_domains (list of str): List of specific domains to exclude from results.
                                       Default: [].
        exclude_internal_links (bool): If True, exclude internal links from the results.
                                       Default: False.
        score_links (bool): If True, calculate intrinsic quality scores for all links using URL structure,
                           text quality, and contextual relevance metrics. Separate from link_preview_config.
                           Default: False.

        # Debugging and Logging Parameters
        verbose (bool): Enable verbose logging.
                        Default: True.
        log_console (bool): If True, log console messages from the page.
                            Default: False.

        # HTTP Crwler Strategy Parameters
        method (str): HTTP method to use for the request, when using AsyncHTTPCrwalerStrategy.
                        Default: "GET".
        data (dict): Data to send in the request body, when using AsyncHTTPCrwalerStrategy.
                        Default: None.
        json (dict): JSON data to send in the request body, when using AsyncHTTPCrwalerStrategy.

        # Connection Parameters
        stream (bool): If True, enables streaming of crawled URLs as they are processed when used with arun_many.
                      Default: False.
        process_in_browser (bool): If True, forces raw:/file:// URLs to be processed through the browser
                                   pipeline (enabling js_code, wait_for, scrolling, etc.). When False (default),
                                   raw:/file:// URLs use a fast path that returns HTML directly without browser
                                   interaction. This is automatically enabled when browser-requiring parameters
                                   are detected (js_code, wait_for, screenshot, pdf, etc.).
                                   Default: False.

        check_robots_txt (bool): Whether to check robots.txt rules before crawling. Default: False
                                 Default: False.
        user_agent (str): Custom User-Agent string to use.
                          Default: None.
        user_agent_mode (str or None): Mode for generating the user agent (e.g., "random"). If None, use the provided user_agent as-is.
                                       Default: None.
        user_agent_generator_config (dict or None): Configuration for user agent generation if user_agent_mode is set.
                                                    Default: None.

        # Experimental Parameters
        experimental (dict): Dictionary containing experimental parameters that are in beta phase.
                            This allows passing temporary features that are not yet fully integrated 
                            into the main parameter set.
                            Default: None.

        url: str = None  # This is not a compulsory parameter
    z*Instead, use cache_mode=CacheMode.DISABLEDz(Instead, use cache_mode=CacheMode.BYPASSz,Instead, use cache_mode=CacheMode.WRITE_ONLYz+Instead, use cache_mode=CacheMode.READ_ONLY)disable_cachebypass_cacheno_cache_readno_cache_writeNFlxmlg      $@domcontentloaded皙?333333?r-  T皙?   r4  word_count_thresholdextraction_strategychunking_strategymarkdown_generator	only_textcss_selectortarget_elementsexcluded_tagsexcluded_selectorkeep_data_attributes
keep_attrsremove_forms	prettiifyparser_typescraping_strategyr   proxy_rotation_strategyproxy_session_idproxy_session_ttlproxy_session_auto_releaselocaletimezone_idgeolocationfetch_ssl_certificate
cache_mode
session_idrC  rB  rD  rE  shared_datacheck_cache_freshnesscache_validation_timeout
wait_untilpage_timeoutwait_forwait_for_timeoutwait_for_imagesdelay_before_return_html
mean_delay	max_rangesemaphore_countjs_code
c4a_scriptjs_onlyignore_body_visibilityscan_full_pagescroll_delaymax_scroll_stepsprocess_iframesremove_overlay_elementssimulate_useroverride_navigatormagicadjust_viewport_to_content
screenshotscreenshot_wait_forscreenshot_height_thresholdpdfcapture_mhtml$image_description_min_word_thresholdimage_score_thresholdtable_score_thresholdtable_extractionexclude_external_imagesexclude_all_imagesexclude_social_media_domainsexclude_external_linksexclude_social_media_linksexclude_domainsexclude_internal_linksscore_links!preserve_https_for_internal_linksr   log_consolecapture_network_requestscapture_console_messagesr5  streamprefetchprocess_in_browserr  base_urlcheck_robots_txtr   r   r   deep_crawl_strategylink_preview_configvirtual_scroll_configurl_matcher
match_modeexperimentalc^                    R| _         S| _        || _        || _        || _        || _        || _        || _        |xs g | _        |xs g | _	        |	xs d| _
        |
| _        |xs g | _        || _        || _        || _        |xs
 t!               | _        || _        t'        |t(              rt*        j-                  |      | _        t'        |t.              rt*        j1                  |      | _        || _        || _        || _        || _        || _        || _        || _        || _         || _!        || _"        || _#        || _$        || _%        || _&        || _'        | | _(        |!| _)        |"| _*        |#| _+        |$| _,        |%| _-        |&| _.        |'| _/        |(| _0        |)| _1        |*| _2        |+| _3        |,| _4        |-| _5        |.| _6        |/| _7        |0| _8        |1| _9        |2| _:        |3| _;        |4| _<        |5| _=        |6| _>        |7| _?        |8| _@        |9| _A        |:| _B        |;| _C        |<| _D        |=| _E        |>| _F        A| _G        B| _H        |?| _I        @t        |?      | _K        n@| _K        Cxs t        | _M        D| _N        E| _O        Fxs g | _P        G| _Q        H| _R        I| _S        J| _T        K| _U        L| _V        M| _W        O| _X        P| _Y        Q| _Z        N| _[        T| _\        U| _]        V| _^        W| __        | j                  %t'        | j                  t              st        d      | j                  %t'        | j                  t              st        d      | j                  t               | _        X| _d        Yd | _e        nNt'        Yt              rY| _e        n6t'        Yt(              rt        j-                  Y      | _e        nt        d      Zd | _g        nNt'        Zt              rZ| _g        n6t'        Zt(              rt        j-                  Z      | _g        nt        d      [| _i        \| _j        ]xs i | _k        | jh                  r| jf                  s| j                          y y y )Nr   )r  z=extraction_strategy must be an instance of ExtractionStrategyz9chunking_strategy must be an instance of ChunkingStrategyz<link_preview_config must be LinkPreviewConfig object or dictz@virtual_scroll_config must be VirtualScrollConfig object or dict)mr  r  rL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  r   rZ  r   r;   r7   r   rr   r<   r   r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  rC  rB  rD  rE  re  rf  rg  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  rr  rs  rt  ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r  r  r  r  r  r  r  r  r  r   r  r   r  r  r  r  r  r  r  r   r  r  r  r  r  r  r5  r  r   r   r   r   r   r   r   r  r  r"  r  r  r  r  r  _compile_c4a_script)^r:   rL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r   r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  rC  rB  rD  rE  re  rf  rg  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  rr  rs  rt  ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r5  r  r  r  r  r  r  r   r   r   r  r  r  r  r  r  s^                                                                                                 r/   rI   zCrawlerRunConfig.__init__  s   f   %9!#6 !2"4"(.4"*0b!2!8b$8!$*("&!2!O6M6O(lD) + 5 5l CDlC( + 7 7 ED'>$ !1!2*D' && &;" %$(**,&%:"(@% %(  0.(@%$". $&<#,( 0.'>$*"4
*D' %#6 +F(*4X1%:"'>$"4%:" #$:Qf$gD!$4D! )@,@ 	) '=#*D'.4"&<#&1R. & )A%(@%  "4 !1 %.+F( ##/
$$&89
 O  !!-j""$47
 K 
 !!)%2_D" $7  &'+D$+->?':D$+T2'8'B'BCV'WD$[\\ !()-D&-/BC)>D&-t4)<)F)FG\)]D&_`` '$ ).B ??4<<$$& $0?r.   c                    	 	 ddl m} t	        | j
                  t              r| j
                  g}n| j
                  }g }t        |      D ]  \  }} ||      }|j                  r|j                  |j                         6|j                  }d|dz    d|j                   d|j                   d|j                   d|j                   
}|j                   r|d	|j                   d   j                   z  }t#        |       || _
        y# t        $ r
 ddlm} Y w xY w# t        $ r t#        d
      t$        $ r8}	dt        |	      j'                         vrt#        dt        |	              d}	~	ww xY w)z Compile C4A script to JavaScriptr   )compiler   z%C4A Script compilation error (script z
):
  Line z	, Column z: z	
  Code: z
  Suggestion: z^C4A script compiler not available. Please ensure crawl4ai.script module is properly installed.zcompilation errorzFailed to compile C4A script: N)scriptr  r]   crawl4ai.scriptr;   rr  r<   	enumeratesuccessextendrq  first_errorlinecolumnmessagesource_linesuggestionsr   r   lower)
r:   r  scriptscompiled_jsir  r   error	error_msgr   s
             r/   r  z$CrawlerRunConfig._compile_c4a_script  s   ,	4+
 $//3/??+// K&w/ 0	6 >>&&v~~6 #..E?!u E""'**Yu||nBu}}o V##(#4#4"57 
 ((!'78I8I!8L8T8T7U%VV	$Y//!0$ 'DL;  4334>  	N   	"#a&,,.8 #A#a&!JKK		s5   D C=D DD DD E193E,,E1rW   c                 ^   | j                   yt        | j                         r| j                  |      S t        | j                   t              rddlm}  ||| j                         S t        | j                   t
              r| j                   syg }| j                   D ]T  }t        |      r|j                   ||             &t        |t              rddlm} |j                   |||             UV | j                  t        j                  k(  r|rt        |      S dS |rt        |      S dS y)a  Check if this config matches the given URL.
        
        Args:
            url: The URL to check against this config's matcher
            
        Returns:
            bool: True if this config should be used for the URL or if no matcher is set.
        Tr   )fnmatchF)r  callabler;   r<   r  rA   r   r  r%   r+   anyall)r:   r  r  resultsmatchers        r/   is_matchzCrawlerRunConfig.is_match!  s    #D$$%##C((((#.'3 0 011(($/##G++ G$NN73<0-/NN73#89  ),,.'.s7|9E9'.s7|9E9r.   c                     || j                   v rt        d| d| j                   |          t        d| j                  j                   d| d      )zHandle attribute access.z	Getting '' is deprecated. 'z' has no attribute ')_UNWANTED_PROPSr^   r9   r(   )r:   rT   s     r/   __getattr__zCrawlerRunConfig.__getattr__O  s]    4''' 9TF2CDDXDXY]D^C_!`aaq!8!8 99MdVSTUVVr.   c                     t        j                  | j                        }|j                  }|| j                  v r/|||   j
                  urt        d| d| j                  |          t        | !  ||       y)zHandle attribute setting.z	Setting 'r  N)	rG   rH   rI   rJ   r  rL   r^   super__setattr__)r:   rT   r8   rR   
all_paramsr9   s        r/   r  zCrawlerRunConfig.__setattr__U  sw     .^^
4'''ED9I9Q9Q,Q 9TF2CDDXDXY]D^C_!`aaD%(r.   rz   c           
         t        dgi d| j                  dd      d| j                  d      d| j                  dt                     d| j                  d      d| j                  dd      d| j                  d      d	| j                  d	g       d
| j                  d
g       d| j                  dd      d| j                  dd      d| j                  dg       d| j                  dd      d| j                  dd      d| j                  dd      d| j                  d      d| j                  d      d| j                  d      d| j                  d      d| j                  d      d| j                  dd      d| j                  dd       d| j                  dd       d| j                  dd       d| j                  dd      d| j                  dt        j                        d| j                  d      d| j                  dd      d | j                  d d      d!| j                  d!d      d"| j                  d"d      d#| j                  d#d       d$| j                  d$d%      d&| j                  d&d'      d(| j                  d(      d)| j                  d)      d*| j                  d*d      d+| j                  d+d,      d-| j                  d-d,      d.| j                  d.d/      d0| j                  d0d1      d2| j                  d2      d3| j                  d3d      d4| j                  d4d5      d6| j                  d6d      d7| j                  d7d8      d9| j                  d9      d:| j                  d:d      d;| j                  d;d      d<| j                  d<d      d=| j                  d=d      d>| j                  d>d      d?| j                  d?d      d@| j                  d@d      dA| j                  dA      dB| j                  dBt
              dC| j                  dCd      dD| j                  dDd      dE| j                  dEt              dF| j                  dFt              dG| j                  dGdH      dI| j                  dId       dJ| j                  dJd      dK| j                  dKd      dL| j                  dLt              dM| j                  dMd      dN| j                  dNd      dO| j                  dOg       dP| j                  dPd      dQ| j                  dQd      dR| j                  dRd      dS| j                  dSd5      dT| j                  dTd      dU| j                  dUd      dV| j                  dVd      dW| j                  dWdX      dY| j                  dYd      dZ| j                  dZd      d[| j                  d[d      d\| j                  d\d      d]| j                  d]      d^| j                  d^      d_| j                  d_i       d`| j                  d`      da| j                  da      db| j                  db      dc| j                  dc      dd| j                  dd      de| j                  det        j                        df| j                  df      S )hNrL     rM  rN  rO  rP  FrQ  rR  rS  rT  r   rU  rV  rW  rX  rY  rF  rZ  r   r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  rC  rB  rD  rE  re  rh  rG  ri  i`  rj  rk  rl  rm  rH  rn  ro  rI  rp  r-  rq  rs  rt  Tru  rv  rJ  rw  rx  ry  rz  r{  r|  r}  r~  r  r  r  r  r  r  r  rK  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r5  r4  r  r  r  r  r   r   r   r  r  r  r  r  r  r  r-   )rA  rq   r   r   BYPASSr
   r   r   r   r%   r+   r   s    r/   r   zCrawlerRunConfig.from_kwargs`  sT    s
!',BC!Hs
 !'

+@ As
 %jj)<moN	s

  &zz*>?s
 jje4s
  N3s
 #JJ'8"=s
 !**_b9s
 %jj)<bAs
 "(,BE!Js
 zz,3s
  NE:s
 jje4s
 

=&9s
  %jj)<=!s
"  N3#s
$ %+JJ/H$I%s
( $ZZ(:;)s
* %jj)<=+s
, (.zz2NPU'V-s
0 ::h-1s
2 

=$73s
4 

=$75s
8 #)**-De"L9s
< zz,	0@0@A=s
> zz,/?s
@  NE:As
B !**_e<Cs
D !**_e<Es
F "::&6>Gs
H 

=$7Is
L zz,0BCMs
N  NE:Os
P ZZ
+Qs
R $ZZ(:;Ss
T #JJ'8%@Us
V &,ZZ0JC%PWs
X zz,4Ys
Z jjc2[s
\ #JJ'8!<]s
` JJy)as
b JJy%0cs
d $*::.F#Mes
f "::&6>gs
h  NC8is
j $ZZ(:;ks
l #JJ'8%@ms
n %+JJ/H%$Pos
p !**_e<qs
r  &zz*>Fss
t **We,us
v (.zz2NPU'Vws
z zz,6{s
| !'

+@ A}s
~ )/

-/I)s
D 

5%(Es
F !**_e<Gs
H 28642Is
P #)**')>#Qs
V #)**-Da"HWs
X $ZZ(:DAYs
Z  &zz*>F[s
\ %+JJ/H%$P]s
` *0.0D*as
f $*::.F#Ngs
h (.zz2NPU'Vis
j #JJ'8"=ks
l $*::.F#Nms
n 

=%8os
p /5jj9\^c.dqs
t JJy$/us
v 

=%8ws
z &,ZZ0JE%R{s
| &,ZZ0JE%R}s
@ ::h.As
B ::h.Cs
D ZZ
E2Es
F  &zz*>FGs
H $ZZ(:EBIs
J zz,/Ks
L #JJ'89Ms
N )/

3PRT(UOs
R !'

+@ ASs
V !'

+@ AWs
X 

5!Ys
Z ZZ
+[s
^ 

=1_s
` zz,	=as
d  N3es
 s	
r.   c                     t        |       S r   r   ru   s    r/   r   zCrawlerRunConfig.dump  r   r.   rV   c                 f    t        |       }t        |t              r|S t        j                  |      S r   )rZ   r;   rA  r   r   s     r/   r   zCrawlerRunConfig.load  s/     (-f./M++F33r.   c                 8	   i d| j                   d| j                  d| j                  d| j                  d| j                  d| j
                  d| j                  d| j                  d	| j                  d
| j                  d| j                  d| j                  d| j                  d| j                  d| j                  d| j                  d| j                   i d| j"                  d| j$                  d| j&                  d| j(                  d| j*                  d| j,                  d| j.                  d| j0                  d| j2                  d| j4                  d| j6                  d| j8                  d| j:                  d| j<                  d | j>                  d!| j@                  d"| jB                  i d#| jD                  d$| jF                  d%| jH                  d&| jJ                  d'| jL                  d(| jN                  d)| jP                  d*| jR                  d+| jT                  d,| jV                  d-| jX                  d.| jZ                  d/| j\                  d0| j^                  d1| j`                  d2| jb                  d3| jd                  i d4| jf                  d5| jh                  d6| jj                  d7| jl                  d8| jn                  d9| jp                  d:| jr                  d;| jt                  d<| jv                  d=| jx                  d>| jz                  d?| j|                  d@| j~                  dA| j                  dB| j                  dC| j                  dD| j                  i dE| j                  dF| j                  dG| j                  dH| j                  dI| j                  dJ| j                  dK| j                  dL| j                  dM| j                  dN| j                  dO| j                  dP| j                  dQ| j                  dR| j                  dS| j                  dT| j                  r| j                  j                         nd dU| j                  | j                  | j                  | j                  dVS )WNrL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r   r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  rC  rB  rD  rE  re  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  rs  rt  ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r5  r  r  r  r  r   r   r   r  r  r  )r  r  r  )YrL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r   r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  rC  rB  rD  rE  re  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  rs  rt  ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r5  r  r  r  r  r   r   r   r  r  rv   r  r  r  r  ru   s    r/   rv   zCrawlerRunConfig.to_dict  s   Y
"D$=$=Y
!4#;#;Y
  !7!7Y
 !$"9"9	Y

 Y
 D--Y
 t33Y
 T//Y
  !7!7Y
 #D$=$=Y
 $//Y
 D--Y
 Y
 4++Y
  !7!7Y
  D--!Y
" &t'C'C#Y
$  5 5%Y
&  !7!7'Y
( )$*I*I)Y
* dkk+Y
, 4++-Y
. 4++/Y
0 $T%?%?1Y
2 $//3Y
4 $//5Y
6 D--7Y
8 T//9Y
: T//;Y
< d11=Y
> 4++?Y
@ $//AY
B D--CY
D EY
F  5 5GY
H t33IY
J '(E(EKY
L $//MY
N OY
P t33QY
R t||SY
T t||UY
V %d&A&AWY
X d11YY
Z D--[Y
\  5 5]Y
^ t33_Y
` &t'C'CaY
b T//cY
d !$"9"9eY
f TZZgY
h )$*I*IiY
j $//kY
l "4#;#;mY
n *4+K+KoY
p 488qY
r T//sY
t 3D4]4]uY
v $T%?%?wY
x $T%?%?yY
z  5 5{Y
| !$"9"9}Y
~ &t'C'CY
@ +D,M,MAY
B %d&A&ACY
D )$*I*IEY
F t33GY
H %d&A&AIY
J 4++KY
L 01W1WMY
N t||OY
P 4++QY
R '(E(ESY
T '(E(EUY
V dkkWY
X dkkYY
Z [Y
\ !$"9"9]Y
^  5 5_Y
` $//aY
b t33cY
d *4+K+KeY
f "4#;#;gY
h "IaIa4#;#;#C#C#EgkiY
j 488kY
l  ++// --qY
 Y	
r.   c                 n    | j                         }|j                  |       t        j                  |      S )ao  Create a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            CrawlerRunConfig: A new instance with the specified updates

        Example:
            ```python
            # Create a new config with streaming enabled
            stream_config = config.clone(stream=True)

            # Create a new config with multiple updates
            new_config = config.clone(
                stream=True,
                cache_mode=CacheMode.BYPASS,
                verbose=True
            )
            ```
        )rv   rx   rA  r   ry   s      r/   r|   zCrawlerRunConfig.clone@  s.    , lln6"++K88r.   )3r(   r)   r*   r  r  r   r   r   r   r  r   r
   r   r   r%   r+   r=   r   r   r   r?   r<   r    rA   r   r"   r   r7   r!   r   rh   r>   r   r   r"  r   r   r  
UrlMatcherrI   r  r  r  r  r}   r   r   r   rv   r|   __classcell__)r9   s   @r/   rA  rA    s   gR GCHH	O %726.;o9Q9S %)"!%%*"!597;CG*.+/+0 $%)37&+ ) 0 0"##$ &+*.,( $ %*- )-,0'+$!*. %(-##(+0 %)+E#4X%:%&48(-#(-1',+0 $',!27!).).#(!&#,.;?HLLP,0 )'+ah' "h' 0	h'
 ,h' 7h' h' h' ch' h' h' #h' h' h' h'  !h'" 3#h'$ Kt34%h'& "**?!@'h'* #3-+h', $C=-h'. %)/h'2 3h'4 c]5h'6 /07h':  $;h'> ?h'@ Ah'B Ch'D Eh'F Gh'H Ih'J Kh'N  $Oh'P #(Qh'T Uh'V Wh'X Yh'Z [h'\ ]h'^ #(_h'` ah'b ch'd eh'h sDI~&ih'j #tCy.)kh'l mh'n !%oh'p qh'r sh't #3-uh'v wh'x "&yh'z {h'| !}h'~ h'@ %)Ah'D Eh'F #Gh'H &)Ih'J Kh'L Mh'N /2Oh'P  #Qh'R  #Sh'T 2Uh'V "&Wh'X !Yh'\ '+]h'^ !%_h'` %)ah'b ch'd !%eh'f gh'h ,0ih'l mh'n oh'r #'sh't #'uh'x yh'z {h'| }h'~ !h'@ Ah'B Ch'D Eh'F Gh'H Ih'J &*Kh'N &&78Oh'R ##4d38n#DESh'V  %%8$sCx.%HIWh'Z j)[h'\ ]h'` 38nah'V	.`+C +D +\W	) t
D t
%7 t
 t
n*d * 44 4. 4 4Z
x9r.   rA  c                       e Zd Zeddddddddddddfdedee   dee   dee   dee   dee   dee   d	ee   d
eee      dee   dee   dee   dee   fdZ	e
dedd fd       Zd Zd Zy)	LLMConfigNprovider	api_tokenr  temperature
max_tokenstop_pfrequency_penaltypresence_penaltystopnbackoff_base_delaybackoff_max_attemptsbackoff_exponential_factorc                 l   | _         |r|j                  d      s|| _        n|r/|j                  d      rt        j                  |dd       | _        nt        j                         }t        fd|D              r0t        fd|D        d      }t        j                  |      | _        n)t        | _         t        j                  t              | _        || _        || _        || _        || _        || _        || _        |	| _        |
| _        ||nd| _        ||nd| _        ||| _        yd| _        y)z4Configuaration class for LLM provider and API token.zenv:r   Nc              3   @   K   | ]  }j                  |        y wr   
startswith.0prefixr  s     r/   	<genexpr>z%LLMConfig.__init__.<locals>.<genexpr>u  s     F68&&v.Fs   c              3   F   K   | ]  }j                  |      s|  y wr   r  r  s     r/   r  z%LLMConfig.__init__.<locals>.<genexpr>w  s     Rh6I6I&6QVRs   !!r      )r  r  r  r   r   r	   keysr  nextrq   r   r   r  r  r  r  r  r  r  r  r  r  r  )r:   r  r  r  r  r  r  r  r  r  r  r  r  r  prefixesselected_prefixs    `              r/   rI   zLLMConfig.__init__[  s   " !Y11&9&DN9//7YYy}5DN 0446HFXFF"&R(R# ":!=!=o!N 0!#+C!D &$
!2 0	8J8V"4\]<P<\$8bc!HbHn*D'tu'r.   rz   rW   c                    t        | j                  dt              | j                  d      | j                  d      | j                  d      | j                  d      | j                  d      | j                  d      | j                  d      | j                  d	      | j                  d
      | j                  d      | j                  d      | j                  d            S )Nr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )r  rq   r   r   s    r/   r   zLLMConfig.from_kwargs  s    ZZ
,<=jj-ZZ
+

=1zz,/**W%$jj)<=#ZZ(:;F#jjo%zz*>?!',B!C'-zz2N'O
 	
r.   c                 &   | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  dS )Nr  r  ru   s    r/   rv   zLLMConfig.to_dict  ss    ++//ZZ!%!7!7 $ 5 5II"&"9"9$($=$=*.*I*I
 	
r.   c                 n    | j                         }|j                  |       t        j                  |      S )zCreate a copy of this configuration with updated values.

        Args:
            **kwargs: Key-value pairs of configuration options to update

        Returns:
            llm_config: A new instance with the specified updates
        )rv   rx   r  r   ry   s      r/   r|   zLLMConfig.clone  s.     lln6"$$[11r.   )r(   r)   r*   r   r<   r!   r>   r=   r    rI   r}   r7   r   rv   r|   r-   r.   r/   r  r  Z  s     )#'"&'+$(!%-1,0$(,0.248-v-v C=-v 3-	-v
 e_-v SM-v -v $E?-v #5/-v tCy!-v C=-v %SM-v 'sm-v %-SM-v^ 
D 
[ 
 
"
"2r.   r  c            #           e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedee   dedededed	ed
edee   dee   dee   dee   dee	   dedededef"dZ
deeef   fdZedeeef   dd fd       Zdedd fdZy)SeedingConfigzV
    Configuration class for URL discovery and pre-validation via AsyncUrlSeeder.
    Nsourcepattern
live_checkextract_headmax_urlsr'  hits_per_secforcebase_directory
llm_configr   r)  r*  scoring_methodfilter_nonsense_urlscache_ttl_hoursvalidate_sitemap_lastmodc                     || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        || _        || _        || _        || _        || _        || _        y)a'
  
        Initialize URL seeding configuration.
        
        Args:
            source: Discovery source(s) to use. Options: "sitemap", "cc" (Common Crawl), 
                   or "sitemap+cc" (both). Default: "sitemap+cc"
            pattern: URL pattern to filter discovered URLs (e.g., "*example.com/blog/*"). 
                    Supports glob-style wildcards. Default: "*" (all URLs)
            live_check: Whether to perform HEAD requests to verify URL liveness. 
                       Default: False
            extract_head: Whether to fetch and parse <head> section for metadata extraction.
                         Required for BM25 relevance scoring. Default: False
            max_urls: Maximum number of URLs to discover. Use -1 for no limit. 
                     Default: -1
            concurrency: Maximum concurrent requests for live checks/head extraction. 
                        Default: 1000
            hits_per_sec: Rate limit in requests per second to avoid overwhelming servers. 
                         Default: 5
            force: If True, bypasses the AsyncUrlSeeder's internal .jsonl cache and 
                  re-fetches URLs. Default: False
            base_directory: Base directory for UrlSeeder's cache files (.jsonl). 
                           If None, uses default ~/.crawl4ai/. Default: None
            llm_config: LLM configuration for future features (e.g., semantic scoring). 
                       Currently unused. Default: None
            verbose: Override crawler's general verbose setting for seeding operations. 
                    Default: None (inherits from crawler)
            query: Search query for BM25 relevance scoring (e.g., "python tutorials"). 
                  Requires extract_head=True. Default: None
            score_threshold: Minimum relevance score (0.0-1.0) to include URL. 
                           Only applies when query is provided. Default: None
            scoring_method: Scoring algorithm to use. Currently only "bm25" is supported.
                          Future: "semantic". Default: "bm25"
            filter_nonsense_urls: Filter out utility URLs like robots.txt, sitemap.xml,
                                 ads.txt, favicon.ico, etc. Default: True
            cache_ttl_hours: Hours before sitemap cache expires. Set to 0 to disable TTL
                            (only lastmod validation). Default: 24
            validate_sitemap_lastmod: If True, compares sitemap's <lastmod> with cache
                                     timestamp and refetches if sitemap is newer. Default: True
        N)r  r  r  r  r  r'  r  r  r  r  r   r)  r*  r  r  r  r  )r:   r  r  r  r  r  r'  r  r  r  r  r   r)  r*  r  r  r  r  s                     r/   rI   zSeedingConfig.__init__  s    v $( &(
,$
.,$8!.(@%r.   rW   c                 z    | j                   j                         D ci c]  \  }}|dk7  s||| c}}S c c}}w )Nr  )__dict__rF   )r:   rO   rP   s      r/   rv   zSeedingConfig.to_dict  s6    !%!4!4!6]A!|:Kq}1]]]s   77rz   c                     t        di | S )Nr-   )r  r   s    r/   r   zSeedingConfig.from_kwargs  s    &v&&r.   c                 n    | j                         }|j                  |       t        j                  |      S r   )rv   rx   r  r   ry   s      r/   r|   zSeedingConfig.clone  s,    lln6"((55r.   )z
sitemap+cc*FFi  r-  FNNNNNbm25T   T)r(   r)   r*   r  r<   r!   r?   r=   r  r>   rI   r   r   rv   r}   r   r|   r-   r.   r/   r  r    sz   
 #!$ "(,*."&#+/$%)!)-%KAKA #KA 	KA
 KA KA KA KA KA !KA Y'KA $KA }KA "%KA KA  #!KA" #KA$ #'%KA\^c3h ^ 'DcN ' ' '6c 6o 6r.   r  )F)Ar[   r   r   r  r   r   r   r   r   r   r	   r
   r   r   r   user_agent_generatorr   r   rM  r   r   rN  r   r   markdown_generation_strategyr   r   content_scraping_strategyr   r   deep_crawlingr   r  r   r   cache_contextr   proxy_strategyr   rG   typingr   r   r   r    r!   r"   enumr#   r<   r?   r  r%   rD   rZ   rM   rh   r   r   r  r"  r3  rA  r  r  r-   r.   r/   <module>r     s;    	     : J > ^ W , M $ 1  = =  3#-tE#xt?T:T4U/VVW
 Hc H$ HV2 2 2j# $ .8 .8`{2 {2z_ _B+ +Z\8 \8~D5 D5LC9 C9J^2 ^2@\6 \6r.   