
    i)                        d dl Zd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlZej                  j                  d      ZddlmZ ddlmZmZ d d	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddlm'Z' ddl(m)Z)m*Z*m+Z+m,Z, de-de	ee-e-f      ddfdZ.ddddddddddede	e-   de-de	e-   de	e   de	e
e/ef      de
eee-ef   df   de	ee-e-f      de	e'   dedefd Zdddddddd!dede	e-   de-de	e-   de	e   de	e
eee-ef   f      de	ee-e-f      de	e'   defd"Z0g d#Z1d$ Z2 e3ed%      r ejh                  e2&       yy)'    N)ThreadPoolExecutor)	timedelta)DictOptionalUnionAnylancedb   )connect)URIsanitize_uri)urlparse)AsyncConnectionDBConnectionLanceDBConnection)StorageOptionsProvider)ClientConfig)RemoteDBConnection)vector)
AsyncTableTable)Session)connect_namespaceconnect_namespace_asyncLanceNamespaceDBConnectionAsyncLanceNamespaceDBConnectionuristorage_optionsreturnc                     t        | t              r| j                  d      syt        |       }|j                  }d|vryddh}xr t        fd|D              }|st        d| d      y)	a  
    Check if an S3 URI has a bucket name containing dots and warn if no region
    is specified. S3 buckets with dots cannot use virtual-hosted-style URLs,
    which breaks automatic region detection.

    See: https://github.com/lancedb/lancedb/issues/1898
    zs3://N.region
aws_regionc              3   &   K   | ]  }|v  
 y w)N ).0kr   s     L/mnt/e/genesis-system/.venv/lib/python3.12/site-packages/lancedb/__init__.py	<genexpr>z-_check_s3_bucket_with_dots.<locals>.<genexpr>5   s     (S!o)=(Ss   zS3 bucket name 'a  ' contains dots, which prevents automatic region detection. Please specify the region explicitly via storage_options={'region': '<your-region>'} or storage_options={'aws_region': '<your-region>'}. See https://github.com/lancedb/lancedb/issues/1898 for details.)
isinstancestr
startswithr   netlocany
ValueError)r   r   parsedbucketregion_keys
has_regions    `    r(   _check_s3_bucket_with_dotsr4       s     c3s~~g'>c]F]]F
& \*K SS(S{(S%SJvh 'N O
 	
     z	us-east-1)api_keyr"   host_overrideread_consistency_intervalrequest_thread_poolclient_configr   sessionr6   r"   r7   r8   r9   r:   r;   kwargsc                j   t        | t              rp| j                  d      r_|t        j                  j                  d      }|t        d|        t        |t              rt        |      }t        | |||f|||d|	S t        t        |       |       |	rt        d|	       t        | |||      S )a
  Connect to a LanceDB database.

    Parameters
    ----------
    uri: str or Path
        The uri of the database.
    api_key: str, optional
        If presented, connect to LanceDB cloud.
        Otherwise, connect to a database on file system or cloud storage.
        Can be set via environment variable `LANCEDB_API_KEY`.
    region: str, default "us-east-1"
        The region to use for LanceDB Cloud.
    host_override: str, optional
        The override url for LanceDB Cloud.
    read_consistency_interval: timedelta, default None
        (For LanceDB OSS only)
        The interval at which to check for updates to the table from other
        processes. If None, then consistency is not checked. For performance
        reasons, this is the default. For strong consistency, set this to
        zero seconds. Then every read will check for updates from other
        processes. As a compromise, you can set this to a non-zero timedelta
        for eventual consistency. If more than that interval has passed since
        the last check, then the table will be checked for updates. Note: this
        consistency only applies to read operations. Write operations are
        always consistent.
    client_config: ClientConfig or dict, optional
        Configuration options for the LanceDB Cloud HTTP client. If a dict, then
        the keys are the attributes of the ClientConfig class. If None, then the
        default configuration is used.
    storage_options: dict, optional
        Additional options for the storage backend. See available options at
        <https://lancedb.com/docs/storage/>
    session: Session, optional
        (For LanceDB OSS only)
        A session to use for this connection. Sessions allow you to configure
        cache sizes for index and metadata caches, which can significantly
        impact memory use and performance. They can also be re-used across
        multiple connections to share the same cache state.

    Examples
    --------

    For a local directory, provide a path for the database:

    >>> import lancedb
    >>> db = lancedb.connect("~/.lancedb")

    For object storage, use a URI prefix:

    >>> db = lancedb.connect("s3://my-bucket/lancedb",
    ...                      storage_options={"aws_access_key_id": "***"})

    Connect to LanceDB cloud:

    >>> db = lancedb.connect("db://my_database", api_key="ldb_...",
    ...                      client_config={"retry_config": {"retries": 5}})

    Returns
    -------
    conn : DBConnection
        A connection to a LanceDB database.
    zdb://LANCEDB_API_KEYz1api_key is required to connect to LanceDB cloud: )r9   r:   r   zUnknown keyword arguments: )r8   r   r;   )r*   r+   r,   osenvirongetr/   intr   r   r4   r   )
r   r6   r"   r7   r8   r9   r:   r   r;   r<   s
             r(   r   r   A   s    V #sw 7?jjnn%67G?PQTPUVWW)3/"45H"I!	

 !4'+

 

 
	
 s3x96vh?@@";'	 r5   )r6   r"   r7   r8   r:   r   r;   c                   K   ||j                         }nd}t        |t              rt        di |}t	        t        |       |       t        t        t        |       |||||||       d{         S 7 w)a
  Connect to a LanceDB database.

    Parameters
    ----------
    uri: str or Path
        The uri of the database.
    api_key: str, optional
        If present, connect to LanceDB cloud.
        Otherwise, connect to a database on file system or cloud storage.
        Can be set via environment variable `LANCEDB_API_KEY`.
    region: str, default "us-east-1"
        The region to use for LanceDB Cloud.
    host_override: str, optional
        The override url for LanceDB Cloud.
    read_consistency_interval: timedelta, default None
        (For LanceDB OSS only)
        The interval at which to check for updates to the table from other
        processes. If None, then consistency is not checked. For performance
        reasons, this is the default. For strong consistency, set this to
        zero seconds. Then every read will check for updates from other
        processes. As a compromise, you can set this to a non-zero timedelta
        for eventual consistency. If more than that interval has passed since
        the last check, then the table will be checked for updates. Note: this
        consistency only applies to read operations. Write operations are
        always consistent.
    client_config: ClientConfig or dict, optional
        Configuration options for the LanceDB Cloud HTTP client. If a dict, then
        the keys are the attributes of the ClientConfig class. If None, then the
        default configuration is used.
    storage_options: dict, optional
        Additional options for the storage backend. See available options at
        <https://lancedb.com/docs/storage/>
    session: Session, optional
        (For LanceDB OSS only)
        A session to use for this connection. Sessions allow you to configure
        cache sizes for index and metadata caches, which can significantly
        impact memory use and performance. They can also be re-used across
        multiple connections to share the same cache state.

    Examples
    --------

    >>> import lancedb
    >>> async def doctest_example():
    ...     # For a local directory, provide a path to the database
    ...     db = await lancedb.connect_async("~/.lancedb")
    ...     # For object storage, use a URI prefix
    ...     db = await lancedb.connect_async("s3://my-bucket/lancedb",
    ...                                      storage_options={
    ...                                          "aws_access_key_id": "***"})
    ...     # Connect to LanceDB cloud
    ...     db = await lancedb.connect_async("db://my_database", api_key="ldb_...",
    ...                                      client_config={
    ...                                          "retry_config": {"retries": 5}})

    Returns
    -------
    conn : AsyncConnection
        A connection to a LanceDB database.
    Nr%   )	total_secondsr*   dictr   r4   r+   r   lancedb_connectr   )	r   r6   r"   r7   r8   r:   r   r;   read_consistency_interval_secss	            r(   connect_asyncrH      s     N !,)B)P)P)R&)-&-&$5}5s3x9*	
 		
 		
s   A)A6+A4,	A6)r   rH   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __version__c                  .    t        j                  d       y )NzLlance is not fork-safe. If you are using multiprocessing, use spawn instead.)warningswarnr%   r5   r(   __warn_on_forkrM      s    MMVr5   register_at_fork)before)5importlib.metadata	importlibr?   concurrent.futuresr   datetimer   typingr   r   r   r   rK   metadataversionrI   _lancedbr   rF   commonr   r   urllib.parser   dbr   r   r   ior   remoter   	remote.dbr   schemar   tabler   r   r   	namespacer   r   r   r   r+   r4   rB   rH   __all__rM   hasattrrN   r%   r5   r(   <module>rc      sF  
  	 1  - -   ((3 0 % ! @ @ &   )  $  
	
'S#X7
	
H "#'59DH?C04!%g	g c]g 	g
 C=g  (	2g "%-?(?"@Ag tCH~t;<g d38n-g gg g gZ "#'59CG04!%\	\ c]\ 	\
 C=\  (	2\ E,S#X">?@\ d38n-\ g\ \~, 2!"B~. #r5   