airbyte_cdk.sources.file_based.discovery_policy

# Re-export the discovery policy classes so they can be imported directly
# from the ``discovery_policy`` package.
from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
    AbstractDiscoveryPolicy,
)
from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import (
    DefaultDiscoveryPolicy,
)

__all__ = ["AbstractDiscoveryPolicy", "DefaultDiscoveryPolicy"]
class AbstractDiscoveryPolicy(abc.ABC):
class AbstractDiscoveryPolicy(ABC):
    """
    Used during discovery; allows the developer to configure the number of concurrent
    requests to send to the source, and the number of files to use for schema discovery.

    Subclasses must implement both members below.
    """

    @property
    @abstractmethod
    def n_concurrent_requests(self) -> int:
        """Number of concurrent requests to send to the source during discovery."""
        ...

    @abstractmethod
    def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
        """
        Return the maximum number of files to use for schema inference.

        :param parser: the file type parser the limit is computed for.
        :return: the file-count limit, as an int.
        """
        ...

Used during discovery; allows the developer to configure the number of concurrent requests to send to the source, and the number of files to use for schema discovery.

n_concurrent_requests: int
@property
@abstractmethod
def n_concurrent_requests(self) -> int:
    """Number of concurrent requests to send to the source during discovery."""
    ...
@abstractmethod
def get_max_n_files_for_schema_inference(self, parser: airbyte_cdk.sources.file_based.file_types.file_type_parser.FileTypeParser) -> int:
@abstractmethod
def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
    """
    Return the maximum number of files to use for schema inference.

    :param parser: the file type parser the limit is computed for.
    :return: the file-count limit, as an int.
    """
    ...
class DefaultDiscoveryPolicy(AbstractDiscoveryPolicy):
    """
    Default number of concurrent requests to send to the source on discover, and number
    of files to use for schema inference.
    """

    @property
    def n_concurrent_requests(self) -> int:
        """Return the module-level default, ``DEFAULT_N_CONCURRENT_REQUESTS``."""
        return DEFAULT_N_CONCURRENT_REQUESTS

    def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
        """
        Return the smaller of the default limit and the parser's own limit.

        :param parser: parser whose ``parser_max_n_files_for_schema_inference``
            is compared against ``DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE``.
        :return: the minimum of the truthy candidates.
        """
        # filter(None, ...) drops falsy candidates, so a parser limit that is
        # None (or 0) is ignored and the default is used instead.
        # NOTE(review): presumably the parser reports None when it has no limit
        # of its own -- confirm against FileTypeParser.
        return min(
            filter(
                None,
                (
                    DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE,
                    parser.parser_max_n_files_for_schema_inference,
                ),
            )
        )

Default number of concurrent requests to send to the source on discover, and number of files to use for schema inference.

n_concurrent_requests: int
@property
def n_concurrent_requests(self) -> int:
    """Return the module-level default, ``DEFAULT_N_CONCURRENT_REQUESTS``."""
    return DEFAULT_N_CONCURRENT_REQUESTS
def get_max_n_files_for_schema_inference(self, parser: airbyte_cdk.sources.file_based.file_types.file_type_parser.FileTypeParser) -> int:
def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int:
    """
    Return the smaller of the default limit and the parser's own limit.

    :param parser: parser whose ``parser_max_n_files_for_schema_inference``
        is compared against ``DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE``.
    :return: the minimum of the truthy candidates.
    """
    # filter(None, ...) drops falsy candidates, so a parser limit that is
    # None (or 0) is ignored and the default is used instead.
    return min(
        filter(
            None,
            (
                DEFAULT_MAX_N_FILES_FOR_STREAM_SCHEMA_INFERENCE,
                parser.parser_max_n_files_for_schema_inference,
            ),
        )
    )