airbyte_cdk.sources.file_based.stream

 1from airbyte_cdk.sources.file_based.stream.abstract_file_based_stream import AbstractFileBasedStream
 2from airbyte_cdk.sources.file_based.stream.default_file_based_stream import DefaultFileBasedStream
 3from airbyte_cdk.sources.file_based.stream.identities_stream import FileIdentitiesStream
 4from airbyte_cdk.sources.file_based.stream.permissions_file_based_stream import (
 5    PermissionsFileBasedStream,
 6)
 7
 8__all__ = [
 9    "AbstractFileBasedStream",
10    "DefaultFileBasedStream",
11    "FileIdentitiesStream",
12    "PermissionsFileBasedStream",
13]
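
All four classes are re-exported at the package root, so connector code can import them directly. A minimal usage sketch (assuming the airbyte_cdk package is installed):

    from airbyte_cdk.sources.file_based.stream import (
        AbstractFileBasedStream,
        DefaultFileBasedStream,
        FileIdentitiesStream,
        PermissionsFileBasedStream,
    )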
class AbstractFileBasedStream(airbyte_cdk.sources.streams.core.Stream):
 38class AbstractFileBasedStream(Stream):
 39    """
 40    A file-based stream in an Airbyte source.
 41
 42    In addition to the base Stream attributes, a file-based stream has
 43    - A config object (derived from the corresponding stream section in source config).
 44      This contains the globs defining the stream's files.
 45    - A StreamReader, which knows how to list and open files in the stream.
 46    - A FileBasedAvailabilityStrategy, which knows how to verify that we can list and open
 47      files in the stream.
 48    - A DiscoveryPolicy that controls the number of concurrent requests sent to the source
 49      during discover, and the number of files used for schema discovery.
 50    - A dictionary of FileType:Parser that holds all the file types that can be handled
 51      by the stream.
 52    """
 53
 54    def __init__(
 55        self,
 56        config: FileBasedStreamConfig,
 57        catalog_schema: Optional[Mapping[str, Any]],
 58        stream_reader: AbstractFileBasedStreamReader,
 59        availability_strategy: AbstractFileBasedAvailabilityStrategy,
 60        discovery_policy: AbstractDiscoveryPolicy,
 61        parsers: Dict[Type[Any], FileTypeParser],
 62        validation_policy: AbstractSchemaValidationPolicy,
 63        errors_collector: FileBasedErrorsCollector,
 64        cursor: AbstractFileBasedCursor,
 65    ):
 66        super().__init__()
 67        self.config = config
 68        self.catalog_schema = catalog_schema
 69        self.validation_policy = validation_policy
 70        self.stream_reader = stream_reader
 71        self._discovery_policy = discovery_policy
 72        self._availability_strategy = availability_strategy
 73        self._parsers = parsers
 74        self.errors_collector = errors_collector
 75        self._cursor = cursor
 76
 77    @property
 78    @abstractmethod
 79    def primary_key(self) -> PrimaryKeyType: ...
 80
 81    @cache
 82    def list_files(self) -> List[RemoteFile]:
 83        """
 84        List all files that belong to the stream.
 85
 86        The output of this method is cached so we don't need to list the files more than once.
 87        This means we won't pick up changes to the files during a sync. This method uses the
 88        get_files method which is implemented by the concrete stream class.
 89        """
 90        return list(self.get_files())
 91
 92    @abstractmethod
 93    def get_files(self) -> Iterable[RemoteFile]:
 94        """
 95        List all files that belong to the stream as defined by the stream's globs.
 96        """
 97        ...
 98
 99    def read_records(
100        self,
101        sync_mode: SyncMode,
102        cursor_field: Optional[List[str]] = None,
103        stream_slice: Optional[StreamSlice] = None,
104        stream_state: Optional[Mapping[str, Any]] = None,
105    ) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
106        """
107        Yield all records from all remote files in `list_files_for_this_sync`.
108        This method acts as an adapter between the generic Stream interface and the file-based
109        stream, since file-based streams manage their own state.
110        """
111        if stream_slice is None:
112            raise ValueError("stream_slice must be set")
113        return self.read_records_from_slice(stream_slice)
114
115    @abstractmethod
116    def read_records_from_slice(
117        self, stream_slice: StreamSlice
118    ) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
119        """
120        Yield all records from all remote files in `list_files_for_this_sync`.
121        """
122        ...
123
124    def stream_slices(
125        self,
126        *,
127        sync_mode: SyncMode,
128        cursor_field: Optional[List[str]] = None,
129        stream_state: Optional[Mapping[str, Any]] = None,
130    ) -> Iterable[Optional[Mapping[str, Any]]]:
131        """
132        This method acts as an adapter between the generic Stream interface and the file-based
133        stream, since file-based streams manage their own state.
134        """
135        return self.compute_slices()
136
137    @abstractmethod
138    def compute_slices(self) -> Iterable[Optional[StreamSlice]]:
139        """
140        Return a list of slices that will be used to read files in the current sync.
141        :return: The slices to use for the current sync.
142        """
143        ...
144
145    @abstractmethod
146    @lru_cache(maxsize=None)
147    def get_json_schema(self) -> Mapping[str, Any]:
148        """
149        Return the JSON Schema for a stream.
150        """
151        ...
152
153    @abstractmethod
154    def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
155        """
156        Infer the schema for files in the stream.
157        """
158        ...
159
160    def get_parser(self) -> FileTypeParser:
161        try:
162            return self._parsers[type(self.config.format)]
163        except KeyError:
164            raise UndefinedParserError(
165                FileBasedSourceError.UNDEFINED_PARSER,
166                stream=self.name,
167                format=type(self.config.format),
168            )
169
170    def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
171        if self.validation_policy:
172            return self.validation_policy.record_passes_validation_policy(
173                record=record, schema=self.catalog_schema
174            )
175        else:
176            raise RecordParseError(
177                FileBasedSourceError.UNDEFINED_VALIDATION_POLICY,
178                stream=self.name,
179                validation_policy=self.config.validation_policy,
180            )
181
182    @cached_property
183    @deprecated("Deprecated as of CDK version 3.7.0.")
184    def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
185        return self._availability_strategy
186
187    @property
188    def name(self) -> str:
189        return self.config.name
190
191    def get_cursor(self) -> Optional[Cursor]:
192        """
193        This is a temporary hack. Because file-based, declarative, and concurrent have _slightly_ different cursor implementations,
194        the file-based cursor isn't compatible with the cursor-based iteration flow in core.py in the top-level CDK. By setting this
195        to None, we defer to the regular incremental checkpoint flow. Once all cursors are consolidated under a common interface,
196        this override can be removed.
197        """
198        return None

A file-based stream in an Airbyte source.

In addition to the base Stream attributes, a file-based stream has

  • A config object (derived from the corresponding stream section in source config). This contains the globs defining the stream's files.
  • A StreamReader, which knows how to list and open files in the stream.
  • A FileBasedAvailabilityStrategy, which knows how to verify that we can list and open files in the stream.
  • A DiscoveryPolicy that controls the number of concurrent requests sent to the source during discover, and the number of files used for schema discovery.
  • A dictionary of FileType:Parser that holds all the file types that can be handled by the stream.
config
catalog_schema
validation_policy
stream_reader
errors_collector
primary_key: Union[str, List[str], NoneType]
77    @property
78    @abstractmethod
79    def primary_key(self) -> PrimaryKeyType: ...
Returns

A string if there is a single primary key, a list of strings if there is a composite primary key, or a list of lists of strings if the composite primary key consists of nested fields. If the stream has no primary keys, return None.
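
Illustrative values for each shape described above (these literals are invented examples, not values from a real connector):

    primary_key = "id"                                  # single primary key
    primary_key = ["account_id", "id"]                  # composite primary key
    primary_key = [["user", "id"], ["meta", "region"]]  # composite key of nested field paths
    primary_key = None                                  # stream has no primary key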

@cache
def list_files(self) -> List[airbyte_cdk.sources.file_based.RemoteFile]:
81    @cache
82    def list_files(self) -> List[RemoteFile]:
83        """
84        List all files that belong to the stream.
85
86        The output of this method is cached so we don't need to list the files more than once.
87        This means we won't pick up changes to the files during a sync. This method uses the
88        get_files method which is implemented by the concrete stream class.
89        """
90        return list(self.get_files())

List all files that belong to the stream.

The output of this method is cached so we don't need to list the files more than once. This means we won't pick up changes to the files during a sync. This method uses the get_files method, which is implemented by the concrete stream class.
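
A self-contained sketch of this caching behavior using functools.cache; the stream class and file names below are stand-ins, not CDK code:

    from functools import cache

    class _SketchStream:
        calls = 0

        @cache
        def list_files(self):
            # In the real class this delegates to get_files(), which hits the remote source.
            _SketchStream.calls += 1
            return ["a.csv", "b.csv"]

    stream = _SketchStream()
    stream.list_files()
    stream.list_files()
    assert _SketchStream.calls == 1  # second call served from the cache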

@abstractmethod
def get_files(self) -> Iterable[airbyte_cdk.sources.file_based.RemoteFile]:
92    @abstractmethod
93    def get_files(self) -> Iterable[RemoteFile]:
94        """
95        List all files that belong to the stream as defined by the stream's globs.
96        """
97        ...

List all files that belong to the stream as defined by the stream's globs.

def read_records(self, sync_mode: airbyte_protocol_dataclasses.models.airbyte_protocol.SyncMode, cursor_field: Optional[List[str]] = None, stream_slice: Optional[Mapping[str, Any]] = None, stream_state: Optional[Mapping[str, Any]] = None) -> Iterable[Union[Mapping[str, Any], airbyte_cdk.AirbyteMessage]]:
 99    def read_records(
100        self,
101        sync_mode: SyncMode,
102        cursor_field: Optional[List[str]] = None,
103        stream_slice: Optional[StreamSlice] = None,
104        stream_state: Optional[Mapping[str, Any]] = None,
105    ) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
106        """
107        Yield all records from all remote files in `list_files_for_this_sync`.
108        This method acts as an adapter between the generic Stream interface and the file-based
109        stream, since file-based streams manage their own state.
110        """
111        if stream_slice is None:
112            raise ValueError("stream_slice must be set")
113        return self.read_records_from_slice(stream_slice)

Yield all records from all remote files in list_files_for_this_sync. This method acts as an adapter between the generic Stream interface and the file-based stream, since file-based streams manage their own state.

@abstractmethod
def read_records_from_slice(self, stream_slice: Mapping[str, Any]) -> Iterable[Union[Mapping[str, Any], airbyte_cdk.AirbyteMessage]]:
115    @abstractmethod
116    def read_records_from_slice(
117        self, stream_slice: StreamSlice
118    ) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
119        """
120        Yield all records from all remote files in `list_files_for_this_sync`.
121        """
122        ...

Yield all records from all remote files in list_files_for_this_sync.

def stream_slices(self, *, sync_mode: airbyte_protocol_dataclasses.models.airbyte_protocol.SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None) -> Iterable[Optional[Mapping[str, Any]]]:
124    def stream_slices(
125        self,
126        *,
127        sync_mode: SyncMode,
128        cursor_field: Optional[List[str]] = None,
129        stream_state: Optional[Mapping[str, Any]] = None,
130    ) -> Iterable[Optional[Mapping[str, Any]]]:
131        """
132        This method acts as an adapter between the generic Stream interface and the file-based
133        stream, since file-based streams manage their own state.
134        """
135        return self.compute_slices()

This method acts as an adapter between the generic Stream interface and the file-based stream, since file-based streams manage their own state.

@abstractmethod
def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
137    @abstractmethod
138    def compute_slices(self) -> Iterable[Optional[StreamSlice]]:
139        """
140        Return a list of slices that will be used to read files in the current sync.
141        :return: The slices to use for the current sync.
142        """
143        ...

Return a list of slices that will be used to read files in the current sync.

Returns

The slices to use for the current sync.

@abstractmethod
@lru_cache(maxsize=None)
def get_json_schema(self) -> Mapping[str, Any]:
145    @abstractmethod
146    @lru_cache(maxsize=None)
147    def get_json_schema(self) -> Mapping[str, Any]:
148        """
149        Return the JSON Schema for a stream.
150        """
151        ...

Return the JSON Schema for a stream.

@abstractmethod
def infer_schema(self, files: List[airbyte_cdk.sources.file_based.RemoteFile]) -> Mapping[str, Any]:
153    @abstractmethod
154    def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
155        """
156        Infer the schema for files in the stream.
157        """
158        ...

Infer the schema for files in the stream.

160    def get_parser(self) -> FileTypeParser:
161        try:
162            return self._parsers[type(self.config.format)]
163        except KeyError:
164            raise UndefinedParserError(
165                FileBasedSourceError.UNDEFINED_PARSER,
166                stream=self.name,
167                format=type(self.config.format),
168            )
def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
170    def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool:
171        if self.validation_policy:
172            return self.validation_policy.record_passes_validation_policy(
173                record=record, schema=self.catalog_schema
174            )
175        else:
176            raise RecordParseError(
177                FileBasedSourceError.UNDEFINED_VALIDATION_POLICY,
178                stream=self.name,
179                validation_policy=self.config.validation_policy,
180            )
182    @cached_property
183    @deprecated("Deprecated as of CDK version 3.7.0.")
184    def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
185        return self._availability_strategy
name: str
187    @property
188    def name(self) -> str:
189        return self.config.name
Returns

Stream name. By default this is the implementing class name, but it can be overridden as needed.

def get_cursor(self) -> Optional[airbyte_cdk.sources.streams.checkpoint.Cursor]:
191    def get_cursor(self) -> Optional[Cursor]:
192        """
193        This is a temporary hack. Because file-based, declarative, and concurrent have _slightly_ different cursor implementations,
194        the file-based cursor isn't compatible with the cursor-based iteration flow in core.py in the top-level CDK. By setting this
195        to None, we defer to the regular incremental checkpoint flow. Once all cursors are consolidated under a common interface,
196        this override can be removed.
197        """
198        return None

This is a temporary hack. Because file-based, declarative, and concurrent have slightly different cursor implementations, the file-based cursor isn't compatible with the cursor-based iteration flow in core.py in the top-level CDK. By setting this to None, we defer to the regular incremental checkpoint flow. Once all cursors are consolidated under a common interface, this override can be removed.
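
Putting the abstract interface together, here is a minimal sketch of a concrete subclass. It assumes the constructor collaborators (config, stream reader, parsers, etc.) are built elsewhere; the method bodies are illustrative placeholders, not the DefaultFileBasedStream implementation:

    from typing import Any, Iterable, List, Mapping, Optional

    from airbyte_cdk.sources.file_based import RemoteFile
    from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream


    class MinimalFileBasedStream(AbstractFileBasedStream):
        @property
        def primary_key(self) -> Optional[str]:
            return None  # this hypothetical stream has no primary key

        def get_files(self) -> Iterable[RemoteFile]:
            # Delegate to the stream reader, matching the stream's configured globs.
            return self.stream_reader.get_matching_files(
                self.config.globs or [], self.config.legacy_prefix, self.logger
            )

        def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
            # Simplest possible slicing: one slice holding every file.
            return [{"files": self.list_files()}]

        def read_records_from_slice(
            self, stream_slice: Mapping[str, Any]
        ) -> Iterable[Mapping[str, Any]]:
            parser = self.get_parser()  # one parser per stream, keyed by the config's format
            for file in stream_slice["files"]:
                yield from parser.parse_records(
                    self.config, file, self.stream_reader, self.logger, self.catalog_schema
                )

        def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
            return {}  # placeholder; see DefaultFileBasedStream for real inference

        def get_json_schema(self) -> Mapping[str, Any]:
            return {"type": "object", "properties": {}}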

class DefaultFileBasedStream(airbyte_cdk.sources.file_based.stream.AbstractFileBasedStream, airbyte_cdk.sources.streams.core.IncrementalMixin):
 44class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
 45    """
 46    The default file-based stream.
 47    """
 48
 49    FILE_TRANSFER_KW = "use_file_transfer"
 50    PRESERVE_DIRECTORY_STRUCTURE_KW = "preserve_directory_structure"
 51    FILES_KEY = "files"
 52    DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
 53    ab_last_mod_col = "_ab_source_file_last_modified"
 54    ab_file_name_col = "_ab_source_file_url"
 55    modified = "modified"
 56    source_file_url = "source_file_url"
 57    airbyte_columns = [ab_last_mod_col, ab_file_name_col]
 58    use_file_transfer = False
 59    preserve_directory_structure = True
 60
 61    def __init__(self, **kwargs: Any):
 62        if self.FILE_TRANSFER_KW in kwargs:
 63            self.use_file_transfer = kwargs.pop(self.FILE_TRANSFER_KW, False)
 64        if self.PRESERVE_DIRECTORY_STRUCTURE_KW in kwargs:
 65            self.preserve_directory_structure = kwargs.pop(
 66                self.PRESERVE_DIRECTORY_STRUCTURE_KW, True
 67            )
 68        super().__init__(**kwargs)
 69
 70    @property
 71    def state(self) -> MutableMapping[str, Any]:
 72        return self._cursor.get_state()
 73
 74    @state.setter
 75    def state(self, value: MutableMapping[str, Any]) -> None:
 76        """State setter, accept state serialized by state getter."""
 77        self._cursor.set_initial_state(value)
 78
 79    @property  # type: ignore # mypy complains wrong type, but AbstractFileBasedCursor is parent of file-based cursors
 80    def cursor(self) -> Optional[AbstractFileBasedCursor]:
 81        return self._cursor
 82
 83    @cursor.setter
 84    def cursor(self, value: AbstractFileBasedCursor) -> None:
 85        if self._cursor is not None:
 86            raise RuntimeError(
 87                f"Cursor for stream {self.name} is already set. This is unexpected. Please contact Support."
 88            )
 89        self._cursor = value
 90
 91    @property
 92    def primary_key(self) -> PrimaryKeyType:
 93        return self.config.primary_key or self.get_parser().get_parser_defined_primary_key(
 94            self.config
 95        )
 96
 97    def _filter_schema_invalid_properties(
 98        self, configured_catalog_json_schema: Dict[str, Any]
 99    ) -> Dict[str, Any]:
100        if self.use_file_transfer:
101            return {
102                "type": "object",
103                "properties": {
104                    "file_path": {"type": "string"},
105                    "file_size": {"type": "string"},
106                    self.ab_file_name_col: {"type": "string"},
107                },
108            }
109        else:
110            return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
111
112    def _duplicated_files_names(
113        self, slices: List[dict[str, List[RemoteFile]]]
114    ) -> List[dict[str, List[str]]]:
115        seen_file_names: Dict[str, List[str]] = defaultdict(list)
116        for file_slice in slices:
117            for file_found in file_slice[self.FILES_KEY]:
118                file_name = path.basename(file_found.uri)
119                seen_file_names[file_name].append(file_found.uri)
120        return [
121            {file_name: paths} for file_name, paths in seen_file_names.items() if len(paths) > 1
122        ]
123
124    def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
125        # Sort files by last_modified, uri and return them grouped by last_modified
126        all_files = self.list_files()
127        files_to_read = self._cursor.get_files_to_sync(all_files, self.logger)
128        sorted_files_to_read = sorted(files_to_read, key=lambda f: (f.last_modified, f.uri))
129        slices = [
130            {self.FILES_KEY: list(group[1])}
131            for group in itertools.groupby(sorted_files_to_read, lambda f: f.last_modified)
132        ]
133        if slices and not self.preserve_directory_structure:
134            duplicated_files_names = self._duplicated_files_names(slices)
135            if duplicated_files_names:
136                raise DuplicatedFilesError(
137                    stream=self.name, duplicated_files_names=duplicated_files_names
138                )
139        return slices
140
141    def transform_record(
142        self, record: dict[str, Any], file: RemoteFile, last_updated: str
143    ) -> dict[str, Any]:
144        # adds _ab_source_file_last_modified and _ab_source_file_url to the record
145        record[self.ab_last_mod_col] = last_updated
146        record[self.ab_file_name_col] = file.uri
147        return record
148
149    def transform_record_for_file_transfer(
150        self, record: dict[str, Any], file: RemoteFile
151    ) -> dict[str, Any]:
152        # timestamp() returns a float of seconds since the Unix epoch; convert to integer milliseconds
153        record[self.modified] = int(file.last_modified.timestamp()) * 1000
154        record[self.source_file_url] = file.uri
155        return record
156
157    def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
158        """
159        Yield all records from all remote files in `list_files_for_this_sync`.
160
161        If an error is encountered reading records from a file, log a message and do not attempt
162        to sync the rest of the file.
163        """
164        schema = self.catalog_schema
165        if schema is None:
166            # On read requests we should always have the catalog available
167            raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
168        # The stream only supports a single file type, so we can use the same parser for all files
169        parser = self.get_parser()
170        for file in stream_slice["files"]:
171            # only serialize the datetime once
172            file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
173            n_skipped = line_no = 0
174
175            try:
176                if self.use_file_transfer:
177                    self.logger.info(f"{self.name}: {file} file-based syncing")
178                    # TODO: complete this code path so it does not rely on the local parser
179                    file_transfer = FileTransfer()
180                    for record in file_transfer.get_file(
181                        self.config, file, self.stream_reader, self.logger
182                    ):
183                        line_no += 1
184                        if not self.record_passes_validation_policy(record):
185                            n_skipped += 1
186                            continue
187                        record = self.transform_record_for_file_transfer(record, file)
188                        yield stream_data_to_airbyte_message(
189                            self.name, record, is_file_transfer_message=True
190                        )
191                else:
192                    for record in parser.parse_records(
193                        self.config, file, self.stream_reader, self.logger, schema
194                    ):
195                        line_no += 1
196                        if self.config.schemaless:
197                            record = {"data": record}
198                        elif not self.record_passes_validation_policy(record):
199                            n_skipped += 1
200                            continue
201                        record = self.transform_record(record, file, file_datetime_string)
202                        yield stream_data_to_airbyte_message(self.name, record)
203                self._cursor.add_file(file)
204
205            except StopSyncPerValidationPolicy:
206                yield AirbyteMessage(
207                    type=MessageType.LOG,
208                    log=AirbyteLogMessage(
209                        level=Level.WARN,
210                        message=f"Stopping sync in accordance with the configured validation policy. Records in file did not conform to the schema. stream={self.name} file={file.uri} validation_policy={self.config.validation_policy.value} n_skipped={n_skipped}",
211                    ),
212                )
213                break
214
215            except RecordParseError:
216                # Increment line_no because the exception was raised before we could increment it
217                line_no += 1
218                self.errors_collector.collect(
219                    AirbyteMessage(
220                        type=MessageType.LOG,
221                        log=AirbyteLogMessage(
222                            level=Level.ERROR,
223                            message=f"{FileBasedSourceError.ERROR_PARSING_RECORD.value} stream={self.name} file={file.uri} line_no={line_no} n_skipped={n_skipped}",
224                            stack_trace=traceback.format_exc(),
225                        ),
226                    ),
227                )
228
229            except AirbyteTracedException as exc:
230                # Re-raise the exception to stop the whole sync immediately as this is a fatal error
231                raise exc
232
233            except Exception:
234                yield AirbyteMessage(
235                    type=MessageType.LOG,
236                    log=AirbyteLogMessage(
237                        level=Level.ERROR,
238                        message=f"{FileBasedSourceError.ERROR_PARSING_RECORD.value} stream={self.name} file={file.uri} line_no={line_no} n_skipped={n_skipped}",
239                        stack_trace=traceback.format_exc(),
240                    ),
241                )
242
243            finally:
244                if n_skipped:
245                    yield AirbyteMessage(
246                        type=MessageType.LOG,
247                        log=AirbyteLogMessage(
248                            level=Level.WARN,
249                            message=f"Records in file did not pass validation policy. stream={self.name} file={file.uri} n_skipped={n_skipped} validation_policy={self.validation_policy.name}",
250                        ),
251                    )
252
253    @property
254    def cursor_field(self) -> Union[str, List[str]]:
255        """
256        Override to return the default cursor field used by this stream, e.g. an API entity might always use created_at as the cursor field.
257        :return: The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor.
258        """
259        return self.ab_last_mod_col
260
261    @cache
262    def get_json_schema(self) -> JsonSchema:
263        extra_fields = {
264            self.ab_last_mod_col: {"type": "string"},
265            self.ab_file_name_col: {"type": "string"},
266        }
267        try:
268            schema = self._get_raw_json_schema()
269        except InvalidSchemaError as config_exception:
270            raise AirbyteTracedException(
271                internal_message="Please check the logged errors for more information.",
272                message=FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value,
273                exception=AirbyteTracedException(exception=config_exception),
274                failure_type=FailureType.config_error,
275            )
276        except AirbyteTracedException as ate:
277            raise ate
278        except Exception as exc:
279            raise SchemaInferenceError(
280                FileBasedSourceError.SCHEMA_INFERENCE_ERROR, stream=self.name
281            ) from exc
282        else:
283            return {"type": "object", "properties": {**extra_fields, **schema["properties"]}}
284
285    def _get_raw_json_schema(self) -> JsonSchema:
286        if self.use_file_transfer:
287            return file_transfer_schema
288        elif self.config.input_schema:
289            return self.config.get_input_schema()  # type: ignore
290        elif self.config.schemaless:
291            return schemaless_schema
292        else:
293            files = self.list_files()
294            first_n_files = len(files)
295
296            if self.config.recent_n_files_to_read_for_schema_discovery:
297                self.logger.info(
298                    msg=(
299                        f"Only first {self.config.recent_n_files_to_read_for_schema_discovery} files will be used to infer schema "
300                        f"for stream {self.name} due to limitation in config."
301                    )
302                )
303                first_n_files = self.config.recent_n_files_to_read_for_schema_discovery
304
305        if first_n_files == 0:
306            self.logger.warning(
307                msg=f"No files were identified in the stream {self.name}. Setting default schema for the stream."
308            )
309            return schemaless_schema
310
311        max_n_files_for_schema_inference = (
312            self._discovery_policy.get_max_n_files_for_schema_inference(self.get_parser())
313        )
314
315        if first_n_files > max_n_files_for_schema_inference:
316            # Use the most recent files for schema inference, so we pick up schema changes during discovery.
317            self.logger.warning(
318                msg=f"Refusing to infer schema for {first_n_files} files; using {max_n_files_for_schema_inference} files."
319            )
320            first_n_files = max_n_files_for_schema_inference
321
322        files = sorted(files, key=lambda x: x.last_modified, reverse=True)[:first_n_files]
323
324        inferred_schema = self.infer_schema(files)
325
326        if not inferred_schema:
327            raise InvalidSchemaError(
328                FileBasedSourceError.INVALID_SCHEMA_ERROR,
329                details=f"Empty schema. Please check that the files are valid for format {self.config.format}",
330                stream=self.name,
331            )
332
333        schema = {"type": "object", "properties": inferred_schema}
334
335        return schema
336
337    def get_files(self) -> Iterable[RemoteFile]:
338        """
339        Return all files that belong to the stream as defined by the stream's globs.
340        """
341        return self.stream_reader.get_matching_files(
342            self.config.globs or [], self.config.legacy_prefix, self.logger
343        )
344
345    def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
346        loop = asyncio.get_event_loop()
347        schema = loop.run_until_complete(self._infer_schema(files))
348        # infer_schema returns a Mapping that is assumed to be immutable, so we deepcopy to avoid mutating the shared reference
349        return self._fill_nulls(deepcopy(schema))
350
351    @staticmethod
352    def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]:
353        if isinstance(schema, dict):
354            for k, v in schema.items():
355                if k == "type":
356                    if isinstance(v, list):
357                        if "null" not in v:
358                            schema[k] = ["null"] + v
359                    elif v != "null":
360                        if isinstance(v, (str, list)):
361                            schema[k] = ["null", v]
362                        else:
363                            DefaultFileBasedStream._fill_nulls(v)
364                else:
365                    DefaultFileBasedStream._fill_nulls(v)
366        elif isinstance(schema, list):
367            for item in schema:
368                DefaultFileBasedStream._fill_nulls(item)
369        return schema
370
371    async def _infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
372        """
373        Infer the schema for a stream.
374
375        Each file type has a corresponding `infer_schema` handler.
376        Dispatch on file type.
377        """
378        base_schema: SchemaType = {}
379        pending_tasks: Set[asyncio.tasks.Task[SchemaType]] = set()
380
381        n_started, n_files = 0, len(files)
382        files_iterator = iter(files)
383        while pending_tasks or n_started < n_files:
384            while len(pending_tasks) <= self._discovery_policy.n_concurrent_requests and (
385                file := next(files_iterator, None)
386            ):
387                pending_tasks.add(asyncio.create_task(self._infer_file_schema(file)))
388                n_started += 1
389            # Return when the first task is completed so that we can enqueue a new task as soon as the
390            # number of concurrent tasks drops below the number allowed.
391            done, pending_tasks = await asyncio.wait(
392                pending_tasks, return_when=asyncio.FIRST_COMPLETED
393            )
394            for task in done:
395                try:
396                    base_schema = merge_schemas(base_schema, task.result())
397                except AirbyteTracedException as ate:
398                    raise ate
399                except Exception as exc:
400                    self.logger.error(
401                        f"An error occurred inferring the schema. \n {traceback.format_exc()}",
402                        exc_info=exc,
403                    )
404
405        return base_schema
406
407    async def _infer_file_schema(self, file: RemoteFile) -> SchemaType:
408        try:
409            return await self.get_parser().infer_schema(
410                self.config, file, self.stream_reader, self.logger
411            )
412        except AirbyteTracedException as ate:
413            raise ate
414        except Exception as exc:
415            raise SchemaInferenceError(
416                FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
417                file=file.uri,
418                format=str(self.config.format),
419                stream=self.name,
420            ) from exc

The default file-based stream.

DefaultFileBasedStream(**kwargs: Any)
61    def __init__(self, **kwargs: Any):
62        if self.FILE_TRANSFER_KW in kwargs:
63            self.use_file_transfer = kwargs.pop(self.FILE_TRANSFER_KW, False)
64        if self.PRESERVE_DIRECTORY_STRUCTURE_KW in kwargs:
65            self.preserve_directory_structure = kwargs.pop(
66                self.PRESERVE_DIRECTORY_STRUCTURE_KW, True
67            )
68        super().__init__(**kwargs)
FILE_TRANSFER_KW = 'use_file_transfer'
PRESERVE_DIRECTORY_STRUCTURE_KW = 'preserve_directory_structure'
FILES_KEY = 'files'
DATE_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'
ab_last_mod_col = '_ab_source_file_last_modified'
ab_file_name_col = '_ab_source_file_url'
modified = 'modified'
source_file_url = 'source_file_url'
airbyte_columns = ['_ab_source_file_last_modified', '_ab_source_file_url']
use_file_transfer = False
preserve_directory_structure = True
state: MutableMapping[str, Any]
70    @property
71    def state(self) -> MutableMapping[str, Any]:
72        return self._cursor.get_state()

State getter; should return state in a form that can be serialized to a string and sent to the output as a STATE AirbyteMessage.

A good example of a state is a cursor_value: { self.cursor_field: "cursor_value" }

State should be as small as possible while remaining descriptive enough to restore the syncing process from the point where it stopped.

79    @property  # type: ignore # mypy complains wrong type, but AbstractFileBasedCursor is parent of file-based cursors
80    def cursor(self) -> Optional[AbstractFileBasedCursor]:
81        return self._cursor
primary_key: Union[str, List[str], NoneType]
91    @property
92    def primary_key(self) -> PrimaryKeyType:
93        return self.config.primary_key or self.get_parser().get_parser_defined_primary_key(
94            self.config
95        )
Returns

A string if there is a single primary key, a list of strings if there is a composite primary key, or a list of lists of strings if the composite primary key consists of nested fields. If the stream has no primary keys, return None.

def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
124    def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]:
125        # Sort files by last_modified, uri and return them grouped by last_modified
126        all_files = self.list_files()
127        files_to_read = self._cursor.get_files_to_sync(all_files, self.logger)
128        sorted_files_to_read = sorted(files_to_read, key=lambda f: (f.last_modified, f.uri))
129        slices = [
130            {self.FILES_KEY: list(group[1])}
131            for group in itertools.groupby(sorted_files_to_read, lambda f: f.last_modified)
132        ]
133        if slices and not self.preserve_directory_structure:
134            duplicated_files_names = self._duplicated_files_names(slices)
135            if duplicated_files_names:
136                raise DuplicatedFilesError(
137                    stream=self.name, duplicated_files_names=duplicated_files_names
138                )
139        return slices

Return a list of slices that will be used to read files in the current sync.

Returns

The slices to use for the current sync.
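
The sort-then-group step can be shown in isolation; the FakeFile class and dates below are stand-ins for RemoteFile:

    import itertools
    from dataclasses import dataclass
    from datetime import datetime


    @dataclass(frozen=True)
    class FakeFile:  # stand-in for RemoteFile
        uri: str
        last_modified: datetime


    files = [
        FakeFile("b.csv", datetime(2024, 1, 2)),
        FakeFile("a.csv", datetime(2024, 1, 2)),
        FakeFile("c.csv", datetime(2024, 1, 1)),
    ]

    # Sort by (last_modified, uri), then emit one slice per distinct last_modified.
    sorted_files = sorted(files, key=lambda f: (f.last_modified, f.uri))
    slices = [
        {"files": list(group)}
        for _, group in itertools.groupby(sorted_files, key=lambda f: f.last_modified)
    ]
    assert [f.uri for f in slices[0]["files"]] == ["c.csv"]
    assert [f.uri for f in slices[1]["files"]] == ["a.csv", "b.csv"]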

def transform_record(self, record: dict[str, typing.Any], file: airbyte_cdk.sources.file_based.RemoteFile, last_updated: str) -> dict[str, typing.Any]:
141    def transform_record(
142        self, record: dict[str, Any], file: RemoteFile, last_updated: str
143    ) -> dict[str, Any]:
144        # adds _ab_source_file_last_modified and _ab_source_file_url to the record
145        record[self.ab_last_mod_col] = last_updated
146        record[self.ab_file_name_col] = file.uri
147        return record
def transform_record_for_file_transfer(self, record: dict[str, typing.Any], file: airbyte_cdk.sources.file_based.RemoteFile) -> dict[str, typing.Any]:
149    def transform_record_for_file_transfer(
150        self, record: dict[str, Any], file: RemoteFile
151    ) -> dict[str, Any]:
152        # timestamp() returns a float of seconds since the Unix epoch; convert to integer milliseconds
153        record[self.modified] = int(file.last_modified.timestamp()) * 1000
154        record[self.source_file_url] = file.uri
155        return record
def read_records_from_slice(self, stream_slice: Mapping[str, Any]) -> Iterable[airbyte_cdk.AirbyteMessage]:
157    def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
158        """
159        Yield all records from all remote files in `list_files_for_this_sync`.
160
161        If an error is encountered reading records from a file, log a message and do not attempt
162        to sync the rest of the file.
163        """
164        schema = self.catalog_schema
165        if schema is None:
166            # On read requests we should always have the catalog available
167            raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
168        # The stream only supports a single file type, so we can use the same parser for all files
169        parser = self.get_parser()
170        for file in stream_slice["files"]:
171            # only serialize the datetime once
172            file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
173            n_skipped = line_no = 0
174
175            try:
176                if self.use_file_transfer:
177                    self.logger.info(f"{self.name}: {file} file-based syncing")
178                    # TODO: complete this code path so it does not rely on the local parser
179                    file_transfer = FileTransfer()
180                    for record in file_transfer.get_file(
181                        self.config, file, self.stream_reader, self.logger
182                    ):
183                        line_no += 1
184                        if not self.record_passes_validation_policy(record):
185                            n_skipped += 1
186                            continue
187                        record = self.transform_record_for_file_transfer(record, file)
188                        yield stream_data_to_airbyte_message(
189                            self.name, record, is_file_transfer_message=True
190                        )
191                else:
192                    for record in parser.parse_records(
193                        self.config, file, self.stream_reader, self.logger, schema
194                    ):
195                        line_no += 1
196                        if self.config.schemaless:
197                            record = {"data": record}
198                        elif not self.record_passes_validation_policy(record):
199                            n_skipped += 1
200                            continue
201                        record = self.transform_record(record, file, file_datetime_string)
202                        yield stream_data_to_airbyte_message(self.name, record)
203                self._cursor.add_file(file)
204
205            except StopSyncPerValidationPolicy:
206                yield AirbyteMessage(
207                    type=MessageType.LOG,
208                    log=AirbyteLogMessage(
209                        level=Level.WARN,
210                        message=f"Stopping sync in accordance with the configured validation policy. Records in file did not conform to the schema. stream={self.name} file={file.uri} validation_policy={self.config.validation_policy.value} n_skipped={n_skipped}",
211                    ),
212                )
213                break
214
215            except RecordParseError:
216                # Increment line_no because the exception was raised before we could increment it
217                line_no += 1
218                self.errors_collector.collect(
219                    AirbyteMessage(
220                        type=MessageType.LOG,
221                        log=AirbyteLogMessage(
222                            level=Level.ERROR,
223                            message=f"{FileBasedSourceError.ERROR_PARSING_RECORD.value} stream={self.name} file={file.uri} line_no={line_no} n_skipped={n_skipped}",
224                            stack_trace=traceback.format_exc(),
225                        ),
226                    ),
227                )
228
229            except AirbyteTracedException as exc:
230                # Re-raise the exception to stop the whole sync immediately as this is a fatal error
231                raise exc
232
233            except Exception:
234                yield AirbyteMessage(
235                    type=MessageType.LOG,
236                    log=AirbyteLogMessage(
237                        level=Level.ERROR,
238                        message=f"{FileBasedSourceError.ERROR_PARSING_RECORD.value} stream={self.name} file={file.uri} line_no={line_no} n_skipped={n_skipped}",
239                        stack_trace=traceback.format_exc(),
240                    ),
241                )
242
243            finally:
244                if n_skipped:
245                    yield AirbyteMessage(
246                        type=MessageType.LOG,
247                        log=AirbyteLogMessage(
248                            level=Level.WARN,
249                            message=f"Records in file did not pass validation policy. stream={self.name} file={file.uri} n_skipped={n_skipped} validation_policy={self.validation_policy.name}",
250                        ),
251                    )

Yield all records from all remote files in list_files_for_this_sync.

If an error is encountered reading records from a file, log a message and do not attempt to sync the rest of the file.

cursor_field: Union[str, List[str]]
253    @property
254    def cursor_field(self) -> Union[str, List[str]]:
255        """
256        Override to return the default cursor field used by this stream, e.g. an API entity might always use created_at as the cursor field.
257        :return: The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor.
258        """
259        return self.ab_last_mod_col

Override to return the default cursor field used by this stream, e.g. an API entity might always use created_at as the cursor field.

Returns

The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor.

@cache
def get_json_schema(self) -> Mapping[str, Any]:
261    @cache
262    def get_json_schema(self) -> JsonSchema:
263        extra_fields = {
264            self.ab_last_mod_col: {"type": "string"},
265            self.ab_file_name_col: {"type": "string"},
266        }
267        try:
268            schema = self._get_raw_json_schema()
269        except InvalidSchemaError as config_exception:
270            raise AirbyteTracedException(
271                internal_message="Please check the logged errors for more information.",
272                message=FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value,
273                exception=AirbyteTracedException(exception=config_exception),
274                failure_type=FailureType.config_error,
275            )
276        except AirbyteTracedException as ate:
277            raise ate
278        except Exception as exc:
279            raise SchemaInferenceError(
280                FileBasedSourceError.SCHEMA_INFERENCE_ERROR, stream=self.name
281            ) from exc
282        else:
283            return {"type": "object", "properties": {**extra_fields, **schema["properties"]}}

Return the JSON Schema for a stream.
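
The dict merge on the method's last line can be illustrated with plain dicts; the id property below is invented:

    extra_fields = {
        "_ab_source_file_last_modified": {"type": "string"},
        "_ab_source_file_url": {"type": "string"},
    }
    raw_schema = {"type": "object", "properties": {"id": {"type": "integer"}}}

    merged = {"type": "object", "properties": {**extra_fields, **raw_schema["properties"]}}
    # The stream's schema is the user/inferred properties plus the two Airbyte bookkeeping columns.
    assert set(merged["properties"]) == {
        "id",
        "_ab_source_file_last_modified",
        "_ab_source_file_url",
    }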

def get_files(self) -> Iterable[airbyte_cdk.sources.file_based.RemoteFile]:
337    def get_files(self) -> Iterable[RemoteFile]:
338        """
339        Return all files that belong to the stream as defined by the stream's globs.
340        """
341        return self.stream_reader.get_matching_files(
342            self.config.globs or [], self.config.legacy_prefix, self.logger
343        )

Return all files that belong to the stream as defined by the stream's globs.

def infer_schema(self, files: List[airbyte_cdk.sources.file_based.RemoteFile]) -> Mapping[str, Any]:
345    def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
346        loop = asyncio.get_event_loop()
347        schema = loop.run_until_complete(self._infer_schema(files))
348        # infer_schema returns a Mapping that is assumed to be immutable, so we deepcopy to avoid mutating the shared reference
349        return self._fill_nulls(deepcopy(schema))

Infer the schema for files in the stream.
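
Going by the _fill_nulls source shown above, the null-filling pass makes every declared type nullable. A small before/after sketch (the property names are invented):

    from copy import deepcopy

    from airbyte_cdk.sources.file_based.stream import DefaultFileBasedStream

    schema = {
        "id": {"type": "integer"},
        "tags": {"type": ["string", "integer"]},
    }
    filled = DefaultFileBasedStream._fill_nulls(deepcopy(schema))
    assert filled == {
        "id": {"type": ["null", "integer"]},
        "tags": {"type": ["null", "string", "integer"]},
    }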

19class FileIdentitiesStream(IdentitiesStream):
20    """
21    The identities stream. A full refresh stream to sync identities from a certain domain.
22    The stream reader manages the logic to fetch this data, which is implemented on the connector side.
23    """
24
25    is_resumable = False
26
27    def __init__(
28        self,
29        catalog_schema: Optional[Mapping[str, Any]],
30        stream_permissions_reader: AbstractFileBasedStreamPermissionsReader,
31        discovery_policy: AbstractDiscoveryPolicy,
32        errors_collector: FileBasedErrorsCollector,
33    ) -> None:
34        super().__init__()
35        self.catalog_schema = catalog_schema
36        self.stream_permissions_reader = stream_permissions_reader
37        self._discovery_policy = discovery_policy
38        self.errors_collector = errors_collector
39        self._cursor: MutableMapping[str, Any] = {}
40
41    @property
42    def primary_key(self) -> PrimaryKeyType:
43        return None
44
45    def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
46        return self.stream_permissions_reader.load_identity_groups(logger=self.logger)
47
48    @cache
49    def get_json_schema(self) -> JsonSchema:
50        return self.stream_permissions_reader.identities_schema

The identities stream. A full refresh stream to sync identities from a certain domain. The stream reader manages the logic to fetch this data, which is implemented on the connector side.

27    def __init__(
28        self,
29        catalog_schema: Optional[Mapping[str, Any]],
30        stream_permissions_reader: AbstractFileBasedStreamPermissionsReader,
31        discovery_policy: AbstractDiscoveryPolicy,
32        errors_collector: FileBasedErrorsCollector,
33    ) -> None:
34        super().__init__()
35        self.catalog_schema = catalog_schema
36        self.stream_permissions_reader = stream_permissions_reader
37        self._discovery_policy = discovery_policy
38        self.errors_collector = errors_collector
39        self._cursor: MutableMapping[str, Any] = {}
is_resumable = False
Returns

True if this stream allows checkpointing of sync progress and can resume from it on subsequent attempts. This differs from supports_incremental because certain kinds of streams, like those supporting resumable full refresh, can checkpoint progress between attempts for improved fault tolerance; however, they will start from the beginning on the next sync job.

catalog_schema
stream_permissions_reader
errors_collector
primary_key: Union[str, List[str], NoneType]
41    @property
42    def primary_key(self) -> PrimaryKeyType:
43        return None
Returns

A string if there is a single primary key, a list of strings if there is a composite primary key, or a list of lists of strings if the composite primary key consists of nested fields. If the stream has no primary keys, return None.

def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
45    def load_identity_groups(self) -> Iterable[Dict[str, Any]]:
46        return self.stream_permissions_reader.load_identity_groups(logger=self.logger)
@cache
def get_json_schema(self) -> Mapping[str, Any]:
48    @cache
49    def get_json_schema(self) -> JsonSchema:
50        return self.stream_permissions_reader.identities_schema
Returns

A dict of the JSON schema representing this stream.

The default implementation of this method looks for a JSONSchema file with the same name as this stream's "name" property. Override as needed.

class PermissionsFileBasedStream(airbyte_cdk.sources.file_based.stream.DefaultFileBasedStream):
20class PermissionsFileBasedStream(DefaultFileBasedStream):
21    """
22    A specialized stream for handling file-based ACL permissions.
23
24    This stream works with the stream_reader to:
25    1. Fetch ACL permissions for each file in the source
26    2. Transform permissions into a standardized format
27    3. Generate records containing permission information
28
29    The stream_reader is responsible for the actual implementation of permission retrieval
30    and schema definition, while this class handles the streaming interface.
31    """
32
33    def __init__(
34        self, stream_permissions_reader: AbstractFileBasedStreamPermissionsReader, **kwargs: Any
35    ):
36        super().__init__(**kwargs)
37        self.stream_permissions_reader = stream_permissions_reader
38
39    def _filter_schema_invalid_properties(
40        self, configured_catalog_json_schema: Dict[str, Any]
41    ) -> Dict[str, Any]:
42        return self.stream_permissions_reader.file_permissions_schema
43
44    def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
45        """
46        Yield permissions records from all remote files
47        """
48
49        for file in stream_slice["files"]:
50            no_permissions = False
51            file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
52            try:
53                permissions_record = self.stream_permissions_reader.get_file_acl_permissions(
54                    file, logger=self.logger
55                )
56                if not permissions_record:
57                    no_permissions = True
58                    self.logger.warning(
59                        f"Unable to fetch permissions. stream={self.name} file={file.uri}"
60                    )
61                    continue
62                permissions_record = self.transform_record(
63                    permissions_record, file, file_datetime_string
64                )
65                yield stream_data_to_airbyte_message(
66                    self.name, permissions_record, is_file_transfer_message=False
67                )
68            except Exception as e:
69                self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
70                yield AirbyteMessage(
71                    type=MessageType.LOG,
72                    log=AirbyteLogMessage(
73                        level=Level.ERROR,
74                        message=f"Error retrieving files permissions: stream={self.name} file={file.uri}",
75                        stack_trace=traceback.format_exc(),
76                    ),
77                )
78            finally:
79                if no_permissions:
80                    yield AirbyteMessage(
81                        type=MessageType.LOG,
82                        log=AirbyteLogMessage(
83                            level=Level.WARN,
84                            message=f"Unable to fetch permissions. stream={self.name} file={file.uri}",
85                        ),
86                    )
87
88    def _get_raw_json_schema(self) -> JsonSchema:
89        """
90        Retrieve the raw JSON schema for file permissions from the stream reader.
91
92        Returns:
93           The file permissions schema that defines the structure of permission records
94        """
95        return self.stream_permissions_reader.file_permissions_schema

A specialized stream for handling file-based ACL permissions.

This stream works with the stream_reader to:

  1. Fetch ACL permissions for each file in the source
  2. Transform permissions into a standardized format
  3. Generate records containing permission information

The stream_reader is responsible for the actual implementation of permission retrieval and schema definition, while this class handles the streaming interface.
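
A rough sketch of the reader side of this contract, with invented ACL data. The four members below are the ones this module calls; it is written as a duck-typed stand-in because the real AbstractFileBasedStreamPermissionsReader may require additional members:

    import logging
    from typing import Any, Dict, Iterable


    class InMemoryPermissionsReader:
        # Invented ACL data keyed by file URI.
        _acls = {"reports/q1.csv": {"readers": ["alice"], "writers": ["bob"]}}

        @property
        def file_permissions_schema(self) -> Dict[str, Any]:
            return {
                "type": "object",
                "properties": {
                    "readers": {"type": "array", "items": {"type": "string"}},
                    "writers": {"type": "array", "items": {"type": "string"}},
                },
            }

        @property
        def identities_schema(self) -> Dict[str, Any]:
            return {"type": "object", "properties": {"id": {"type": "string"}}}

        def get_file_acl_permissions(self, file: Any, logger: logging.Logger) -> Dict[str, Any]:
            # An empty mapping triggers the stream's "Unable to fetch permissions" warning.
            return self._acls.get(file.uri, {})

        def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
            yield {"id": "alice", "type": "user"}  # invented identity record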

PermissionsFileBasedStream(stream_permissions_reader: airbyte_cdk.sources.file_based.file_based_stream_permissions_reader.AbstractFileBasedStreamPermissionsReader, **kwargs: Any)
33    def __init__(
34        self, stream_permissions_reader: AbstractFileBasedStreamPermissionsReader, **kwargs: Any
35    ):
36        super().__init__(**kwargs)
37        self.stream_permissions_reader = stream_permissions_reader
stream_permissions_reader
def read_records_from_slice(self, stream_slice: Mapping[str, Any]) -> Iterable[airbyte_cdk.AirbyteMessage]:
44    def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
45        """
46        Yield permissions records from all remote files
47        """
48
49        for file in stream_slice["files"]:
50            no_permissions = False
51            file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
52            try:
53                permissions_record = self.stream_permissions_reader.get_file_acl_permissions(
54                    file, logger=self.logger
55                )
56                if not permissions_record:
57                    no_permissions = True
58                    self.logger.warning(
59                        f"Unable to fetch permissions. stream={self.name} file={file.uri}"
60                    )
61                    continue
62                permissions_record = self.transform_record(
63                    permissions_record, file, file_datetime_string
64                )
65                yield stream_data_to_airbyte_message(
66                    self.name, permissions_record, is_file_transfer_message=False
67                )
68            except Exception as e:
69                self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
70                yield AirbyteMessage(
71                    type=MessageType.LOG,
72                    log=AirbyteLogMessage(
73                        level=Level.ERROR,
74                        message=f"Error retrieving files permissions: stream={self.name} file={file.uri}",
75                        stack_trace=traceback.format_exc(),
76                    ),
77                )
78            finally:
79                if no_permissions:
80                    yield AirbyteMessage(
81                        type=MessageType.LOG,
82                        log=AirbyteLogMessage(
83                            level=Level.WARN,
84                            message=f"Unable to fetch permissions. stream={self.name} file={file.uri}",
85                        ),
86                    )

Yield permissions records from all remote files.