airbyte_cdk.sources.file_based.stream.cursor.abstract_file_based_cursor

 1#
 2# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 3#
 4
 5import logging
 6from abc import ABC, abstractmethod
 7from datetime import datetime
 8from typing import Any, Iterable, MutableMapping
 9
10from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
11from airbyte_cdk.sources.file_based.remote_file import RemoteFile
12from airbyte_cdk.sources.file_based.types import StreamState
13
14
class AbstractFileBasedCursor(ABC):
    """
    Interface that every cursor used by a file-based stream must implement.

    All methods are abstract; concrete cursors supply the behavior.
    """

    @abstractmethod
    def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any):
        """
        Shared constructor signature for all cursors.
        :param stream_config: The configuration of the stream this cursor belongs to
        :param kwargs: Additional keyword arguments accepted by concrete cursors
        """
        ...

    @abstractmethod
    def add_file(self, file: RemoteFile) -> None:
        """
        Register a file with the cursor. The stream calls this when it processes a file.
        :param file: The file to add
        """
        ...

    @abstractmethod
    def set_initial_state(self, value: StreamState) -> None:
        """
        Provide the cursor with its initial state. This is separate from construction because
        the stream doesn't know its state yet when the cursor is created.
        :param value: The stream state
        """
        ...

    @abstractmethod
    def get_state(self) -> MutableMapping[str, Any]:
        """
        Return the state of the cursor.
        :return: The cursor state as a mutable mapping keyed by string
        """
        ...

    @abstractmethod
    def get_start_time(self) -> datetime:
        """
        Return the start time of the current sync.
        """
        ...

    @abstractmethod
    def get_files_to_sync(
        self,
        all_files: Iterable[RemoteFile],
        logger: logging.Logger,
    ) -> Iterable[RemoteFile]:
        """
        Select, from the files present in the source, the ones that should be synced.
        :param all_files: All files in the source
        :param logger: Logger passed to the implementation; its use is up to the concrete cursor
        :return: The files that should be synced
        """
        ...
class AbstractFileBasedCursor(abc.ABC):
class AbstractFileBasedCursor(ABC):
    """
    Interface that every cursor used by a file-based stream must implement.

    All methods are abstract; concrete cursors supply the behavior.
    """

    @abstractmethod
    def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any):
        """
        Shared constructor signature for all cursors.
        :param stream_config: The configuration of the stream this cursor belongs to
        :param kwargs: Additional keyword arguments accepted by concrete cursors
        """
        ...

    @abstractmethod
    def add_file(self, file: RemoteFile) -> None:
        """
        Register a file with the cursor. The stream calls this when it processes a file.
        :param file: The file to add
        """
        ...

    @abstractmethod
    def set_initial_state(self, value: StreamState) -> None:
        """
        Provide the cursor with its initial state. This is separate from construction because
        the stream doesn't know its state yet when the cursor is created.
        :param value: The stream state
        """
        ...

    @abstractmethod
    def get_state(self) -> MutableMapping[str, Any]:
        """
        Return the state of the cursor.
        :return: The cursor state as a mutable mapping keyed by string
        """
        ...

    @abstractmethod
    def get_start_time(self) -> datetime:
        """
        Return the start time of the current sync.
        """
        ...

    @abstractmethod
    def get_files_to_sync(
        self,
        all_files: Iterable[RemoteFile],
        logger: logging.Logger,
    ) -> Iterable[RemoteFile]:
        """
        Select, from the files present in the source, the ones that should be synced.
        :param all_files: All files in the source
        :param logger: Logger passed to the implementation; its use is up to the concrete cursor
        :return: The files that should be synced
        """
        ...

Abstract base class for cursors used by file-based streams.

@abstractmethod
AbstractFileBasedCursor(stream_config: airbyte_cdk.sources.file_based.FileBasedStreamConfig, **kwargs: Any)
21    @abstractmethod
22    def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any):
23        """
24        Common interface for all cursors.
25        """
26        ...

Common interface for all cursors.

@abstractmethod
def add_file(self, file: airbyte_cdk.sources.file_based.RemoteFile) -> None:
28    @abstractmethod
29    def add_file(self, file: RemoteFile) -> None:
30        """
31        Add a file to the cursor. This method is called when a file is processed by the stream.
32        :param file: The file to add
33        """
34        ...

Add a file to the cursor. This method is called when a file is processed by the stream.

Parameters
  • file: The file to add
@abstractmethod
def set_initial_state(self, value: MutableMapping[str, Any]) -> None:
36    @abstractmethod
37    def set_initial_state(self, value: StreamState) -> None:
38        """
39        Set the initial state of the cursor. The cursor cannot be initialized at construction time because the stream doesn't know its state yet.
40        :param value: The stream state
41        """

Set the initial state of the cursor. The cursor cannot be initialized at construction time because the stream doesn't know its state yet.

Parameters
  • value: The stream state
@abstractmethod
def get_state(self) -> MutableMapping[str, Any]:
43    @abstractmethod
44    def get_state(self) -> MutableMapping[str, Any]:
45        """
46        Get the state of the cursor.
47        """
48        ...

Get the state of the cursor.

@abstractmethod
def get_start_time(self) -> datetime.datetime:
50    @abstractmethod
51    def get_start_time(self) -> datetime:
52        """
53        Returns the start time of the current sync.
54        """
55        ...

Returns the start time of the current sync.

@abstractmethod
def get_files_to_sync(self, all_files: Iterable[airbyte_cdk.sources.file_based.RemoteFile], logger: logging.Logger) -> Iterable[airbyte_cdk.sources.file_based.RemoteFile]:
57    @abstractmethod
58    def get_files_to_sync(
59        self, all_files: Iterable[RemoteFile], logger: logging.Logger
60    ) -> Iterable[RemoteFile]:
61        """
62        Given the list of files in the source, return the files that should be synced.
63        :param all_files: All files in the source
64        :param logger:
65        :return: The files that should be synced
66        """
67        ...

Given the list of files in the source, return the files that should be synced.

Parameters
  • all_files: All files in the source
  • logger: Logger passed to the implementation
Returns

The files that should be synced