airbyte_cdk.sources.declarative.parsers.model_to_component_factory

#
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
#

from __future__ import annotations

import datetime
import importlib
import inspect
import re
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Mapping,
    MutableMapping,
    Optional,
    Type,
    Union,
    get_args,
    get_origin,
    get_type_hints,
)

from isodate import parse_duration
from pydantic.v1 import BaseModel

from airbyte_cdk.connector_builder.models import (
    LogMessage as ConnectorBuilderLogMessage,
)
from airbyte_cdk.models import FailureType, Level
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
    DeclarativeAuthenticator,
    NoAuth,
)
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
from airbyte_cdk.sources.declarative.auth.oauth import (
    DeclarativeSingleUseRefreshTokenOauth2Authenticator,
)
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
from airbyte_cdk.sources.declarative.auth.token import (
    ApiKeyAuthenticator,
    BasicHttpAuthenticator,
    BearerAuthenticator,
    LegacySessionTokenAuthenticator,
)
from airbyte_cdk.sources.declarative.auth.token_provider import (
    InterpolatedStringTokenProvider,
    SessionTokenProvider,
    TokenProvider,
)
from airbyte_cdk.sources.declarative.checks import (
    CheckDynamicStream,
    CheckStream,
    DynamicStreamCheckConfig,
)
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
from airbyte_cdk.sources.declarative.decoders import (
    Decoder,
    IterableDecoder,
    JsonDecoder,
    PaginationDecoderDecorator,
    XmlDecoder,
    ZipfileDecoder,
)
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
    CompositeRawDecoder,
    CsvParser,
    GzipParser,
    JsonLineParser,
    JsonParser,
    Parser,
)
from airbyte_cdk.sources.declarative.extractors import (
    DpathExtractor,
    RecordFilter,
    RecordSelector,
    ResponseToFileExtractor,
)
from airbyte_cdk.sources.declarative.extractors.record_filter import (
    ClientSideIncrementalRecordFilterDecorator,
)
from airbyte_cdk.sources.declarative.incremental import (
    ChildPartitionResumableFullRefreshCursor,
    ConcurrentCursorFactory,
    ConcurrentPerPartitionCursor,
    CursorFactory,
    DatetimeBasedCursor,
    DeclarativeCursor,
    GlobalSubstreamCursor,
    PerPartitionCursor,
    PerPartitionWithGlobalCursor,
    ResumableFullRefreshCursor,
)
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
    LegacyToPerPartitionStateMigration,
)
from airbyte_cdk.sources.declarative.models import (
    CustomStateMigration,
)
from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (
    DEPRECATION_LOGS_TAG,
    BaseModelWithDeprecations,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AddedFieldDefinition as AddedFieldDefinitionModel,
    AddFields as AddFieldsModel,
    ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
    AsyncJobStatusMap as AsyncJobStatusMapModel,
    AsyncRetriever as AsyncRetrieverModel,
    BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
    BearerAuthenticator as BearerAuthenticatorModel,
    CheckDynamicStream as CheckDynamicStreamModel,
    CheckStream as CheckStreamModel,
    ComplexFieldType as ComplexFieldTypeModel,
    ComponentMappingDefinition as ComponentMappingDefinitionModel,
    CompositeErrorHandler as CompositeErrorHandlerModel,
    ConcurrencyLevel as ConcurrencyLevelModel,
    ConfigComponentsResolver as ConfigComponentsResolverModel,
    ConstantBackoffStrategy as ConstantBackoffStrategyModel,
    CsvDecoder as CsvDecoderModel,
    CursorPagination as CursorPaginationModel,
    CustomAuthenticator as CustomAuthenticatorModel,
    CustomBackoffStrategy as CustomBackoffStrategyModel,
    CustomDecoder as CustomDecoderModel,
    CustomErrorHandler as CustomErrorHandlerModel,
    CustomIncrementalSync as CustomIncrementalSyncModel,
    CustomPaginationStrategy as CustomPaginationStrategyModel,
    CustomPartitionRouter as CustomPartitionRouterModel,
    CustomRecordExtractor as CustomRecordExtractorModel,
    CustomRecordFilter as CustomRecordFilterModel,
    CustomRequester as CustomRequesterModel,
    CustomRetriever as CustomRetrieverModel,
    CustomSchemaLoader as CustomSchemaLoader,
    CustomSchemaNormalization as CustomSchemaNormalizationModel,
    CustomTransformation as CustomTransformationModel,
    DatetimeBasedCursor as DatetimeBasedCursorModel,
    DeclarativeStream as DeclarativeStreamModel,
    DefaultErrorHandler as DefaultErrorHandlerModel,
    DefaultPaginator as DefaultPaginatorModel,
    DpathExtractor as DpathExtractorModel,
    DpathFlattenFields as DpathFlattenFieldsModel,
    DynamicSchemaLoader as DynamicSchemaLoaderModel,
    DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
    ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
    FileUploader as FileUploaderModel,
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
    FlattenFields as FlattenFieldsModel,
    GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
    GroupingPartitionRouter as GroupingPartitionRouterModel,
    GzipDecoder as GzipDecoderModel,
    HTTPAPIBudget as HTTPAPIBudgetModel,
    HttpComponentsResolver as HttpComponentsResolverModel,
    HttpRequester as HttpRequesterModel,
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
    HttpResponseFilter as HttpResponseFilterModel,
    IncrementingCountCursor as IncrementingCountCursorModel,
    InlineSchemaLoader as InlineSchemaLoaderModel,
    IterableDecoder as IterableDecoderModel,
    JsonDecoder as JsonDecoderModel,
    JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
    JsonlDecoder as JsonlDecoderModel,
    JwtAuthenticator as JwtAuthenticatorModel,
    JwtHeaders as JwtHeadersModel,
    JwtPayload as JwtPayloadModel,
    KeysReplace as KeysReplaceModel,
    KeysToLower as KeysToLowerModel,
    KeysToSnakeCase as KeysToSnakeCaseModel,
    LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
    LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
    ListPartitionRouter as ListPartitionRouterModel,
    MinMaxDatetime as MinMaxDatetimeModel,
    MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
    NoAuth as NoAuthModel,
    NoPagination as NoPaginationModel,
    OAuthAuthenticator as OAuthAuthenticatorModel,
    OffsetIncrement as OffsetIncrementModel,
    PageIncrement as PageIncrementModel,
    ParentStreamConfig as ParentStreamConfigModel,
    PropertiesFromEndpoint as PropertiesFromEndpointModel,
    PropertyChunking as PropertyChunkingModel,
    PropertyLimitType as PropertyLimitTypeModel,
    QueryProperties as QueryPropertiesModel,
    Rate as RateModel,
    RecordFilter as RecordFilterModel,
    RecordSelector as RecordSelectorModel,
    RemoveFields as RemoveFieldsModel,
    RequestOption as RequestOptionModel,
    RequestPath as RequestPathModel,
    ResponseToFileExtractor as ResponseToFileExtractorModel,
    SchemaNormalization as SchemaNormalizationModel,
    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
    SelectiveAuthenticator as SelectiveAuthenticatorModel,
    SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
    SimpleRetriever as SimpleRetrieverModel,
    Spec as SpecModel,
    StateDelegatingStream as StateDelegatingStreamModel,
    StreamConfig as StreamConfigModel,
    SubstreamPartitionRouter as SubstreamPartitionRouterModel,
    TypesMap as TypesMapModel,
    UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
    ValueType,
    WaitTimeFromHeader as WaitTimeFromHeaderModel,
    WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
    XmlDecoder as XmlDecoderModel,
    ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    COMPONENTS_MODULE_NAME,
    SDM_COMPONENTS_MODULE_NAME,
)
from airbyte_cdk.sources.declarative.partition_routers import (
    CartesianProductStreamSlicer,
    GroupingPartitionRouter,
    ListPartitionRouter,
    PartitionRouter,
    SinglePartitionRouter,
    SubstreamPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
    AsyncJobPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
    ParentStreamConfig,
)
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
    CompositeErrorHandler,
    DefaultErrorHandler,
    HttpResponseFilter,
)
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
    ConstantBackoffStrategy,
    ExponentialBackoffStrategy,
    WaitTimeFromHeaderBackoffStrategy,
    WaitUntilTimeFromHeaderBackoffStrategy,
)
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
from airbyte_cdk.sources.declarative.requesters.paginators import (
    DefaultPaginator,
    NoPagination,
    PaginatorTestReadDecorator,
)
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
    CursorPaginationStrategy,
    CursorStopCondition,
    OffsetIncrement,
    PageIncrement,
    StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.query_properties import (
    PropertiesFromEndpoint,
    PropertyChunking,
    QueryProperties,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
    PropertyLimitType,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
    GroupByKey,
)
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
    DatetimeBasedRequestOptionsProvider,
    DefaultRequestOptionsProvider,
    InterpolatedRequestOptionsProvider,
    RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
from airbyte_cdk.sources.declarative.resolvers import (
    ComponentMappingDefinition,
    ConfigComponentsResolver,
    HttpComponentsResolver,
    StreamConfig,
)
from airbyte_cdk.sources.declarative.retrievers import (
    AsyncRetriever,
    LazySimpleRetriever,
    SimpleRetriever,
    SimpleRetrieverTestReadDecorator,
)
from airbyte_cdk.sources.declarative.retrievers.file_uploader import (
    ConnectorBuilderFileUploader,
    DefaultFileUploader,
    FileUploader,
    LocalFileSystemFileWriter,
    NoopFileWriter,
)
from airbyte_cdk.sources.declarative.schema import (
    ComplexFieldType,
    DefaultSchemaLoader,
    DynamicSchemaLoader,
    InlineSchemaLoader,
    JsonFileSchemaLoader,
    SchemaTypeIdentifier,
    TypesMap,
)
from airbyte_cdk.sources.declarative.schema.composite_schema_loader import CompositeSchemaLoader
from airbyte_cdk.sources.declarative.spec import Spec
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
from airbyte_cdk.sources.declarative.transformations import (
    AddFields,
    RecordTransformation,
    RemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
    DpathFlattenFields,
    KeyTransformation,
)
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
    FlattenFields,
)
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
    KeysReplaceTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
    KeysToLowerTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)
from airbyte_cdk.sources.message import (
    InMemoryMessageRepository,
    LogAppenderMessageRepositoryDecorator,
    MessageRepository,
    NoopMessageRepository,
)
from airbyte_cdk.sources.streams.call_rate import (
    APIBudget,
    FixedWindowCallRatePolicy,
    HttpAPIBudget,
    HttpRequestRegexMatcher,
    MovingWindowCallRatePolicy,
    Rate,
    UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.concurrent.clamping import (
    ClampingEndProvider,
    ClampingStrategy,
    DayClampingStrategy,
    MonthClampingStrategy,
    NoClamping,
    WeekClampingStrategy,
    Weekday,
)
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
    CustomFormatConcurrentStreamStateConverter,
    DateTimeStreamStateConverter,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

ComponentDefinition = Mapping[str, Any]

SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}

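# Illustrative sketch (not part of the original module): the mapping above resolves a
# manifest-level schema normalization setting to a TransformConfig flag, which can then
# back a TypeTransformer, e.g.:
#
#   transform_config = SCHEMA_TRANSFORMER_TYPE_MAPPING[SchemaNormalizationModel.Default]
#   transformer = TypeTransformer(transform_config)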

class ModelToComponentFactory:
    EPOCH_DATETIME_FORMAT = "%s"

    def __init__(
        self,
        limit_pages_fetched_per_slice: Optional[int] = None,
        limit_slices_fetched: Optional[int] = None,
        emit_connector_builder_messages: bool = False,
        disable_retries: bool = False,
        disable_cache: bool = False,
        disable_resumable_full_refresh: bool = False,
        message_repository: Optional[MessageRepository] = None,
        connector_state_manager: Optional[ConnectorStateManager] = None,
        max_concurrent_async_job_count: Optional[int] = None,
    ):
        self._init_mappings()
        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
        self._limit_slices_fetched = limit_slices_fetched
        self._emit_connector_builder_messages = emit_connector_builder_messages
        self._disable_retries = disable_retries
        self._disable_cache = disable_cache
        self._disable_resumable_full_refresh = disable_resumable_full_refresh
        self._message_repository = message_repository or InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
        # placeholder for deprecation warnings
        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []

    def _init_mappings(self) -> None:
        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
            AddedFieldDefinitionModel: self.create_added_field_definition,
            AddFieldsModel: self.create_add_fields,
            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
            BearerAuthenticatorModel: self.create_bearer_authenticator,
            CheckStreamModel: self.create_check_stream,
            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
            CheckDynamicStreamModel: self.create_check_dynamic_stream,
            CompositeErrorHandlerModel: self.create_composite_error_handler,
            ConcurrencyLevelModel: self.create_concurrency_level,
            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
            CsvDecoderModel: self.create_csv_decoder,
            CursorPaginationModel: self.create_cursor_pagination,
            CustomAuthenticatorModel: self.create_custom_component,
            CustomBackoffStrategyModel: self.create_custom_component,
            CustomDecoderModel: self.create_custom_component,
            CustomErrorHandlerModel: self.create_custom_component,
            CustomIncrementalSyncModel: self.create_custom_component,
            CustomRecordExtractorModel: self.create_custom_component,
            CustomRecordFilterModel: self.create_custom_component,
            CustomRequesterModel: self.create_custom_component,
            CustomRetrieverModel: self.create_custom_component,
            CustomSchemaLoader: self.create_custom_component,
            CustomSchemaNormalizationModel: self.create_custom_component,
            CustomStateMigration: self.create_custom_component,
            CustomPaginationStrategyModel: self.create_custom_component,
            CustomPartitionRouterModel: self.create_custom_component,
            CustomTransformationModel: self.create_custom_component,
            DatetimeBasedCursorModel: self.create_datetime_based_cursor,
            DeclarativeStreamModel: self.create_declarative_stream,
            DefaultErrorHandlerModel: self.create_default_error_handler,
            DefaultPaginatorModel: self.create_default_paginator,
            DpathExtractorModel: self.create_dpath_extractor,
            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
            GroupByKeyMergeStrategyModel: self.create_group_by_key,
            HttpRequesterModel: self.create_http_requester,
            HttpResponseFilterModel: self.create_http_response_filter,
            InlineSchemaLoaderModel: self.create_inline_schema_loader,
            JsonDecoderModel: self.create_json_decoder,
            JsonlDecoderModel: self.create_jsonl_decoder,
            GzipDecoderModel: self.create_gzip_decoder,
            KeysToLowerModel: self.create_keys_to_lower_transformation,
            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
            KeysReplaceModel: self.create_keys_replace_transformation,
            FlattenFieldsModel: self.create_flatten_fields,
            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
            IterableDecoderModel: self.create_iterable_decoder,
            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
            XmlDecoderModel: self.create_xml_decoder,
            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
            TypesMapModel: self.create_types_map,
            ComplexFieldTypeModel: self.create_complex_field_type,
            JwtAuthenticatorModel: self.create_jwt_authenticator,
            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
            ListPartitionRouterModel: self.create_list_partition_router,
            MinMaxDatetimeModel: self.create_min_max_datetime,
            NoAuthModel: self.create_no_auth,
            NoPaginationModel: self.create_no_pagination,
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
            ParentStreamConfigModel: self.create_parent_stream_config,
            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
            PropertyChunkingModel: self.create_property_chunking,
            QueryPropertiesModel: self.create_query_properties,
            RecordFilterModel: self.create_record_filter,
            RecordSelectorModel: self.create_record_selector,
            RemoveFieldsModel: self.create_remove_fields,
            RequestPathModel: self.create_request_path,
            RequestOptionModel: self.create_request_option,
            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
            SelectiveAuthenticatorModel: self.create_selective_authenticator,
            SimpleRetrieverModel: self.create_simple_retriever,
            StateDelegatingStreamModel: self.create_state_delegating_stream,
            SpecModel: self.create_spec,
            SubstreamPartitionRouterModel: self.create_substream_partition_router,
            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
            AsyncRetrieverModel: self.create_async_retriever,
            HttpComponentsResolverModel: self.create_http_components_resolver,
            ConfigComponentsResolverModel: self.create_config_components_resolver,
            StreamConfigModel: self.create_stream_config,
            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
            ZipfileDecoderModel: self.create_zipfile_decoder,
            HTTPAPIBudgetModel: self.create_http_api_budget,
            FileUploaderModel: self.create_file_uploader,
            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
            GroupingPartitionRouterModel: self.create_grouping_partition_router,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}

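    # Note (illustrative, not part of the original module): TYPE_NAME_TO_MODEL lets the
    # factory map a type name string from a manifest back to its Pydantic model class
    # when the fields of a custom component need that second parsing pass, e.g.:
    #
    #   model_class = self.TYPE_NAME_TO_MODEL["DpathExtractor"]  # -> DpathExtractorModel
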
    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and a Mapping representing a component definition, and creates the declarative component and
        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating
        declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_type != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )

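    # Usage sketch (illustrative only; the manifest snippet below is a hypothetical
    # example, not taken from a real connector):
    #
    #   factory = ModelToComponentFactory()
    #   extractor = factory.create_component(
    #       model_type=DpathExtractorModel,
    #       component_definition={"type": "DpathExtractor", "field_path": ["data"]},
    #       config={},
    #   )
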
    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")

        # collect deprecation warnings for supported models.
        if isinstance(model, BaseModelWithDeprecations):
            self._collect_model_deprecations(model)

        return component_constructor(model=model, config=config, **kwargs)

    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
        """
        Returns the deprecation warnings that were collected during the creation of components.
        """
        return self._collected_deprecation_logs

    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
        """
        Collects deprecation logs from the given model and appends any new logs to the internal collection.

        This method checks if the provided model has deprecation logs (identified by the presence of the
        DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through
        each deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it
        has not already been collected, ensuring that duplicate logs are avoided.

        Args:
            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
        """
        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
            for log in model._deprecation_logs:
                # avoid duplicates for deprecation logs observed.
                if log not in self._collected_deprecation_logs:
                    self._collected_deprecation_logs.append(log)

    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )

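    # Usage sketch (illustrative; the manifest snippet and config values below are
    # hypothetical examples):
    #
    #   add_fields = factory.create_component(
    #       model_type=AddFieldsModel,
    #       component_definition={
    #           "type": "AddFields",
    #           "fields": [
    #               {"type": "AddedFieldDefinition", "path": ["shop_id"], "value": "{{ config['shop_id'] }}"},
    #           ],
    #       },
    #       config={"shop_id": "my-shop"},
    #   )
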
    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

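    # Illustrative example (hypothetical record): with field_path=["address"], a
    # key_transformation prefix of "addr_", and delete_origin_value=True, a record like
    # {"id": 1, "address": {"city": "Oslo"}} would be flattened to
    # {"id": 1, "addr_city": "Oslo"}.
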
    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]

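    # For example, _json_schema_type_name_to_type(ValueType.number) returns float,
    # while passing None returns None, meaning no cast is applied.
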
    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and must be set to the empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )

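    # Usage sketch (illustrative; the manifest snippet and config values below are
    # hypothetical examples):
    #
    #   authenticator = factory.create_component(
    #       model_type=ApiKeyAuthenticatorModel,
    #       component_definition={
    #           "type": "ApiKeyAuthenticator",
    #           "api_token": "{{ config['api_key'] }}",
    #           "inject_into": {"type": "RequestOption", "inject_into": "header", "field_name": "X-API-Key"},
    #       },
    #       config={"api_key": "secret"},
    #   )
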
    def create_legacy_to_per_partition_state_migration(
        self,
        model: LegacyToPerPartitionStateMigrationModel,
        config: Mapping[str, Any],
        declarative_stream: DeclarativeStreamModel,
    ) -> LegacyToPerPartitionStateMigration:
        retriever = declarative_stream.retriever
        if not isinstance(retriever, SimpleRetrieverModel):
            raise ValueError(
                f"LegacyToPerPartitionStateMigration can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
            )
        partition_router = retriever.partition_router
        if not isinstance(
            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
        ):
            raise ValueError(
                f"LegacyToPerPartitionStateMigration can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
            )
        if not hasattr(partition_router, "parent_stream_configs"):
            raise ValueError(
                "LegacyToPerPartitionStateMigration can only be applied with a parent stream configuration."
            )

        if not hasattr(declarative_stream, "incremental_sync"):
            raise ValueError(
                "LegacyToPerPartitionStateMigration can only be applied with an incremental_sync configuration."
            )

        return LegacyToPerPartitionStateMigration(
            partition_router,  # type: ignore # was already checked above
            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
            config,
            declarative_stream.parameters,  # type: ignore # a Mapping[str, Any] is expected here, but a Dict[str, Any] is passed
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=token_provider,
            )

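    # Flow sketch (illustrative; hypothetical manifest): request_authentication decides
    # the wrapper type.
    #
    #   component_definition={
    #       "type": "SessionTokenAuthenticator",
    #       "login_requester": {...},
    #       "session_token_path": ["token"],
    #       "expiration_duration": "PT1H",
    #       "request_authentication": {"type": "Bearer"},
    #   }
    #
    # yields a BearerAuthenticator whose token comes from a SessionTokenProvider that
    # logs in via login_requester and re-authenticates after one hour.
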
    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and must be set to the empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_dynamic_stream_check_config(
        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
    ) -> DynamicStreamCheckConfig:
        return DynamicStreamCheckConfig(
            dynamic_stream_name=model.dynamic_stream_name,
            stream_count=model.stream_count or 0,
        )

    def create_check_stream(
        self, model: CheckStreamModel, config: Config, **kwargs: Any
    ) -> CheckStream:
        if model.dynamic_streams_check_configs is None and model.stream_names is None:
            raise ValueError(
                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
            )

        dynamic_streams_check_configs = (
            [
                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
                for dynamic_stream_check_config in model.dynamic_streams_check_configs
            ]
            if model.dynamic_streams_check_configs
            else []
        )

        return CheckStream(
            stream_names=model.stream_names or [],
            dynamic_streams_check_configs=dynamic_streams_check_configs,
            parameters={},
        )

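    # Usage sketch (illustrative; hypothetical manifest): a connection check that reads
    # from one static stream.
    #
    #   check = factory.create_component(
    #       model_type=CheckStreamModel,
    #       component_definition={"type": "CheckStream", "stream_names": ["customers"]},
    #       config={},
    #   )
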
    @staticmethod
    def create_check_dynamic_stream(
        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
    ) -> CheckDynamicStream:
        assert model.use_check_availability is not None  # for mypy

        use_check_availability = model.use_check_availability

        return CheckDynamicStream(
            stream_count=model.stream_count,
            use_check_availability=use_check_availability,
            parameters={},
        )

    def create_composite_error_handler(
        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
    ) -> CompositeErrorHandler:
        error_handlers = [
            self._create_component_from_model(model=error_handler_model, config=config)
            for error_handler_model in model.error_handlers
        ]
        return CompositeErrorHandler(
            error_handlers=error_handlers, parameters=model.parameters or {}
        )

    @staticmethod
    def create_concurrency_level(
        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
    ) -> ConcurrencyLevel:
        return ConcurrencyLevel(
            default_concurrency=model.default_concurrency,
            max_concurrency=model.max_concurrency,
            config=config,
            parameters={},
        )

    @staticmethod
    def apply_stream_state_migrations(
        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        if stream_state_migrations:
            for state_migration in stream_state_migrations:
                if state_migration.should_migrate(stream_state):
                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
                    stream_state = dict(state_migration.migrate(stream_state))
        return stream_state

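    # Illustrative example: each migration is applied only when should_migrate() accepts
    # the incoming shape, so a legacy state such as {"created_at": "2021-01-01"} can be
    # rewritten into a per-partition layout while already-migrated state passes through
    # unchanged. (The state values here are hypothetical.)
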
1113    def create_concurrent_cursor_from_datetime_based_cursor(
1114        self,
1115        model_type: Type[BaseModel],
1116        component_definition: ComponentDefinition,
1117        stream_name: str,
1118        stream_namespace: Optional[str],
1119        config: Config,
1120        message_repository: Optional[MessageRepository] = None,
1121        runtime_lookback_window: Optional[datetime.timedelta] = None,
1122        stream_state_migrations: Optional[List[Any]] = None,
1123        **kwargs: Any,
1124    ) -> ConcurrentCursor:
1125        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1126        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1127        # incoming state and connector_state_manager that is initialized when the component factory is created
1128        stream_state = (
1129            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1130            if "stream_state" not in kwargs
1131            else kwargs["stream_state"]
1132        )
1133        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1134
1135        component_type = component_definition.get("type")
1136        if component_definition.get("type") != model_type.__name__:
1137            raise ValueError(
1138                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1139            )
1140
1141        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1142
1143        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1144            raise ValueError(
1145                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1146            )
1147
1148        interpolated_cursor_field = InterpolatedString.create(
1149            datetime_based_cursor_model.cursor_field,
1150            parameters=datetime_based_cursor_model.parameters or {},
1151        )
1152        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1153
1154        interpolated_partition_field_start = InterpolatedString.create(
1155            datetime_based_cursor_model.partition_field_start or "start_time",
1156            parameters=datetime_based_cursor_model.parameters or {},
1157        )
1158        interpolated_partition_field_end = InterpolatedString.create(
1159            datetime_based_cursor_model.partition_field_end or "end_time",
1160            parameters=datetime_based_cursor_model.parameters or {},
1161        )
1162
1163        slice_boundary_fields = (
1164            interpolated_partition_field_start.eval(config=config),
1165            interpolated_partition_field_end.eval(config=config),
1166        )
1167
1168        datetime_format = datetime_based_cursor_model.datetime_format
1169
1170        cursor_granularity = (
1171            parse_duration(datetime_based_cursor_model.cursor_granularity)
1172            if datetime_based_cursor_model.cursor_granularity
1173            else None
1174        )
1175
1176        lookback_window = None
1177        interpolated_lookback_window = (
1178            InterpolatedString.create(
1179                datetime_based_cursor_model.lookback_window,
1180                parameters=datetime_based_cursor_model.parameters or {},
1181            )
1182            if datetime_based_cursor_model.lookback_window
1183            else None
1184        )
1185        if interpolated_lookback_window:
1186            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
1187            if evaluated_lookback_window:
1188                lookback_window = parse_duration(evaluated_lookback_window)
1189
1190        connector_state_converter: DateTimeStreamStateConverter
1191        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1192            datetime_format=datetime_format,
1193            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1194            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1195            cursor_granularity=cursor_granularity,
1196        )
1197
1198        # Adjusts the stream state by applying the runtime lookback window.
1199        # This is used to ensure correct state handling in case of failed partitions.
1200        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1201        if runtime_lookback_window and stream_state_value:
1202            new_stream_state = (
1203                connector_state_converter.parse_timestamp(stream_state_value)
1204                - runtime_lookback_window
1205            )
1206            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1207                new_stream_state
1208            )
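            # Hypothetical example, assuming an ISO-like datetime_format: with stream state
            # {"updated_at": "2024-01-10T00:00:00Z"} and a runtime_lookback_window of
            # datetime.timedelta(days=5), the stored value is rewound to "2024-01-05T00:00:00Z"
            # so records from failed partitions are re-read on the next attempt.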
1209
1210        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
1211        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
1212            start_date_runtime_value = self.create_min_max_datetime(
1213                model=datetime_based_cursor_model.start_datetime, config=config
1214            )
1215        else:
1216            start_date_runtime_value = datetime_based_cursor_model.start_datetime
1217
1218        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
1219        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
1220            end_date_runtime_value = self.create_min_max_datetime(
1221                model=datetime_based_cursor_model.end_datetime, config=config
1222            )
1223        else:
1224            end_date_runtime_value = datetime_based_cursor_model.end_datetime
1225
1226        interpolated_start_date = MinMaxDatetime.create(
1227            interpolated_string_or_min_max_datetime=start_date_runtime_value,
1228            parameters=datetime_based_cursor_model.parameters,
1229        )
1230        interpolated_end_date = (
1231            None
1232            if not end_date_runtime_value
1233            else MinMaxDatetime.create(
1234                end_date_runtime_value, datetime_based_cursor_model.parameters
1235            )
1236        )
1237
1238        # If datetime format is not specified, then start/end datetime should inherit it from the stream slicer
1239        if not interpolated_start_date.datetime_format:
1240            interpolated_start_date.datetime_format = datetime_format
1241        if interpolated_end_date and not interpolated_end_date.datetime_format:
1242            interpolated_end_date.datetime_format = datetime_format
1243
1244        start_date = interpolated_start_date.get_datetime(config=config)
1245        end_date_provider = (
1246            partial(interpolated_end_date.get_datetime, config)
1247            if interpolated_end_date
1248            else connector_state_converter.get_end_provider()
1249        )
1250
1251        if (
1252            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
1253        ) or (
1254            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
1255        ):
1256            raise ValueError(
1257                f"If step is defined, cursor_granularity should be as well and vice-versa. "
1258                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
1259            )
1260
1261        # When step is not defined, default to a step size from the starting date to the present moment
1262        step_length = datetime.timedelta.max
1263        interpolated_step = (
1264            InterpolatedString.create(
1265                datetime_based_cursor_model.step,
1266                parameters=datetime_based_cursor_model.parameters or {},
1267            )
1268            if datetime_based_cursor_model.step
1269            else None
1270        )
1271        if interpolated_step:
1272            evaluated_step = interpolated_step.eval(config)
1273            if evaluated_step:
1274                step_length = parse_duration(evaluated_step)
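            # For instance, a manifest step of "P1M" parses via isodate.parse_duration into a one-month
            # slice range; when no step is defined, step_length stays at datetime.timedelta.max and the
            # stream is read as a single slice from the start date to the present moment.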
1275
1276        clamping_strategy: ClampingStrategy = NoClamping()
1277        if datetime_based_cursor_model.clamping:
1278            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1279            # it is still better than shifting the low-code interpolation concept into the ConcurrentCursor runtime
1280            # object, which we want to keep agnostic of being low-code
1281            target = InterpolatedString(
1282                string=datetime_based_cursor_model.clamping.target,
1283                parameters=datetime_based_cursor_model.parameters or {},
1284            )
1285            evaluated_target = target.eval(config=config)
1286            match evaluated_target:
1287                case "DAY":
1288                    clamping_strategy = DayClampingStrategy()
1289                    end_date_provider = ClampingEndProvider(
1290                        DayClampingStrategy(is_ceiling=False),
1291                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1292                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
1293                    )
1294                case "WEEK":
1295                    if (
1296                        not datetime_based_cursor_model.clamping.target_details
1297                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
1298                    ):
1299                        raise ValueError(
1300                            "Given WEEK clamping, weekday needs to be provided as target_details"
1301                        )
1302                    weekday = self._assemble_weekday(
1303                        datetime_based_cursor_model.clamping.target_details["weekday"]
1304                    )
1305                    clamping_strategy = WeekClampingStrategy(weekday)
1306                    end_date_provider = ClampingEndProvider(
1307                        WeekClampingStrategy(weekday, is_ceiling=False),
1308                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1309                        granularity=cursor_granularity or datetime.timedelta(days=1),
1310                    )
1311                case "MONTH":
1312                    clamping_strategy = MonthClampingStrategy()
1313                    end_date_provider = ClampingEndProvider(
1314                        MonthClampingStrategy(is_ceiling=False),
1315                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1316                        granularity=cursor_granularity or datetime.timedelta(days=1),
1317                    )
1318                case _:
1319                    raise ValueError(
1320                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1321                    )
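            # A minimal manifest sketch of the clamping block handled above (values are illustrative):
            #
            #   clamping:
            #     target: "WEEK"
            #     target_details:
            #       weekday: "MONDAY"
            #
            # which resolves to WeekClampingStrategy(Weekday.MONDAY) and wraps end_date_provider in a
            # ClampingEndProvider with a daily default granularity when cursor_granularity is unset.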
1322
1323        return ConcurrentCursor(
1324            stream_name=stream_name,
1325            stream_namespace=stream_namespace,
1326            stream_state=stream_state,
1327            message_repository=message_repository or self._message_repository,
1328            connector_state_manager=self._connector_state_manager,
1329            connector_state_converter=connector_state_converter,
1330            cursor_field=cursor_field,
1331            slice_boundary_fields=slice_boundary_fields,
1332            start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1333            end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1334            lookback_window=lookback_window,
1335            slice_range=step_length,
1336            cursor_granularity=cursor_granularity,
1337            clamping_strategy=clamping_strategy,
1338        )
1339
1340    def create_concurrent_cursor_from_incrementing_count_cursor(
1341        self,
1342        model_type: Type[BaseModel],
1343        component_definition: ComponentDefinition,
1344        stream_name: str,
1345        stream_namespace: Optional[str],
1346        config: Config,
1347        message_repository: Optional[MessageRepository] = None,
1348        **kwargs: Any,
1349    ) -> ConcurrentCursor:
1350        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1351        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1352        # incoming state and connector_state_manager that are initialized when the component factory is created.
1353        stream_state = (
1354            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1355            if "stream_state" not in kwargs
1356            else kwargs["stream_state"]
1357        )
1358
1359        component_type = component_definition.get("type")
1360        if component_type != model_type.__name__:
1361            raise ValueError(
1362                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1363            )
1364
1365        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
1366
1367        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
1368            raise ValueError(
1369                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
1370            )
1371
1372        interpolated_start_value = (
1373            InterpolatedString.create(
1374                incrementing_count_cursor_model.start_value,  # type: ignore
1375                parameters=incrementing_count_cursor_model.parameters or {},
1376            )
1377            if incrementing_count_cursor_model.start_value
1378            else 0
1379        )
1380
1381        interpolated_cursor_field = InterpolatedString.create(
1382            incrementing_count_cursor_model.cursor_field,
1383            parameters=incrementing_count_cursor_model.parameters or {},
1384        )
1385        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1386
1387        connector_state_converter = IncrementingCountStreamStateConverter(
1388            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1389        )
1390
1391        return ConcurrentCursor(
1392            stream_name=stream_name,
1393            stream_namespace=stream_namespace,
1394            stream_state=stream_state,
1395            message_repository=message_repository or self._message_repository,
1396            connector_state_manager=self._connector_state_manager,
1397            connector_state_converter=connector_state_converter,
1398            cursor_field=cursor_field,
1399            slice_boundary_fields=None,
1400            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1401            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1402        )
1403
1404    def _assemble_weekday(self, weekday: str) -> Weekday:
1405        match weekday:
1406            case "MONDAY":
1407                return Weekday.MONDAY
1408            case "TUESDAY":
1409                return Weekday.TUESDAY
1410            case "WEDNESDAY":
1411                return Weekday.WEDNESDAY
1412            case "THURSDAY":
1413                return Weekday.THURSDAY
1414            case "FRIDAY":
1415                return Weekday.FRIDAY
1416            case "SATURDAY":
1417                return Weekday.SATURDAY
1418            case "SUNDAY":
1419                return Weekday.SUNDAY
1420            case _:
1421                raise ValueError(f"Unknown weekday {weekday}")
1422
1423    def create_concurrent_cursor_from_perpartition_cursor(
1424        self,
1425        state_manager: ConnectorStateManager,
1426        model_type: Type[BaseModel],
1427        component_definition: ComponentDefinition,
1428        stream_name: str,
1429        stream_namespace: Optional[str],
1430        config: Config,
1431        stream_state: MutableMapping[str, Any],
1432        partition_router: PartitionRouter,
1433        stream_state_migrations: Optional[List[Any]] = None,
1434        **kwargs: Any,
1435    ) -> ConcurrentPerPartitionCursor:
1436        component_type = component_definition.get("type")
1437        if component_type != model_type.__name__:
1438            raise ValueError(
1439                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1440            )
1441
1442        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1443
1444        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1445            raise ValueError(
1446                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1447            )
1448
1449        interpolated_cursor_field = InterpolatedString.create(
1450            datetime_based_cursor_model.cursor_field,
1451            parameters=datetime_based_cursor_model.parameters or {},
1452        )
1453        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1454
1455        datetime_format = datetime_based_cursor_model.datetime_format
1456
1457        cursor_granularity = (
1458            parse_duration(datetime_based_cursor_model.cursor_granularity)
1459            if datetime_based_cursor_model.cursor_granularity
1460            else None
1461        )
1462
1463        connector_state_converter: DateTimeStreamStateConverter
1464        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1465            datetime_format=datetime_format,
1466            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1467            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1468            cursor_granularity=cursor_granularity,
1469        )
1470
1471        # Create the cursor factory
1472        cursor_factory = ConcurrentCursorFactory(
1473            partial(
1474                self.create_concurrent_cursor_from_datetime_based_cursor,
1475                state_manager=state_manager,
1476                model_type=model_type,
1477                component_definition=component_definition,
1478                stream_name=stream_name,
1479                stream_namespace=stream_namespace,
1480                config=config,
1481                message_repository=NoopMessageRepository(),
1482                stream_state_migrations=stream_state_migrations,
1483            )
1484        )
1485        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1486        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
1487        use_global_cursor = isinstance(
1488            partition_router, GroupingPartitionRouter
1489        ) or component_definition.get("global_substream_cursor", False)
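            # Illustrative manifest keys: setting `global_substream_cursor: true` on the incremental
            # sync definition, or routing partitions through a GroupingPartitionRouter, tracks a single
            # global state instead of one state per partition.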
1490
1491        # Return the concurrent cursor and state converter
1492        return ConcurrentPerPartitionCursor(
1493            cursor_factory=cursor_factory,
1494            partition_router=partition_router,
1495            stream_name=stream_name,
1496            stream_namespace=stream_namespace,
1497            stream_state=stream_state,
1498            message_repository=self._message_repository,  # type: ignore
1499            connector_state_manager=state_manager,
1500            connector_state_converter=connector_state_converter,
1501            cursor_field=cursor_field,
1502            use_global_cursor=use_global_cursor,
1503        )
1504
1505    @staticmethod
1506    def create_constant_backoff_strategy(
1507        model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
1508    ) -> ConstantBackoffStrategy:
1509        return ConstantBackoffStrategy(
1510            backoff_time_in_seconds=model.backoff_time_in_seconds,
1511            config=config,
1512            parameters=model.parameters or {},
1513        )
1514
1515    def create_cursor_pagination(
1516        self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
1517    ) -> CursorPaginationStrategy:
1518        if isinstance(decoder, PaginationDecoderDecorator):
1519            inner_decoder = decoder.decoder
1520        else:
1521            inner_decoder = decoder
1522            decoder = PaginationDecoderDecorator(decoder=decoder)
1523
1524        if self._is_supported_decoder_for_pagination(inner_decoder):
1525            decoder_to_use = decoder
1526        else:
1527            raise ValueError(
1528                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1529            )
1530
1531        return CursorPaginationStrategy(
1532            cursor_value=model.cursor_value,
1533            decoder=decoder_to_use,
1534            page_size=model.page_size,
1535            stop_condition=model.stop_condition,
1536            config=config,
1537            parameters=model.parameters or {},
1538        )
1539
1540    def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
1541        """
1542        Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
1543        instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor.
1544        :param model: The Pydantic model of the custom component being created
1545        :param config: The custom defined connector config
1546        :return: The declarative component built from the Pydantic model to be used at runtime
1547        """
1548        custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1549        component_fields = get_type_hints(custom_component_class)
1550        model_args = model.dict()
1551        model_args["config"] = config
1552
1553        # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions,
1554        # we defer to these arguments over the component's definition
1555        for key, arg in kwargs.items():
1556            model_args[key] = arg
1557
1558        # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
1559        # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
1560        # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
1561        for model_field, model_value in model_args.items():
1562            # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
1563            if (
1564                isinstance(model_value, dict)
1565                and "type" not in model_value
1566                and model_field in component_fields
1567            ):
1568                derived_type = self._derive_component_type_from_type_hints(
1569                    component_fields.get(model_field)
1570                )
1571                if derived_type:
1572                    model_value["type"] = derived_type
1573
1574            if self._is_component(model_value):
1575                model_args[model_field] = self._create_nested_component(
1576                    model, model_field, model_value, config
1577                )
1578            elif isinstance(model_value, list):
1579                vals = []
1580                for v in model_value:
1581                    if isinstance(v, dict) and "type" not in v and model_field in component_fields:
1582                        derived_type = self._derive_component_type_from_type_hints(
1583                            component_fields.get(model_field)
1584                        )
1585                        if derived_type:
1586                            v["type"] = derived_type
1587                    if self._is_component(v):
1588                        vals.append(self._create_nested_component(model, model_field, v, config))
1589                    else:
1590                        vals.append(v)
1591                model_args[model_field] = vals
1592
1593        kwargs = {
1594            class_field: model_args[class_field]
1595            for class_field in component_fields.keys()
1596            if class_field in model_args
1597        }
1598        return custom_component_class(**kwargs)
1599
1600    @staticmethod
1601    def _get_class_from_fully_qualified_class_name(
1602        full_qualified_class_name: str,
1603    ) -> Any:
1604        """Get a class from its fully qualified name.
1605
1606        If a custom components module is needed, we assume it is already registered - probably
1607        as `source_declarative_manifest.components` or `components`.
1608
1609        Args:
1610            full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1611
1612        Returns:
1613            Any: The class object.
1614
1615        Raises:
1616            ValueError: If the class cannot be loaded.
1617        """
1618        split = full_qualified_class_name.split(".")
1619        module_name_full = ".".join(split[:-1])
1620        class_name = split[-1]
1621
1622        try:
1623            module_ref = importlib.import_module(module_name_full)
1624        except ModuleNotFoundError as e:
1625            if split[0] == "source_declarative_manifest":
1626                # During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the tests, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append.
1627                try:
1628                    import os
1629
1630                    module_name_with_source_declarative_manifest = ".".join(split[1:-1])
1631                    module_ref = importlib.import_module(
1632                        module_name_with_source_declarative_manifest
1633                    )
1634                except ModuleNotFoundError:
1635                    raise ValueError(f"Could not load module `{module_name_full}`.") from e
1636            else:
1637                raise ValueError(f"Could not load module `{module_name_full}`.") from e
1638
1639        try:
1640            return getattr(module_ref, class_name)
1641        except AttributeError as e:
1642            raise ValueError(
1643                f"Could not load class `{class_name}` from module `{module_name_full}`.",
1644            ) from e
1645
1646    @staticmethod
1647    def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
1648        interface = field_type
1649        while True:
1650            origin = get_origin(interface)
1651            if origin:
1652                # Unnest types until we reach the raw type
1653                # List[T] -> T
1654                # Optional[List[T]] -> T
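                    # e.g. Optional[List[DpathExtractor]] -> List[DpathExtractor] -> DpathExtractor,
                    # which yields "DpathExtractor" below; Optional[str] ends at the builtin str and
                    # yields None (DpathExtractor used illustratively).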
1655                args = get_args(interface)
1656                interface = args[0]
1657            else:
1658                break
1659        if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
1660            return interface.__name__
1661        return None
1662
1663    @staticmethod
1664    def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1665        if not cls:
1666            return False
1667        return cls.__module__ == "builtins"
1668
1669    @staticmethod
1670    def _extract_missing_parameters(error: TypeError) -> List[str]:
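            # For example, TypeError("__init__() missing 2 required keyword-only arguments: 'name' and
            # 'url_base'") yields ["name", "url_base"].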
1671        parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
1672        if parameter_search:
1673            return re.findall(r"\'(.+?)\'", parameter_search.group(1))
1674        else:
1675            return []
1676
1677    def _create_nested_component(
1678        self, model: Any, model_field: str, model_value: Any, config: Config
1679    ) -> Any:
1680        type_name = model_value.get("type", None)
1681        if not type_name:
1682            # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
1683            return model_value
1684
1685        model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
1686        if model_type:
1687            parsed_model = model_type.parse_obj(model_value)
1688            try:
1689                # To improve usability of the language, certain fields are shared between components. This can come in the form of
1690                # a parent component passing some of its fields to a child component or the parent extracting fields from other child
1691                # components and passing it to others. One example is the DefaultPaginator referencing the HttpRequester url_base
1692                # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created
1693                # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that
1694                # are needed by a component and could not be shared.
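                # Illustrative snippet: a DefaultPaginator nested in a custom component cannot inherit
                # url_base from a sibling HttpRequester, so the manifest can pass it in explicitly:
                #
                #   paginator:
                #     type: DefaultPaginator
                #     $parameters:
                #       url_base: "https://api.example.com"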
1695                model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
1696                constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
1697                model_parameters = model_value.get("$parameters", {})
1698                matching_parameters = {
1699                    kwarg: model_parameters[kwarg]
1700                    for kwarg in constructor_kwargs
1701                    if kwarg in model_parameters
1702                }
1703                return self._create_component_from_model(
1704                    model=parsed_model, config=config, **matching_parameters
1705                )
1706            except TypeError as error:
1707                missing_parameters = self._extract_missing_parameters(error)
1708                if missing_parameters:
1709                    raise ValueError(
1710                        f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
1711                        + ", ".join(
1712                            (
1713                                f"{type_name}.$parameters.{parameter}"
1714                                for parameter in missing_parameters
1715                            )
1716                        )
1717                    )
1718                raise TypeError(
1719                    f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
1720                )
1721        else:
1722            raise ValueError(
1723                f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
1724            )
1725
1726    @staticmethod
1727    def _is_component(model_value: Any) -> bool:
1728        return isinstance(model_value, dict) and model_value.get("type") is not None
1729
1730    def create_datetime_based_cursor(
1731        self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
1732    ) -> DatetimeBasedCursor:
1733        start_datetime: Union[str, MinMaxDatetime] = (
1734            model.start_datetime
1735            if isinstance(model.start_datetime, str)
1736            else self.create_min_max_datetime(model.start_datetime, config)
1737        )
1738        end_datetime: Union[str, MinMaxDatetime, None] = None
1739        if model.is_data_feed and model.end_datetime:
1740            raise ValueError("Data feed does not support end_datetime")
1741        if model.is_data_feed and model.is_client_side_incremental:
1742            raise ValueError(
1743                "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them."
1744            )
1745        if model.end_datetime:
1746            end_datetime = (
1747                model.end_datetime
1748                if isinstance(model.end_datetime, str)
1749                else self.create_min_max_datetime(model.end_datetime, config)
1750            )
1751
1752        end_time_option = (
1753            self._create_component_from_model(
1754                model.end_time_option, config, parameters=model.parameters or {}
1755            )
1756            if model.end_time_option
1757            else None
1758        )
1759        start_time_option = (
1760            self._create_component_from_model(
1761                model.start_time_option, config, parameters=model.parameters or {}
1762            )
1763            if model.start_time_option
1764            else None
1765        )
1766
1767        return DatetimeBasedCursor(
1768            cursor_field=model.cursor_field,
1769            cursor_datetime_formats=model.cursor_datetime_formats
1770            if model.cursor_datetime_formats
1771            else [],
1772            cursor_granularity=model.cursor_granularity,
1773            datetime_format=model.datetime_format,
1774            end_datetime=end_datetime,
1775            start_datetime=start_datetime,
1776            step=model.step,
1777            end_time_option=end_time_option,
1778            lookback_window=model.lookback_window,
1779            start_time_option=start_time_option,
1780            partition_field_end=model.partition_field_end,
1781            partition_field_start=model.partition_field_start,
1782            message_repository=self._message_repository,
1783            is_compare_strictly=model.is_compare_strictly,
1784            config=config,
1785            parameters=model.parameters or {},
1786        )
1787
1788    def create_declarative_stream(
1789        self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
1790    ) -> DeclarativeStream:
1791        # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
1792        # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
1793        # Retriever. This is done in the declarative stream, not the retriever, to support custom retrievers. The custom create methods in
1794        # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
1795        combined_slicers = self._merge_stream_slicers(model=model, config=config)
1796
1797        primary_key = model.primary_key.__root__ if model.primary_key else None
1798        stop_condition_on_cursor = (
1799            model.incremental_sync
1800            and hasattr(model.incremental_sync, "is_data_feed")
1801            and model.incremental_sync.is_data_feed
1802        )
1803        client_side_incremental_sync = None
1804        if (
1805            model.incremental_sync
1806            and hasattr(model.incremental_sync, "is_client_side_incremental")
1807            and model.incremental_sync.is_client_side_incremental
1808        ):
1809            supported_slicers = (
1810                DatetimeBasedCursor,
1811                GlobalSubstreamCursor,
1812                PerPartitionWithGlobalCursor,
1813            )
1814            if combined_slicers and not isinstance(combined_slicers, supported_slicers):
1815                raise ValueError(
1816                    "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1817                )
1818            cursor = (
1819                combined_slicers
1820                if isinstance(
1821                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1822                )
1823                else self._create_component_from_model(model=model.incremental_sync, config=config)
1824            )
1825
1826            client_side_incremental_sync = {"cursor": cursor}
1827
1828        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1829            cursor_model = model.incremental_sync
1830
1831            end_time_option = (
1832                self._create_component_from_model(
1833                    cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1834                )
1835                if cursor_model.end_time_option
1836                else None
1837            )
1838            start_time_option = (
1839                self._create_component_from_model(
1840                    cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1841                )
1842                if cursor_model.start_time_option
1843                else None
1844            )
1845
1846            request_options_provider = DatetimeBasedRequestOptionsProvider(
1847                start_time_option=start_time_option,
1848                end_time_option=end_time_option,
1849                partition_field_start=cursor_model.partition_field_start,
1850                partition_field_end=cursor_model.partition_field_end,
1851                config=config,
1852                parameters=model.parameters or {},
1853            )
1854        elif model.incremental_sync and isinstance(
1855            model.incremental_sync, IncrementingCountCursorModel
1856        ):
1857            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
1858
1859            start_time_option = (
1860                self._create_component_from_model(
1861                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1862                    config,
1863                    parameters=cursor_model.parameters or {},
1864                )
1865                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1866                else None
1867            )
1868
1869            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
1870            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
1871            partition_field_start = "start"
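                # e.g. a concurrent slice such as {"start": 100} (illustrative value) is then mapped
                # onto the request through start_time_option, which reads the slice's "start" field.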
1872
1873            request_options_provider = DatetimeBasedRequestOptionsProvider(
1874                start_time_option=start_time_option,
1875                partition_field_start=partition_field_start,
1876                config=config,
1877                parameters=model.parameters or {},
1878            )
1879        else:
1880            request_options_provider = None
1881
1882        transformations = []
1883        if model.transformations:
1884            for transformation_model in model.transformations:
1885                transformations.append(
1886                    self._create_component_from_model(model=transformation_model, config=config)
1887                )
1888        file_uploader = None
1889        if model.file_uploader:
1890            file_uploader = self._create_component_from_model(
1891                model=model.file_uploader, config=config
1892            )
1893
1894        retriever = self._create_component_from_model(
1895            model=model.retriever,
1896            config=config,
1897            name=model.name,
1898            primary_key=primary_key,
1899            stream_slicer=combined_slicers,
1900            request_options_provider=request_options_provider,
1901            stop_condition_on_cursor=stop_condition_on_cursor,
1902            client_side_incremental_sync=client_side_incremental_sync,
1903            transformations=transformations,
1904            file_uploader=file_uploader,
1905            incremental_sync=model.incremental_sync,
1906        )
1907        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
1908
1909        if model.state_migrations:
1910            state_transformations = [
1911                self._create_component_from_model(state_migration, config, declarative_stream=model)
1912                for state_migration in model.state_migrations
1913            ]
1914        else:
1915            state_transformations = []
1916
1917        schema_loader: Union[
1918            CompositeSchemaLoader,
1919            DefaultSchemaLoader,
1920            DynamicSchemaLoader,
1921            InlineSchemaLoader,
1922            JsonFileSchemaLoader,
1923        ]
1924        if model.schema_loader and isinstance(model.schema_loader, list):
1925            nested_schema_loaders = [
1926                self._create_component_from_model(model=nested_schema_loader, config=config)
1927                for nested_schema_loader in model.schema_loader
1928            ]
1929            schema_loader = CompositeSchemaLoader(
1930                schema_loaders=nested_schema_loaders, parameters={}
1931            )
1932        elif model.schema_loader:
1933            schema_loader = self._create_component_from_model(
1934                model=model.schema_loader,  # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models
1935                config=config,
1936            )
1937        else:
1938            options = model.parameters or {}
1939            if "name" not in options:
1940                options["name"] = model.name
1941            schema_loader = DefaultSchemaLoader(config=config, parameters=options)
1942
1943        return DeclarativeStream(
1944            name=model.name or "",
1945            primary_key=primary_key,
1946            retriever=retriever,
1947            schema_loader=schema_loader,
1948            stream_cursor_field=cursor_field or "",
1949            state_migrations=state_transformations,
1950            config=config,
1951            parameters=model.parameters or {},
1952        )
1953
1954    def _build_stream_slicer_from_partition_router(
1955        self,
1956        model: Union[
1957            AsyncRetrieverModel,
1958            CustomRetrieverModel,
1959            SimpleRetrieverModel,
1960        ],
1961        config: Config,
1962        stream_name: Optional[str] = None,
1963    ) -> Optional[PartitionRouter]:
1964        if (
1965            hasattr(model, "partition_router")
1966            and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
1967            and model.partition_router
1968        ):
1969            stream_slicer_model = model.partition_router
1970            if isinstance(stream_slicer_model, list):
1971                return CartesianProductStreamSlicer(
1972                    [
1973                        self._create_component_from_model(
1974                            model=slicer, config=config, stream_name=stream_name or ""
1975                        )
1976                        for slicer in stream_slicer_model
1977                    ],
1978                    parameters={},
1979                )
1980            else:
1981                return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
1982                    model=stream_slicer_model, config=config, stream_name=stream_name or ""
1983                )
1984        return None
1985
1986    def _build_incremental_cursor(
1987        self,
1988        model: DeclarativeStreamModel,
1989        stream_slicer: Optional[PartitionRouter],
1990        config: Config,
1991    ) -> Optional[StreamSlicer]:
1992        if model.incremental_sync and stream_slicer:
1993            if model.retriever.type == "AsyncRetriever":
1994                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1995                    state_manager=self._connector_state_manager,
1996                    model_type=DatetimeBasedCursorModel,
1997                    component_definition=model.incremental_sync.__dict__,
1998                    stream_name=model.name or "",
1999                    stream_namespace=None,
2000                    config=config or {},
2001                    stream_state={},
2002                    partition_router=stream_slicer,
2003                )
2004
2005            incremental_sync_model = model.incremental_sync
2006            cursor_component = self._create_component_from_model(
2007                model=incremental_sync_model, config=config
2008            )
2009            is_global_cursor = (
2010                hasattr(incremental_sync_model, "global_substream_cursor")
2011                and incremental_sync_model.global_substream_cursor
2012            )
2013
2014            if is_global_cursor:
2015                return GlobalSubstreamCursor(
2016                    stream_cursor=cursor_component, partition_router=stream_slicer
2017                )
2018            return PerPartitionWithGlobalCursor(
2019                cursor_factory=CursorFactory(
2020                    lambda: self._create_component_from_model(
2021                        model=incremental_sync_model, config=config
2022                    ),
2023                ),
2024                partition_router=stream_slicer,
2025                stream_cursor=cursor_component,
2026            )
2027        elif model.incremental_sync:
2028            if model.retriever.type == "AsyncRetriever":
2029                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2030                    model_type=DatetimeBasedCursorModel,
2031                    component_definition=model.incremental_sync.__dict__,
2032                    stream_name=model.name or "",
2033                    stream_namespace=None,
2034                    config=config or {},
2035                    stream_state_migrations=model.state_migrations,
2036                )
2037            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return]  # Will be created Cursor as stream_slicer_model is model.incremental_sync
2038        return None
2039
2040    def _build_resumable_cursor(
2041        self,
2042        model: Union[
2043            AsyncRetrieverModel,
2044            CustomRetrieverModel,
2045            SimpleRetrieverModel,
2046        ],
2047        stream_slicer: Optional[PartitionRouter],
2048    ) -> Optional[StreamSlicer]:
2049        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
2050            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
2051            return ResumableFullRefreshCursor(parameters={})
2052        elif stream_slicer:
2053            # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
2054            return PerPartitionCursor(
2055                cursor_factory=CursorFactory(
2056                    create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
2057                ),
2058                partition_router=stream_slicer,
2059            )
2060        return None
2061
2062    def _merge_stream_slicers(
2063        self, model: DeclarativeStreamModel, config: Config
2064    ) -> Optional[StreamSlicer]:
2065        retriever_model = model.retriever
2066
2067        stream_slicer = self._build_stream_slicer_from_partition_router(
2068            retriever_model, config, stream_name=model.name
2069        )
2070
2071        if retriever_model.type == "AsyncRetriever":
2072            is_not_datetime_cursor = (
2073                model.incremental_sync.type != "DatetimeBasedCursor"
2074                if model.incremental_sync
2075                else None
2076            )
2077            is_partition_router = (
2078                bool(retriever_model.partition_router) if model.incremental_sync else None
2079            )
2080
2081            if is_not_datetime_cursor:
2082                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
2083                # support of unordered slices (for example, when we trigger reports for January and February, the report
2084                # for February can be completed first). Once we have support for a custom concurrent cursor or have a new
2085                # implementation available in the CDK, we can enable more cursors here.
2086                raise ValueError(
2087                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
2088                )
2089
2090            if is_partition_router and not stream_slicer:
2091                # Note that per-partition support is being developed in parallel; once that work is merged,
2092                # we could support it here by calling create_concurrent_cursor_from_perpartition_cursor
2093                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
2094
2095        if model.incremental_sync:
2096            return self._build_incremental_cursor(model, stream_slicer, config)
2097
2098        return (
2099            stream_slicer
2100            if self._disable_resumable_full_refresh
2101            else self._build_resumable_cursor(retriever_model, stream_slicer)
2102        )
2103
2104    def create_default_error_handler(
2105        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
2106    ) -> DefaultErrorHandler:
2107        backoff_strategies = []
2108        if model.backoff_strategies:
2109            for backoff_strategy_model in model.backoff_strategies:
2110                backoff_strategies.append(
2111                    self._create_component_from_model(model=backoff_strategy_model, config=config)
2112                )
2113
2114        response_filters = []
2115        if model.response_filters:
2116            for response_filter_model in model.response_filters:
2117                response_filters.append(
2118                    self._create_component_from_model(model=response_filter_model, config=config)
2119                )
2120        response_filters.append(
2121            HttpResponseFilter(config=config, parameters=model.parameters or {})
2122        )
2123
2124        return DefaultErrorHandler(
2125            backoff_strategies=backoff_strategies,
2126            max_retries=model.max_retries,
2127            response_filters=response_filters,
2128            config=config,
2129            parameters=model.parameters or {},
2130        )
2131
2132    def create_default_paginator(
2133        self,
2134        model: DefaultPaginatorModel,
2135        config: Config,
2136        *,
2137        url_base: str,
2138        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2139        decoder: Optional[Decoder] = None,
2140        cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
2141    ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
2142        if decoder:
2143            if self._is_supported_decoder_for_pagination(decoder):
2144                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
2145            else:
2146                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
2147        else:
2148            decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
2149        page_size_option = (
2150            self._create_component_from_model(model=model.page_size_option, config=config)
2151            if model.page_size_option
2152            else None
2153        )
2154        page_token_option = (
2155            self._create_component_from_model(model=model.page_token_option, config=config)
2156            if model.page_token_option
2157            else None
2158        )
2159        pagination_strategy = self._create_component_from_model(
2160            model=model.pagination_strategy,
2161            config=config,
2162            decoder=decoder_to_use,
2163            extractor_model=extractor_model,
2164        )
2165        if cursor_used_for_stop_condition:
2166            pagination_strategy = StopConditionPaginationStrategyDecorator(
2167                pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
2168            )
2169        paginator = DefaultPaginator(
2170            decoder=decoder_to_use,
2171            page_size_option=page_size_option,
2172            page_token_option=page_token_option,
2173            pagination_strategy=pagination_strategy,
2174            url_base=url_base,
2175            config=config,
2176            parameters=model.parameters or {},
2177        )
2178        if self._limit_pages_fetched_per_slice:
2179            return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
2180        return paginator
2181
2182    def create_dpath_extractor(
2183        self,
2184        model: DpathExtractorModel,
2185        config: Config,
2186        decoder: Optional[Decoder] = None,
2187        **kwargs: Any,
2188    ) -> DpathExtractor:
2189        if decoder:
2190            decoder_to_use = decoder
2191        else:
2192            decoder_to_use = JsonDecoder(parameters={})
2193        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
2194        return DpathExtractor(
2195            decoder=decoder_to_use,
2196            field_path=model_field_path,
2197            config=config,
2198            parameters=model.parameters or {},
2199        )
2200
2201    @staticmethod
2202    def create_response_to_file_extractor(
2203        model: ResponseToFileExtractorModel,
2204        **kwargs: Any,
2205    ) -> ResponseToFileExtractor:
2206        return ResponseToFileExtractor(parameters=model.parameters or {})
2207
2208    @staticmethod
2209    def create_exponential_backoff_strategy(
2210        model: ExponentialBackoffStrategyModel, config: Config
2211    ) -> ExponentialBackoffStrategy:
2212        return ExponentialBackoffStrategy(
2213            factor=model.factor or 5, parameters=model.parameters or {}, config=config
2214        )
2215
2216    @staticmethod
2217    def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey:
2218        return GroupByKey(model.key, config=config, parameters=model.parameters or {})
2219
2220    def create_http_requester(
2221        self,
2222        model: HttpRequesterModel,
2223        config: Config,
2224        decoder: Decoder = JsonDecoder(parameters={}),
2225        query_properties_key: Optional[str] = None,
2226        use_cache: Optional[bool] = None,
2227        *,
2228        name: str,
2229    ) -> HttpRequester:
2230        authenticator = (
2231            self._create_component_from_model(
2232                model=model.authenticator,
2233                config=config,
2234                url_base=model.url or model.url_base,
2235                name=name,
2236                decoder=decoder,
2237            )
2238            if model.authenticator
2239            else None
2240        )
2241        error_handler = (
2242            self._create_component_from_model(model=model.error_handler, config=config)
2243            if model.error_handler
2244            else DefaultErrorHandler(
2245                backoff_strategies=[],
2246                response_filters=[],
2247                config=config,
2248                parameters=model.parameters or {},
2249            )
2250        )
2251
2252        api_budget = self._api_budget
2253
2254        # Removes QueryProperties components from the interpolated mappings because they are designed
2255        # to be used by the SimpleRetriever and are resolved by the provider directly from the slice
2256        # instead of through jinja interpolation
2257        request_parameters: Optional[Union[str, Mapping[str, str]]]
2258        if isinstance(model.request_parameters, Mapping):
2259            request_parameters = self._remove_query_properties(model.request_parameters)
2260        else:
2261            request_parameters = model.request_parameters
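            # Illustrative example: request_parameters of {"fields": <QueryProperties definition>,
            # "page_size": "50"} is reduced to {"page_size": "50"} here; the QueryProperties component
            # is resolved by the SimpleRetriever through query_properties_key instead.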
2262
2263        request_options_provider = InterpolatedRequestOptionsProvider(
2264            request_body=model.request_body,
2265            request_body_data=model.request_body_data,
2266            request_body_json=model.request_body_json,
2267            request_headers=model.request_headers,
2268            request_parameters=request_parameters,
2269            query_properties_key=query_properties_key,
2270            config=config,
2271            parameters=model.parameters or {},
2272        )
2273
2274        assert model.use_cache is not None  # for mypy
2275        assert model.http_method is not None  # for mypy
2276
2277        should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
2278
2279        return HttpRequester(
2280            name=name,
2281            url=model.url,
2282            url_base=model.url_base,
2283            path=model.path,
2284            authenticator=authenticator,
2285            error_handler=error_handler,
2286            api_budget=api_budget,
2287            http_method=HttpMethod[model.http_method.value],
2288            request_options_provider=request_options_provider,
2289            config=config,
2290            disable_retries=self._disable_retries,
2291            parameters=model.parameters or {},
2292            message_repository=self._message_repository,
2293            use_cache=should_use_cache,
2294            decoder=decoder,
2295            stream_response=decoder.is_stream_response() if decoder else False,
2296        )
2297
2298    @staticmethod
2299    def create_http_response_filter(
2300        model: HttpResponseFilterModel, config: Config, **kwargs: Any
2301    ) -> HttpResponseFilter:
2302        if model.action:
2303            action = ResponseAction(model.action.value)
2304        else:
2305            action = None
2306
2307        failure_type = FailureType(model.failure_type.value) if model.failure_type else None
2308
2309        http_codes = (
2310            set(model.http_codes) if model.http_codes else set()
2311        )  # JSON schema notation has no set data type. The schema enforces an array of unique elements
2312
2313        return HttpResponseFilter(
2314            action=action,
2315            failure_type=failure_type,
2316            error_message=model.error_message or "",
2317            error_message_contains=model.error_message_contains or "",
2318            http_codes=http_codes,
2319            predicate=model.predicate or "",
2320            config=config,
2321            parameters=model.parameters or {},
2322        )
2323
2324    @staticmethod
2325    def create_inline_schema_loader(
2326        model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
2327    ) -> InlineSchemaLoader:
2328        return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
2329
2330    def create_complex_field_type(
2331        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
2332    ) -> ComplexFieldType:
2333        items = (
2334            self._create_component_from_model(model=model.items, config=config)
2335            if isinstance(model.items, ComplexFieldTypeModel)
2336            else model.items
2337        )
2338
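            # Illustrative (hypothetical) example: a model with field_type="array" and
            # items="integer" would yield ComplexFieldType(field_type="array", items="integer").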
2339        return ComplexFieldType(field_type=model.field_type, items=items)
2340
2341    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
2342        target_type = (
2343            self._create_component_from_model(model=model.target_type, config=config)
2344            if isinstance(model.target_type, ComplexFieldTypeModel)
2345            else model.target_type
2346        )
2347
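            # With no condition provided, the default "True" makes the mapping unconditional.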
2348        return TypesMap(
2349            target_type=target_type,
2350            current_type=model.current_type,
2351            condition=model.condition if model.condition is not None else "True",
2352        )
2353
2354    def create_schema_type_identifier(
2355        self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
2356    ) -> SchemaTypeIdentifier:
2357        types_mapping = []
2358        if model.types_mapping:
2359            types_mapping.extend(
2360                [
2361                    self._create_component_from_model(types_map, config=config)
2362                    for types_map in model.types_mapping
2363                ]
2364            )
2365        model_schema_pointer: List[Union[InterpolatedString, str]] = (
2366            [x for x in model.schema_pointer] if model.schema_pointer else []
2367        )
2368        model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
2369        model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
2370            [x for x in model.type_pointer] if model.type_pointer else None
2371        )
2372
2373        return SchemaTypeIdentifier(
2374            schema_pointer=model_schema_pointer,
2375            key_pointer=model_key_pointer,
2376            type_pointer=model_type_pointer,
2377            types_mapping=types_mapping,
2378            parameters=model.parameters or {},
2379        )
2380
2381    def create_dynamic_schema_loader(
2382        self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2383    ) -> DynamicSchemaLoader:
2384        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2385        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2386
2387        schema_transformations = []
2388        if model.schema_transformations:
2389            for transformation_model in model.schema_transformations:
2390                schema_transformations.append(
2391                    self._create_component_from_model(model=transformation_model, config=config)
2392                )
2393
2394        retriever = self._create_component_from_model(
2395            model=model.retriever,
2396            config=config,
2397            name="dynamic_properties",
2398            primary_key=None,
2399            stream_slicer=combined_slicers,
2400            transformations=[],
2401            use_cache=True,
2402        )
2403        schema_type_identifier = self._create_component_from_model(
2404            model.schema_type_identifier, config=config, parameters=model.parameters or {}
2405        )
2406        return DynamicSchemaLoader(
2407            retriever=retriever,
2408            config=config,
2409            schema_transformations=schema_transformations,
2410            schema_type_identifier=schema_type_identifier,
2411            parameters=model.parameters or {},
2412        )
2413
2414    @staticmethod
2415    def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2416        return JsonDecoder(parameters={})
2417
2418    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2419        return CompositeRawDecoder(
2420            parser=ModelToComponentFactory._get_parser(model, config),
2421            stream_response=not self._emit_connector_builder_messages,
2422        )
2423
2424    def create_jsonl_decoder(
2425        self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2426    ) -> Decoder:
2427        return CompositeRawDecoder(
2428            parser=ModelToComponentFactory._get_parser(model, config),
2429            stream_response=not self._emit_connector_builder_messages,
2430        )
2431
2432    def create_gzip_decoder(
2433        self, model: GzipDecoderModel, config: Config, **kwargs: Any
2434    ) -> Decoder:
2435        _compressed_response_types = {
2436            "gzip",
2437            "x-gzip",
2438            "gzip, deflate",
2439            "x-gzip, deflate",
2440            "application/zip",
2441            "application/gzip",
2442            "application/x-gzip",
2443            "application/x-zip-compressed",
2444        }
2445
2446        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser
2447
2448        if self._emit_connector_builder_messages:
2449            # This is very surprising, but if the response is not streamed,
2450            # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data, as opposed to response.raw,
2451            # which uses urllib3 directly and does not uncompress the data.
2452            return CompositeRawDecoder(gzip_parser.inner_parser, False)
2453
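            # by_headers() applies the gzip parser only when the response headers advertise a
            # compressed payload; otherwise the fallback (inner, uncompressed) parser is used.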
2454        return CompositeRawDecoder.by_headers(
2455            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2456            stream_response=True,
2457            fallback_parser=gzip_parser.inner_parser,
2458        )
2459
2460    @staticmethod
2461    def create_incrementing_count_cursor(
2462        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
2463    ) -> DatetimeBasedCursor:
2464        # This should not actually get used anywhere at runtime, but it is needed to pass checks since
2465        # we still parse models into components. The issue is that there's no runtime implementation of an
2466        # IncrementingCountCursor.
2467        # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor, because the check runs without a ConcurrentCursor.
2468        return DatetimeBasedCursor(
2469            cursor_field=model.cursor_field,
2470            datetime_format="%Y-%m-%d",
2471            start_datetime="2024-12-12",
2472            config=config,
2473            parameters={},
2474        )
2475
2476    @staticmethod
2477    def create_iterable_decoder(
2478        model: IterableDecoderModel, config: Config, **kwargs: Any
2479    ) -> IterableDecoder:
2480        return IterableDecoder(parameters={})
2481
2482    @staticmethod
2483    def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2484        return XmlDecoder(parameters={})
2485
2486    def create_zipfile_decoder(
2487        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2488    ) -> ZipfileDecoder:
2489        return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2490
2491    @staticmethod
2492    def _get_parser(model: BaseModel, config: Config) -> Parser:
2493        if isinstance(model, JsonDecoderModel):
2494            # Note that the logic is a bit different from the JsonDecoder, as some legacy behavior is maintained: returning {} on error cases
2495            return JsonParser()
2496        elif isinstance(model, JsonlDecoderModel):
2497            return JsonLineParser()
2498        elif isinstance(model, CsvDecoderModel):
2499            return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
2500        elif isinstance(model, GzipDecoderModel):
2501            return GzipParser(
2502                inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
2503            )
2504        elif isinstance(
2505            model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
2506        ):
2507            raise ValueError(f"Decoder type {model} does not have a parser associated with it")
2508
2509        raise ValueError(f"Unknown decoder type {model}")
2510
2511    @staticmethod
2512    def create_json_file_schema_loader(
2513        model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
2514    ) -> JsonFileSchemaLoader:
2515        return JsonFileSchemaLoader(
2516            file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2517        )
2518
2519    @staticmethod
2520    def create_jwt_authenticator(
2521        model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2522    ) -> JwtAuthenticator:
2523        jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2524        jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2525        return JwtAuthenticator(
2526            config=config,
2527            parameters=model.parameters or {},
2528            algorithm=JwtAlgorithm(model.algorithm.value),
2529            secret_key=model.secret_key,
2530            base64_encode_secret_key=model.base64_encode_secret_key,
2531            token_duration=model.token_duration,
2532            header_prefix=model.header_prefix,
2533            kid=jwt_headers.kid,
2534            typ=jwt_headers.typ,
2535            cty=jwt_headers.cty,
2536            iss=jwt_payload.iss,
2537            sub=jwt_payload.sub,
2538            aud=jwt_payload.aud,
2539            additional_jwt_headers=model.additional_jwt_headers,
2540            additional_jwt_payload=model.additional_jwt_payload,
2541        )
2542
2543    def create_list_partition_router(
2544        self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2545    ) -> ListPartitionRouter:
2546        request_option = (
2547            self._create_component_from_model(model.request_option, config)
2548            if model.request_option
2549            else None
2550        )
2551        return ListPartitionRouter(
2552            cursor_field=model.cursor_field,
2553            request_option=request_option,
2554            values=model.values,
2555            config=config,
2556            parameters=model.parameters or {},
2557        )
2558
2559    @staticmethod
2560    def create_min_max_datetime(
2561        model: MinMaxDatetimeModel, config: Config, **kwargs: Any
2562    ) -> MinMaxDatetime:
2563        return MinMaxDatetime(
2564            datetime=model.datetime,
2565            datetime_format=model.datetime_format or "",
2566            max_datetime=model.max_datetime or "",
2567            min_datetime=model.min_datetime or "",
2568            parameters=model.parameters or {},
2569        )
2570
2571    @staticmethod
2572    def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
2573        return NoAuth(parameters=model.parameters or {})
2574
2575    @staticmethod
2576    def create_no_pagination(
2577        model: NoPaginationModel, config: Config, **kwargs: Any
2578    ) -> NoPagination:
2579        return NoPagination(parameters={})
2580
2581    def create_oauth_authenticator(
2582        self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
2583    ) -> DeclarativeOauth2Authenticator:
2584        profile_assertion = (
2585            self._create_component_from_model(model.profile_assertion, config=config)
2586            if model.profile_assertion
2587            else None
2588        )
2589
2590        if model.refresh_token_updater:
2591            # ignore the type error because fixing it would touch many dependencies; revisit later
2592            return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
2593                config,
2594                InterpolatedString.create(
2595                    model.token_refresh_endpoint,  # type: ignore
2596                    parameters=model.parameters or {},
2597                ).eval(config),
2598                access_token_name=InterpolatedString.create(
2599                    model.access_token_name or "access_token", parameters=model.parameters or {}
2600                ).eval(config),
2601                refresh_token_name=model.refresh_token_updater.refresh_token_name,
2602                expires_in_name=InterpolatedString.create(
2603                    model.expires_in_name or "expires_in", parameters=model.parameters or {}
2604                ).eval(config),
2605                client_id_name=InterpolatedString.create(
2606                    model.client_id_name or "client_id", parameters=model.parameters or {}
2607                ).eval(config),
2608                client_id=InterpolatedString.create(
2609                    model.client_id, parameters=model.parameters or {}
2610                ).eval(config)
2611                if model.client_id
2612                else model.client_id,
2613                client_secret_name=InterpolatedString.create(
2614                    model.client_secret_name or "client_secret", parameters=model.parameters or {}
2615                ).eval(config),
2616                client_secret=InterpolatedString.create(
2617                    model.client_secret, parameters=model.parameters or {}
2618                ).eval(config)
2619                if model.client_secret
2620                else model.client_secret,
2621                access_token_config_path=model.refresh_token_updater.access_token_config_path,
2622                refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
2623                token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
2624                grant_type_name=InterpolatedString.create(
2625                    model.grant_type_name or "grant_type", parameters=model.parameters or {}
2626                ).eval(config),
2627                grant_type=InterpolatedString.create(
2628                    model.grant_type or "refresh_token", parameters=model.parameters or {}
2629                ).eval(config),
2630                refresh_request_body=InterpolatedMapping(
2631                    model.refresh_request_body or {}, parameters=model.parameters or {}
2632                ).eval(config),
2633                refresh_request_headers=InterpolatedMapping(
2634                    model.refresh_request_headers or {}, parameters=model.parameters or {}
2635                ).eval(config),
2636                scopes=model.scopes,
2637                token_expiry_date_format=model.token_expiry_date_format,
2638                message_repository=self._message_repository,
2639                refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
2640                refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
2641                refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
2642            )
2643        # ignore the type error because fixing it would touch many dependencies; revisit later
2644        return DeclarativeOauth2Authenticator(  # type: ignore
2645            access_token_name=model.access_token_name or "access_token",
2646            access_token_value=model.access_token_value,
2647            client_id_name=model.client_id_name or "client_id",
2648            client_id=model.client_id,
2649            client_secret_name=model.client_secret_name or "client_secret",
2650            client_secret=model.client_secret,
2651            expires_in_name=model.expires_in_name or "expires_in",
2652            grant_type_name=model.grant_type_name or "grant_type",
2653            grant_type=model.grant_type or "refresh_token",
2654            refresh_request_body=model.refresh_request_body,
2655            refresh_request_headers=model.refresh_request_headers,
2656            refresh_token_name=model.refresh_token_name or "refresh_token",
2657            refresh_token=model.refresh_token,
2658            scopes=model.scopes,
2659            token_expiry_date=model.token_expiry_date,
2660            token_expiry_date_format=model.token_expiry_date_format,
2661            token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2662            token_refresh_endpoint=model.token_refresh_endpoint,
2663            config=config,
2664            parameters=model.parameters or {},
2665            message_repository=self._message_repository,
2666            profile_assertion=profile_assertion,
2667            use_profile_assertion=model.use_profile_assertion,
2668        )
2669
2670    def create_offset_increment(
2671        self,
2672        model: OffsetIncrementModel,
2673        config: Config,
2674        decoder: Decoder,
2675        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2676        **kwargs: Any,
2677    ) -> OffsetIncrement:
2678        if isinstance(decoder, PaginationDecoderDecorator):
2679            inner_decoder = decoder.decoder
2680        else:
2681            inner_decoder = decoder
2682            decoder = PaginationDecoderDecorator(decoder=decoder)
2683
2684        if self._is_supported_decoder_for_pagination(inner_decoder):
2685            decoder_to_use = decoder
2686        else:
2687            raise ValueError(
2688                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
2689            )
2690
2691        # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
2692        # so that it could be shared by OffsetIncrement and RecordSelector. However, because we instantiate the
2693        # decoder with various decorators here, but not in create_record_selector, it is simpler to retain the existing
2694        # behavior of having two separate extractors with identical behavior, since they use the same extractor model.
2695        # When we have more time to investigate, we can look into reusing the same component.
2696        extractor = (
2697            self._create_component_from_model(
2698                model=extractor_model, config=config, decoder=decoder_to_use
2699            )
2700            if extractor_model
2701            else None
2702        )
2703
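            # Note: when inject_on_first_request is False (the default here), the offset
            # request option is presumably omitted from the very first request.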
2704        return OffsetIncrement(
2705            page_size=model.page_size,
2706            config=config,
2707            decoder=decoder_to_use,
2708            extractor=extractor,
2709            inject_on_first_request=model.inject_on_first_request or False,
2710            parameters=model.parameters or {},
2711        )
2712
2713    @staticmethod
2714    def create_page_increment(
2715        model: PageIncrementModel, config: Config, **kwargs: Any
2716    ) -> PageIncrement:
2717        return PageIncrement(
2718            page_size=model.page_size,
2719            config=config,
2720            start_from_page=model.start_from_page or 0,
2721            inject_on_first_request=model.inject_on_first_request or False,
2722            parameters=model.parameters or {},
2723        )
2724
2725    def create_parent_stream_config(
2726        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2727    ) -> ParentStreamConfig:
2728        declarative_stream = self._create_component_from_model(
2729            model.stream, config=config, **kwargs
2730        )
2731        request_option = (
2732            self._create_component_from_model(model.request_option, config=config)
2733            if model.request_option
2734            else None
2735        )
2736
2737        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
2738            raise ValueError(
2739                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
2740            )
2741
2742        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
2743            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
2744        )
2745
2746        return ParentStreamConfig(
2747            parent_key=model.parent_key,
2748            request_option=request_option,
2749            stream=declarative_stream,
2750            partition_field=model.partition_field,
2751            config=config,
2752            incremental_dependency=model.incremental_dependency or False,
2753            parameters=model.parameters or {},
2754            extra_fields=model.extra_fields,
2755            lazy_read_pointer=model_lazy_read_pointer,
2756        )
2757
2758    def create_properties_from_endpoint(
2759        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
2760    ) -> PropertiesFromEndpoint:
2761        retriever = self._create_component_from_model(
2762            model=model.retriever,
2763            config=config,
2764            name="dynamic_properties",
2765            primary_key=None,
2766            stream_slicer=None,
2767            transformations=[],
2768            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ
2769        )
2770        return PropertiesFromEndpoint(
2771            property_field_path=model.property_field_path,
2772            retriever=retriever,
2773            config=config,
2774            parameters=model.parameters or {},
2775        )
2776
2777    def create_property_chunking(
2778        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2779    ) -> PropertyChunking:
2780        record_merge_strategy = (
2781            self._create_component_from_model(
2782                model=model.record_merge_strategy, config=config, **kwargs
2783            )
2784            if model.record_merge_strategy
2785            else None
2786        )
2787
2788        property_limit_type: PropertyLimitType
2789        match model.property_limit_type:
2790            case PropertyLimitTypeModel.property_count:
2791                property_limit_type = PropertyLimitType.property_count
2792            case PropertyLimitTypeModel.characters:
2793                property_limit_type = PropertyLimitType.characters
2794            case _:
2795                raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")
2796
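            # Illustrative (hypothetical) example: property_limit_type=property_count with
            # property_limit=50 would cap each chunk of requested properties at 50 entries.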
2797        return PropertyChunking(
2798            property_limit_type=property_limit_type,
2799            property_limit=model.property_limit,
2800            record_merge_strategy=record_merge_strategy,
2801            config=config,
2802            parameters=model.parameters or {},
2803        )
2804
2805    def create_query_properties(
2806        self, model: QueryPropertiesModel, config: Config, **kwargs: Any
2807    ) -> QueryProperties:
2808        if isinstance(model.property_list, list):
2809            property_list = model.property_list
2810        else:
2811            property_list = self._create_component_from_model(
2812                model=model.property_list, config=config, **kwargs
2813            )
2814
2815        property_chunking = (
2816            self._create_component_from_model(
2817                model=model.property_chunking, config=config, **kwargs
2818            )
2819            if model.property_chunking
2820            else None
2821        )
2822
2823        return QueryProperties(
2824            property_list=property_list,
2825            always_include_properties=model.always_include_properties,
2826            property_chunking=property_chunking,
2827            config=config,
2828            parameters=model.parameters or {},
2829        )
2830
2831    @staticmethod
2832    def create_record_filter(
2833        model: RecordFilterModel, config: Config, **kwargs: Any
2834    ) -> RecordFilter:
2835        return RecordFilter(
2836            condition=model.condition or "", config=config, parameters=model.parameters or {}
2837        )
2838
2839    @staticmethod
2840    def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
2841        return RequestPath(parameters={})
2842
2843    @staticmethod
2844    def create_request_option(
2845        model: RequestOptionModel, config: Config, **kwargs: Any
2846    ) -> RequestOption:
2847        inject_into = RequestOptionType(model.inject_into.value)
2848        field_path: Optional[List[Union[InterpolatedString, str]]] = (
2849            [
2850                InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
2851                for segment in model.field_path
2852            ]
2853            if model.field_path
2854            else None
2855        )
2856        field_name = (
2857            InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
2858            if model.field_name
2859            else None
2860        )
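            # field_path (when provided) injects the value at a nested location, e.g. a
            # hypothetical ["data", "query"] path inside a JSON request body.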
2861        return RequestOption(
2862            field_name=field_name,
2863            field_path=field_path,
2864            inject_into=inject_into,
2865            parameters=kwargs.get("parameters", {}),
2866        )
2867
2868    def create_record_selector(
2869        self,
2870        model: RecordSelectorModel,
2871        config: Config,
2872        *,
2873        name: str,
2874        transformations: List[RecordTransformation] | None = None,
2875        decoder: Decoder | None = None,
2876        client_side_incremental_sync: Dict[str, Any] | None = None,
2877        file_uploader: Optional[DefaultFileUploader] = None,
2878        **kwargs: Any,
2879    ) -> RecordSelector:
2880        extractor = self._create_component_from_model(
2881            model=model.extractor, decoder=decoder, config=config
2882        )
2883        record_filter = (
2884            self._create_component_from_model(model.record_filter, config=config)
2885            if model.record_filter
2886            else None
2887        )
2888
2889        transform_before_filtering = (
2890            False if model.transform_before_filtering is None else model.transform_before_filtering
2891        )
2892        if client_side_incremental_sync:
2893            record_filter = ClientSideIncrementalRecordFilterDecorator(
2894                config=config,
2895                parameters=model.parameters,
2896                condition=model.record_filter.condition
2897                if (model.record_filter and hasattr(model.record_filter, "condition"))
2898                else None,
2899                **client_side_incremental_sync,
2900            )
2901            transform_before_filtering = (
2902                True
2903                if model.transform_before_filtering is None
2904                else model.transform_before_filtering
2905            )
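                # When client-side incremental sync is enabled, transformations default to
                # running before filtering, presumably so the cursor field is available
                # for the record-level comparison.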
2906
2907        if model.schema_normalization is None:
2908            # default to no schema normalization if not set
2909            model.schema_normalization = SchemaNormalizationModel.None_
2910
2911        schema_normalization = (
2912            TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
2913            if isinstance(model.schema_normalization, SchemaNormalizationModel)
2914            else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
2915        )
2916
2917        return RecordSelector(
2918            extractor=extractor,
2919            name=name,
2920            config=config,
2921            record_filter=record_filter,
2922            transformations=transformations or [],
2923            file_uploader=file_uploader,
2924            schema_normalization=schema_normalization,
2925            parameters=model.parameters or {},
2926            transform_before_filtering=transform_before_filtering,
2927        )
2928
2929    @staticmethod
2930    def create_remove_fields(
2931        model: RemoveFieldsModel, config: Config, **kwargs: Any
2932    ) -> RemoveFields:
2933        return RemoveFields(
2934            field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
2935        )
2936
2937    def create_selective_authenticator(
2938        self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
2939    ) -> DeclarativeAuthenticator:
2940        authenticators = {
2941            name: self._create_component_from_model(model=auth, config=config)
2942            for name, auth in model.authenticators.items()
2943        }
2944        # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
2945        return SelectiveAuthenticator(  # type: ignore[abstract]
2946            config=config,
2947            authenticators=authenticators,
2948            authenticator_selection_path=model.authenticator_selection_path,
2949            **kwargs,
2950        )
2951
2952    @staticmethod
2953    def create_legacy_session_token_authenticator(
2954        model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
2955    ) -> LegacySessionTokenAuthenticator:
2956        return LegacySessionTokenAuthenticator(
2957            api_url=url_base,
2958            header=model.header,
2959            login_url=model.login_url,
2960            password=model.password or "",
2961            session_token=model.session_token or "",
2962            session_token_response_key=model.session_token_response_key or "",
2963            username=model.username or "",
2964            validate_session_url=model.validate_session_url,
2965            config=config,
2966            parameters=model.parameters or {},
2967        )
2968
2969    def create_simple_retriever(
2970        self,
2971        model: SimpleRetrieverModel,
2972        config: Config,
2973        *,
2974        name: str,
2975        primary_key: Optional[Union[str, List[str], List[List[str]]]],
2976        stream_slicer: Optional[StreamSlicer],
2977        request_options_provider: Optional[RequestOptionsProvider] = None,
2978        stop_condition_on_cursor: bool = False,
2979        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
2980        transformations: List[RecordTransformation],
2981        file_uploader: Optional[DefaultFileUploader] = None,
2982        incremental_sync: Optional[
2983            Union[
2984                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
2985            ]
2986        ] = None,
2987        use_cache: Optional[bool] = None,
2988        **kwargs: Any,
2989    ) -> SimpleRetriever:
2990        def _get_url() -> str:
2991            """
2992            Closure to get the URL from the requester, used in the case of a lazy retriever.
2993            It is needed because the URL is not set until the requester is created.
2994            """
2995
2996            _url = (
2997                model.requester.url
2998                if hasattr(model.requester, "url") and model.requester.url is not None
2999                else requester.get_url()
3000            )
3001            _url_base = (
3002                model.requester.url_base
3003                if hasattr(model.requester, "url_base") and model.requester.url_base is not None
3004                else requester.get_url_base()
3005            )
3006
3007            return _url or _url_base
3008
3009        decoder = (
3010            self._create_component_from_model(model=model.decoder, config=config)
3011            if model.decoder
3012            else JsonDecoder(parameters={})
3013        )
3014        record_selector = self._create_component_from_model(
3015            model=model.record_selector,
3016            name=name,
3017            config=config,
3018            decoder=decoder,
3019            transformations=transformations,
3020            client_side_incremental_sync=client_side_incremental_sync,
3021            file_uploader=file_uploader,
3022        )
3023
3024        query_properties: Optional[QueryProperties] = None
3025        query_properties_key: Optional[str] = None
3026        if self._query_properties_in_request_parameters(model.requester):
3027            # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple
3028            # places instead of defaulting to request_parameters, which isn't clearly documented
3029            if (
3030                hasattr(model.requester, "fetch_properties_from_endpoint")
3031                and model.requester.fetch_properties_from_endpoint
3032            ):
3033                raise ValueError(
3034                    f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters"
3035                )
3036
3037            query_properties_definitions = []
3038            for key, request_parameter in model.requester.request_parameters.items():  # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters()
3039                if isinstance(request_parameter, QueryPropertiesModel):
3040                    query_properties_key = key
3041                    query_properties_definitions.append(request_parameter)
3042
3043            if len(query_properties_definitions) > 1:
3044                raise ValueError(
3045                    f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
3046                )
3047
3048            if len(query_properties_definitions) == 1:
3049                query_properties = self._create_component_from_model(
3050                    model=query_properties_definitions[0], config=config
3051                )
3052        elif (
3053            hasattr(model.requester, "fetch_properties_from_endpoint")
3054            and model.requester.fetch_properties_from_endpoint
3055        ):
3056            query_properties_definition = QueryPropertiesModel(
3057                type="QueryProperties",
3058                property_list=model.requester.fetch_properties_from_endpoint,
3059                always_include_properties=None,
3060                property_chunking=None,
3061            )  # type: ignore # $parameters has a default value
3062
3063            query_properties = self.create_query_properties(
3064                model=query_properties_definition,
3065                config=config,
3066            )
3067
3068        requester = self._create_component_from_model(
3069            model=model.requester,
3070            decoder=decoder,
3071            name=name,
3072            query_properties_key=query_properties_key,
3073            use_cache=use_cache,
3074            config=config,
3075        )
3076
3077        # Define the cursor only if per-partition or common incremental support is needed
3078        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
3079
3080        if (
3081            not isinstance(stream_slicer, DatetimeBasedCursor)
3082            or type(stream_slicer) is not DatetimeBasedCursor
3083        ):
3084            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
3085            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
3086            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor can still act as the SimpleRetriever's
3087            # request_options_provider.
3088            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
3089        elif not request_options_provider:
3090            request_options_provider = DefaultRequestOptionsProvider(parameters={})
3091
3092        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3093
3094        cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
3095        paginator = (
3096            self._create_component_from_model(
3097                model=model.paginator,
3098                config=config,
3099                url_base=_get_url(),
3100                extractor_model=model.record_selector.extractor,
3101                decoder=decoder,
3102                cursor_used_for_stop_condition=cursor_used_for_stop_condition,
3103            )
3104            if model.paginator
3105            else NoPagination(parameters={})
3106        )
3107
3108        ignore_stream_slicer_parameters_on_paginated_requests = (
3109            model.ignore_stream_slicer_parameters_on_paginated_requests or False
3110        )
3111
3112        if (
3113            model.partition_router
3114            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
3115            and not bool(self._connector_state_manager.get_stream_state(name, None))
3116            and any(
3117                parent_stream_config.lazy_read_pointer
3118                for parent_stream_config in model.partition_router.parent_stream_configs
3119            )
3120        ):
3121            if incremental_sync:
3122                if incremental_sync.type != "DatetimeBasedCursor":
3123                    raise ValueError(
3124                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
3125                    )
3126
3127                elif incremental_sync.step or incremental_sync.cursor_granularity:
3128                    raise ValueError(
3129                        f"Found more than one slice per parent. LazySimpleRetriever only supports a single-slice read for stream - {name}."
3130                    )
3131
3132            if model.decoder and model.decoder.type != "JsonDecoder":
3133                raise ValueError(
3134                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
3135                )
3136
3137            return LazySimpleRetriever(
3138                name=name,
3139                paginator=paginator,
3140                primary_key=primary_key,
3141                requester=requester,
3142                record_selector=record_selector,
3143                stream_slicer=stream_slicer,
3144                request_option_provider=request_options_provider,
3145                cursor=cursor,
3146                config=config,
3147                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3148                parameters=model.parameters or {},
3149            )
3150
3151        if self._limit_slices_fetched or self._emit_connector_builder_messages:
3152            return SimpleRetrieverTestReadDecorator(
3153                name=name,
3154                paginator=paginator,
3155                primary_key=primary_key,
3156                requester=requester,
3157                record_selector=record_selector,
3158                stream_slicer=stream_slicer,
3159                request_option_provider=request_options_provider,
3160                cursor=cursor,
3161                config=config,
3162                maximum_number_of_slices=self._limit_slices_fetched or 5,
3163                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3164                parameters=model.parameters or {},
3165            )
3166        return SimpleRetriever(
3167            name=name,
3168            paginator=paginator,
3169            primary_key=primary_key,
3170            requester=requester,
3171            record_selector=record_selector,
3172            stream_slicer=stream_slicer,
3173            request_option_provider=request_options_provider,
3174            cursor=cursor,
3175            config=config,
3176            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3177            additional_query_properties=query_properties,
3178            parameters=model.parameters or {},
3179        )
3180
3181    @staticmethod
3182    def _query_properties_in_request_parameters(
3183        requester: Union[HttpRequesterModel, CustomRequesterModel],
3184    ) -> bool:
3185        if not hasattr(requester, "request_parameters"):
3186            return False
3187        request_parameters = requester.request_parameters
3188        if request_parameters and isinstance(request_parameters, Mapping):
3189            for request_parameter in request_parameters.values():
3190                if isinstance(request_parameter, QueryPropertiesModel):
3191                    return True
3192        return False
3193
3194    @staticmethod
3195    def _remove_query_properties(
3196        request_parameters: Mapping[str, Union[str, QueryPropertiesModel]],
3197    ) -> Mapping[str, str]:
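            # Illustrative (hypothetical) example:
            #   {"properties": QueryPropertiesModel(...), "limit": "50"} -> {"limit": "50"}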
3198        return {
3199            parameter_field: request_parameter
3200            for parameter_field, request_parameter in request_parameters.items()
3201            if not isinstance(request_parameter, QueryPropertiesModel)
3202        }
3203
3204    def create_state_delegating_stream(
3205        self,
3206        model: StateDelegatingStreamModel,
3207        config: Config,
3208        has_parent_state: Optional[bool] = None,
3209        **kwargs: Any,
3210    ) -> DeclarativeStream:
3211        if (
3212            model.full_refresh_stream.name != model.name
3213            or model.name != model.incremental_stream.name
3214        ):
3215            raise ValueError(
3216                f"The state_delegating_stream, full_refresh_stream, and incremental_stream must have equal names. Instead got {model.name}, {model.full_refresh_stream.name}, and {model.incremental_stream.name}."
3217            )
3218
3219        stream_model = (
3220            model.incremental_stream
3221            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
3222            else model.full_refresh_stream
3223        )
3224
3225        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # a DeclarativeStream will be created, as stream_model is a stream description
3226
3227    def _create_async_job_status_mapping(
3228        self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
3229    ) -> Mapping[str, AsyncJobStatus]:
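            # Illustrative (hypothetical) example: {"running": ["pending", "inProgress"], ...}
            # would map to {"pending": AsyncJobStatus.RUNNING, "inProgress": AsyncJobStatus.RUNNING, ...}.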
3230        api_status_to_cdk_status = {}
3231        for cdk_status, api_statuses in model.dict().items():
3232            if cdk_status == "type":
3233                # The "type" key is an element of the dict because of the CDK model typing, but it is not a CDK status
3234                continue
3235
3236            for status in api_statuses:
3237                if status in api_status_to_cdk_status:
3238                    raise ValueError(
3239                        f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once"
3240                    )
3241                api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status)
3242        return api_status_to_cdk_status
3243
3244    def _get_async_job_status(self, status: str) -> AsyncJobStatus:
3245        match status:
3246            case "running":
3247                return AsyncJobStatus.RUNNING
3248            case "completed":
3249                return AsyncJobStatus.COMPLETED
3250            case "failed":
3251                return AsyncJobStatus.FAILED
3252            case "timeout":
3253                return AsyncJobStatus.TIMED_OUT
3254            case _:
3255                raise ValueError(f"Unsupported CDK status {status}")
3256
3257    def create_async_retriever(
3258        self,
3259        model: AsyncRetrieverModel,
3260        config: Config,
3261        *,
3262        name: str,
3263        primary_key: Optional[
3264            Union[str, List[str], List[List[str]]]
3265        ],  # this seems to be needed to match create_simple_retriever
3266        stream_slicer: Optional[StreamSlicer],
3267        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3268        transformations: List[RecordTransformation],
3269        **kwargs: Any,
3270    ) -> AsyncRetriever:
3271        def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
3272            record_selector = RecordSelector(
3273                extractor=download_extractor,
3274                name=name,
3275                record_filter=None,
3276                transformations=transformations,
3277                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
3278                config=config,
3279                parameters={},
3280            )
3281            paginator = (
3282                self._create_component_from_model(
3283                    model=model.download_paginator,
3284                    decoder=decoder,
3285                    config=config,
3286                    url_base="",
3287                )
3288                if model.download_paginator
3289                else NoPagination(parameters={})
3290            )
3291            maximum_number_of_slices = self._limit_slices_fetched or 5
3292
3293            if self._limit_slices_fetched or self._emit_connector_builder_messages:
3294                return SimpleRetrieverTestReadDecorator(
3295                    requester=download_requester,
3296                    record_selector=record_selector,
3297                    primary_key=None,
3298                    name=job_download_components_name,
3299                    paginator=paginator,
3300                    config=config,
3301                    parameters={},
3302                    maximum_number_of_slices=maximum_number_of_slices,
3303                )
3304
3305            return SimpleRetriever(
3306                requester=download_requester,
3307                record_selector=record_selector,
3308                primary_key=None,
3309                name=job_download_components_name,
3310                paginator=paginator,
3311                config=config,
3312                parameters={},
3313            )
3314
3315        def _get_job_timeout() -> datetime.timedelta:
3316            user_defined_timeout: Optional[int] = (
3317                int(
3318                    InterpolatedString.create(
3319                        str(model.polling_job_timeout),
3320                        parameters={},
3321                    ).eval(config)
3322                )
3323                if model.polling_job_timeout
3324                else None
3325            )
3326
3327            # use the user-defined timeout during a test read, defaulting to 15 minutes
3328            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
3329            # the default value for a non-connector-builder sync is 60 minutes
3330            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
3331
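                # e.g. polling_job_timeout=30 yields a 30-minute timeout in both modes;
                # with no user-defined value, test reads use 15 minutes and regular syncs 60.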
3332            return (
3333                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
3334            )
3335
3336        decoder = (
3337            self._create_component_from_model(model=model.decoder, config=config)
3338            if model.decoder
3339            else JsonDecoder(parameters={})
3340        )
3341        record_selector = self._create_component_from_model(
3342            model=model.record_selector,
3343            config=config,
3344            decoder=decoder,
3345            name=name,
3346            transformations=transformations,
3347            client_side_incremental_sync=client_side_incremental_sync,
3348        )
3349        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3350        creation_requester = self._create_component_from_model(
3351            model=model.creation_requester,
3352            decoder=decoder,
3353            config=config,
3354            name=f"job creation - {name}",
3355        )
3356        polling_requester = self._create_component_from_model(
3357            model=model.polling_requester,
3358            decoder=decoder,
3359            config=config,
3360            name=f"job polling - {name}",
3361        )
3362        job_download_components_name = f"job download - {name}"
3363        download_decoder = (
3364            self._create_component_from_model(model=model.download_decoder, config=config)
3365            if model.download_decoder
3366            else JsonDecoder(parameters={})
3367        )
3368        download_extractor = (
3369            self._create_component_from_model(
3370                model=model.download_extractor,
3371                config=config,
3372                decoder=download_decoder,
3373                parameters=model.parameters,
3374            )
3375            if model.download_extractor
3376            else DpathExtractor(
3377                [],
3378                config=config,
3379                decoder=download_decoder,
3380                parameters=model.parameters or {},
3381            )
3382        )
3383        download_requester = self._create_component_from_model(
3384            model=model.download_requester,
3385            decoder=download_decoder,
3386            config=config,
3387            name=job_download_components_name,
3388        )
3389        download_retriever = _get_download_retriever()
3390        abort_requester = (
3391            self._create_component_from_model(
3392                model=model.abort_requester,
3393                decoder=decoder,
3394                config=config,
3395                name=f"job abort - {name}",
3396            )
3397            if model.abort_requester
3398            else None
3399        )
3400        delete_requester = (
3401            self._create_component_from_model(
3402                model=model.delete_requester,
3403                decoder=decoder,
3404                config=config,
3405                name=f"job delete - {name}",
3406            )
3407            if model.delete_requester
3408            else None
3409        )
3410        download_target_requester = (
3411            self._create_component_from_model(
3412                model=model.download_target_requester,
3413                decoder=decoder,
3414                config=config,
3415                name=f"job extract_url - {name}",
3416            )
3417            if model.download_target_requester
3418            else None
3419        )
3420        status_extractor = self._create_component_from_model(
3421            model=model.status_extractor, decoder=decoder, config=config, name=name
3422        )
3423        download_target_extractor = self._create_component_from_model(
3424            model=model.download_target_extractor,
3425            decoder=decoder,
3426            config=config,
3427            name=name,
3428        )
3429
3430        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
3431            creation_requester=creation_requester,
3432            polling_requester=polling_requester,
3433            download_retriever=download_retriever,
3434            download_target_requester=download_target_requester,
3435            abort_requester=abort_requester,
3436            delete_requester=delete_requester,
3437            status_extractor=status_extractor,
3438            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3439            download_target_extractor=download_target_extractor,
3440            job_timeout=_get_job_timeout(),
3441        )
3442
3443        async_job_partition_router = AsyncJobPartitionRouter(
3444            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3445                job_repository,
3446                stream_slices,
3447                self._job_tracker,
3448                self._message_repository,
3449                # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk
3450                has_bulk_parent=False,
3451                # set `job_max_retry` to 1 for the `Connector Builder` use case.
3452                # `None` means the default of 3 retry attempts is used under the hood.
3453                job_max_retry=1 if self._emit_connector_builder_messages else None,
3454            ),
3455            stream_slicer=stream_slicer,
3456            config=config,
3457            parameters=model.parameters or {},
3458        )
3459
3460        return AsyncRetriever(
3461            record_selector=record_selector,
3462            stream_slicer=async_job_partition_router,
3463            config=config,
3464            parameters=model.parameters or {},
3465        )
3466
3467    @staticmethod
3468    def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
3469        return Spec(
3470            connection_specification=model.connection_specification,
3471            documentation_url=model.documentation_url,
3472            advanced_auth=model.advanced_auth,
3473            parameters={},
3474        )
3475
3476    def create_substream_partition_router(
3477        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
3478    ) -> SubstreamPartitionRouter:
3479        parent_stream_configs = []
3480        if model.parent_stream_configs:
3481            parent_stream_configs.extend(
3482                [
3483                    self._create_message_repository_substream_wrapper(
3484                        model=parent_stream_config, config=config, **kwargs
3485                    )
3486                    for parent_stream_config in model.parent_stream_configs
3487                ]
3488            )
3489
3490        return SubstreamPartitionRouter(
3491            parent_stream_configs=parent_stream_configs,
3492            parameters=model.parameters or {},
3493            config=config,
3494        )
3495
3496    def _create_message_repository_substream_wrapper(
3497        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
3498    ) -> Any:
3499        substream_factory = ModelToComponentFactory(
3500            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
3501            limit_slices_fetched=self._limit_slices_fetched,
3502            emit_connector_builder_messages=self._emit_connector_builder_messages,
3503            disable_retries=self._disable_retries,
3504            disable_cache=self._disable_cache,
3505            message_repository=LogAppenderMessageRepositoryDecorator(
3506                {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}},
3507                self._message_repository,
3508                self._evaluate_log_level(self._emit_connector_builder_messages),
3509            ),
3510        )
3511
3512        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
3513        has_parent_state = bool(
3514            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
3515            if model.incremental_dependency
3516            else False
3517        )
3518        return substream_factory._create_component_from_model(
3519            model=model, config=config, has_parent_state=has_parent_state, **kwargs
3520        )
3521
3522    @staticmethod
3523    def create_wait_time_from_header(
3524        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
3525    ) -> WaitTimeFromHeaderBackoffStrategy:
3526        return WaitTimeFromHeaderBackoffStrategy(
3527            header=model.header,
3528            parameters=model.parameters or {},
3529            config=config,
3530            regex=model.regex,
3531            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
3534        )
3535
3536    @staticmethod
3537    def create_wait_until_time_from_header(
3538        model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
3539    ) -> WaitUntilTimeFromHeaderBackoffStrategy:
3540        return WaitUntilTimeFromHeaderBackoffStrategy(
3541            header=model.header,
3542            parameters=model.parameters or {},
3543            config=config,
3544            min_wait=model.min_wait,
3545            regex=model.regex,
3546        )
3547
3548    def get_message_repository(self) -> MessageRepository:
3549        return self._message_repository
3550
3551    def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
3552        return Level.DEBUG if emit_connector_builder_messages else Level.INFO
3553
3554    @staticmethod
3555    def create_components_mapping_definition(
3556        model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
3557    ) -> ComponentMappingDefinition:
3558        interpolated_value = InterpolatedString.create(
3559            model.value, parameters=model.parameters or {}
3560        )
3561        field_path = [
3562            InterpolatedString.create(path, parameters=model.parameters or {})
3563            for path in model.field_path
3564        ]
3565        return ComponentMappingDefinition(
3566            field_path=field_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
3567            value=interpolated_value,
3568            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
3569            parameters=model.parameters or {},
3570        )
3571
3572    def create_http_components_resolver(
3573        self, model: HttpComponentsResolverModel, config: Config
3574    ) -> Any:
3575        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3576        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3577
3578        retriever = self._create_component_from_model(
3579            model=model.retriever,
3580            config=config,
3581            name="",
3582            primary_key=None,
3583            stream_slicer=stream_slicer or combined_slicers,
3584            transformations=[],
3585        )
3586
3587        components_mapping = [
3588            self._create_component_from_model(
3589                model=components_mapping_definition_model,
3590                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3591                    components_mapping_definition_model.value_type
3592                ),
3593                config=config,
3594            )
3595            for components_mapping_definition_model in model.components_mapping
3596        ]
3597
3598        return HttpComponentsResolver(
3599            retriever=retriever,
3600            config=config,
3601            components_mapping=components_mapping,
3602            parameters=model.parameters or {},
3603        )
3604
3605    @staticmethod
3606    def create_stream_config(
3607        model: StreamConfigModel, config: Config, **kwargs: Any
3608    ) -> StreamConfig:
3609        model_configs_pointer: List[Union[InterpolatedString, str]] = (
3610            list(model.configs_pointer) if model.configs_pointer else []
3611        )
3612
3613        return StreamConfig(
3614            configs_pointer=model_configs_pointer,
3615            parameters=model.parameters or {},
3616        )
3617
3618    def create_config_components_resolver(
3619        self, model: ConfigComponentsResolverModel, config: Config
3620    ) -> Any:
3621        stream_config = self._create_component_from_model(
3622            model.stream_config, config=config, parameters=model.parameters or {}
3623        )
3624
3625        components_mapping = [
3626            self._create_component_from_model(
3627                model=components_mapping_definition_model,
3628                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3629                    components_mapping_definition_model.value_type
3630                ),
3631                config=config,
3632            )
3633            for components_mapping_definition_model in model.components_mapping
3634        ]
3635
3636        return ConfigComponentsResolver(
3637            stream_config=stream_config,
3638            config=config,
3639            components_mapping=components_mapping,
3640            parameters=model.parameters or {},
3641        )
3642
3643    _UNSUPPORTED_DECODER_ERROR = (
3644        "Specified decoder of {decoder_type} is not supported for pagination. "
3645        "Please use `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. "
3646        "If using `GzipParser`, please ensure that the lowest-level inner_parser is a `JsonParser`."
3647    )
3648
3649    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
3650        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
3651            return True
3652        elif isinstance(decoder, CompositeRawDecoder):
3653            return self._is_supported_parser_for_pagination(decoder.parser)
3654        else:
3655            return False
3656
3657    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
3658        if isinstance(parser, JsonParser):
3659            return True
3660        elif isinstance(parser, GzipParser):
3661            return isinstance(parser.inner_parser, JsonParser)
3662        else:
3663            return False
3664
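# Illustration: a standalone restatement of the pagination-support rule that the
# two helpers above implement -- JSON and XML decoders are supported directly,
# and a composite decoder only if its parser is a JSON parser or a gzip parser
# whose immediate inner parser is a JSON parser (note the check is one level
# deep). The Sketch* classes are stand-ins, not CDK types.
from dataclasses import dataclass

class SketchParser: ...
class SketchJsonParser(SketchParser): ...
class SketchCsvParser(SketchParser): ...

@dataclass
class SketchGzipParser(SketchParser):
    inner_parser: SketchParser

def is_supported_parser(parser: SketchParser) -> bool:
    if isinstance(parser, SketchJsonParser):
        return True
    if isinstance(parser, SketchGzipParser):
        return isinstance(parser.inner_parser, SketchJsonParser)
    return False

assert is_supported_parser(SketchJsonParser())
assert is_supported_parser(SketchGzipParser(inner_parser=SketchJsonParser()))
assert not is_supported_parser(SketchCsvParser())
assert not is_supported_parser(SketchGzipParser(inner_parser=SketchCsvParser()))
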
3665    def create_http_api_budget(
3666        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
3667    ) -> HttpAPIBudget:
3668        policies = [
3669            self._create_component_from_model(model=policy, config=config)
3670            for policy in model.policies
3671        ]
3672
3673        return HttpAPIBudget(
3674            policies=policies,
3675            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
3676            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
3677            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
3678        )
3679
3680    def create_fixed_window_call_rate_policy(
3681        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
3682    ) -> FixedWindowCallRatePolicy:
3683        matchers = [
3684            self._create_component_from_model(model=matcher, config=config)
3685            for matcher in model.matchers
3686        ]
3687
3688        # Set the initial reset timestamp to 10 days from now.
3689        # This value will be updated by the first request.
3690        return FixedWindowCallRatePolicy(
3691            next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
3692            period=parse_duration(model.period),
3693            call_limit=model.call_limit,
3694            matchers=matchers,
3695        )
3696
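# Illustration: a conceptual standalone sketch of the fixed-window policy built
# above -- at most `call_limit` calls are permitted per `period`, and the
# counter resets when the window rolls over. Not the CDK class; the real policy
# also consults the matchers and the rate-limit headers configured on the budget.
import datetime

class SketchFixedWindowPolicy:
    def __init__(self, call_limit: int, period: datetime.timedelta) -> None:
        self._call_limit = call_limit
        self._period = period
        self._next_reset_ts = datetime.datetime.now() + period
        self._calls_in_window = 0

    def try_acquire(self) -> bool:
        now = datetime.datetime.now()
        if now >= self._next_reset_ts:
            # The window rolled over: reset the counter, schedule the next reset.
            self._next_reset_ts = now + self._period
            self._calls_in_window = 0
        if self._calls_in_window >= self._call_limit:
            return False  # The caller should wait until the next reset.
        self._calls_in_window += 1
        return True

policy = SketchFixedWindowPolicy(call_limit=2, period=datetime.timedelta(minutes=1))
assert policy.try_acquire() and policy.try_acquire() and not policy.try_acquire()
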
3697    def create_file_uploader(
3698        self, model: FileUploaderModel, config: Config, **kwargs: Any
3699    ) -> FileUploader:
3700        name = "File Uploader"
3701        requester = self._create_component_from_model(
3702            model=model.requester,
3703            config=config,
3704            name=name,
3705            **kwargs,
3706        )
3707        download_target_extractor = self._create_component_from_model(
3708            model=model.download_target_extractor,
3709            config=config,
3710            name=name,
3711            **kwargs,
3712        )
3713        emit_connector_builder_messages = self._emit_connector_builder_messages
3714        file_uploader = DefaultFileUploader(
3715            requester=requester,
3716            download_target_extractor=download_target_extractor,
3717            config=config,
3718            file_writer=NoopFileWriter()
3719            if emit_connector_builder_messages
3720            else LocalFileSystemFileWriter(),
3721            parameters=model.parameters or {},
3722            filename_extractor=model.filename_extractor or None,
3723        )
3724
3725        return (
3726            ConnectorBuilderFileUploader(file_uploader)
3727            if emit_connector_builder_messages
3728            else file_uploader
3729        )
3730
3731    def create_moving_window_call_rate_policy(
3732        self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3733    ) -> MovingWindowCallRatePolicy:
3734        rates = [
3735            self._create_component_from_model(model=rate, config=config) for rate in model.rates
3736        ]
3737        matchers = [
3738            self._create_component_from_model(model=matcher, config=config)
3739            for matcher in model.matchers
3740        ]
3741        return MovingWindowCallRatePolicy(
3742            rates=rates,
3743            matchers=matchers,
3744        )
3745
3746    def create_unlimited_call_rate_policy(
3747        self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
3748    ) -> UnlimitedCallRatePolicy:
3749        matchers = [
3750            self._create_component_from_model(model=matcher, config=config)
3751            for matcher in model.matchers
3752        ]
3753
3754        return UnlimitedCallRatePolicy(
3755            matchers=matchers,
3756        )
3757
3758    def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
3759        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
3760        return Rate(
3761            limit=int(interpolated_limit.eval(config=config)),
3762            interval=parse_duration(model.interval),
3763        )
3764
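# Illustration: the Rate built above combines two coercions -- the limit is
# stringified, interpolated against the config, and int()-coerced, while the
# interval is an ISO-8601 duration parsed with isodate. A small sketch of both,
# assuming interpolation has already resolved a config reference such as
# "{{ config['rate_limit'] }}" to the string "100":
from isodate import parse_duration

interval = parse_duration("PT1M")  # one-minute window
assert interval.total_seconds() == 60

limit = int("100")  # post-interpolation value of `model.limit`
assert limit == 100
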
3765    def create_http_request_matcher(
3766        self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
3767    ) -> HttpRequestRegexMatcher:
3768        return HttpRequestRegexMatcher(
3769            method=model.method,
3770            url_base=model.url_base,
3771            url_path_pattern=model.url_path_pattern,
3772            params=model.params,
3773            headers=model.headers,
3774        )
3775
3776    def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
3777        self._api_budget = self.create_component(
3778            model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
3779        )
3780
3781    def create_grouping_partition_router(
3782        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
3783    ) -> GroupingPartitionRouter:
3784        underlying_router = self._create_component_from_model(
3785            model=model.underlying_partition_router, config=config
3786        )
3787        if model.group_size < 1:
3788            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
3789
3790        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
3791        # because they are specific to individual partitions and cannot be aggregated or handled
3792        # when grouping, potentially leading to incorrect API calls. Any request customization
3793        # should be managed at the stream level through the requester's configuration.
3794        if isinstance(underlying_router, SubstreamPartitionRouter):
3795            if any(
3796                parent_config.request_option
3797                for parent_config in underlying_router.parent_stream_configs
3798            ):
3799                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3800
3801        if isinstance(underlying_router, ListPartitionRouter):
3802            if underlying_router.request_option:
3803                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3804
3805        return GroupingPartitionRouter(
3806            group_size=model.group_size,
3807            underlying_partition_router=underlying_router,
3808            deduplicate=model.deduplicate if model.deduplicate is not None else True,
3809            config=config,
3810        )
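
# Illustration: a standalone sketch of what the GroupingPartitionRouter created
# above does with its underlying router's partitions -- batch them into groups
# of `group_size`, optionally de-duplicating, so one request can cover several
# partitions. Conceptual only; real partitions are stream slices, not strings.
from typing import Iterable, Iterator, List

def group_partitions(
    partitions: Iterable[str], group_size: int, deduplicate: bool = True
) -> Iterator[List[str]]:
    if group_size < 1:
        raise ValueError(f"Group size must be greater than 0, got {group_size}")
    seen = set()
    batch: List[str] = []
    for partition in partitions:
        if deduplicate:
            if partition in seen:
                continue
            seen.add(partition)
        batch.append(partition)
        if len(batch) == group_size:
            yield batch
            batch = []
    if batch:
        yield batch

assert list(group_partitions(["a", "b", "a", "c"], group_size=2)) == [["a", "b"], ["c"]]
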
ComponentDefinition = typing.Mapping[str, typing.Any]
SCHEMA_TRANSFORMER_TYPE_MAPPING = {SchemaNormalization.None_: TransformConfig.NoTransform, SchemaNormalization.Default: TransformConfig.DefaultSchemaNormalization}
class ModelToComponentFactory:
 577class ModelToComponentFactory:
 578    EPOCH_DATETIME_FORMAT = "%s"
 579
 580    def __init__(
 581        self,
 582        limit_pages_fetched_per_slice: Optional[int] = None,
 583        limit_slices_fetched: Optional[int] = None,
 584        emit_connector_builder_messages: bool = False,
 585        disable_retries: bool = False,
 586        disable_cache: bool = False,
 587        disable_resumable_full_refresh: bool = False,
 588        message_repository: Optional[MessageRepository] = None,
 589        connector_state_manager: Optional[ConnectorStateManager] = None,
 590        max_concurrent_async_job_count: Optional[int] = None,
 591    ):
 592        self._init_mappings()
 593        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
 594        self._limit_slices_fetched = limit_slices_fetched
 595        self._emit_connector_builder_messages = emit_connector_builder_messages
 596        self._disable_retries = disable_retries
 597        self._disable_cache = disable_cache
 598        self._disable_resumable_full_refresh = disable_resumable_full_refresh
 599        self._message_repository = message_repository or InMemoryMessageRepository(
 600            self._evaluate_log_level(emit_connector_builder_messages)
 601        )
 602        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
 603        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
 604        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
 605        # placeholder for deprecation warnings
 606        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
 607
 608    def _init_mappings(self) -> None:
 609        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
 610            AddedFieldDefinitionModel: self.create_added_field_definition,
 611            AddFieldsModel: self.create_add_fields,
 612            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
 613            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
 614            BearerAuthenticatorModel: self.create_bearer_authenticator,
 615            CheckStreamModel: self.create_check_stream,
 616            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
 617            CheckDynamicStreamModel: self.create_check_dynamic_stream,
 618            CompositeErrorHandlerModel: self.create_composite_error_handler,
 619            ConcurrencyLevelModel: self.create_concurrency_level,
 620            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
 621            CsvDecoderModel: self.create_csv_decoder,
 622            CursorPaginationModel: self.create_cursor_pagination,
 623            CustomAuthenticatorModel: self.create_custom_component,
 624            CustomBackoffStrategyModel: self.create_custom_component,
 625            CustomDecoderModel: self.create_custom_component,
 626            CustomErrorHandlerModel: self.create_custom_component,
 627            CustomIncrementalSyncModel: self.create_custom_component,
 628            CustomRecordExtractorModel: self.create_custom_component,
 629            CustomRecordFilterModel: self.create_custom_component,
 630            CustomRequesterModel: self.create_custom_component,
 631            CustomRetrieverModel: self.create_custom_component,
 632            CustomSchemaLoader: self.create_custom_component,
 633            CustomSchemaNormalizationModel: self.create_custom_component,
 634            CustomStateMigration: self.create_custom_component,
 635            CustomPaginationStrategyModel: self.create_custom_component,
 636            CustomPartitionRouterModel: self.create_custom_component,
 637            CustomTransformationModel: self.create_custom_component,
 638            DatetimeBasedCursorModel: self.create_datetime_based_cursor,
 639            DeclarativeStreamModel: self.create_declarative_stream,
 640            DefaultErrorHandlerModel: self.create_default_error_handler,
 641            DefaultPaginatorModel: self.create_default_paginator,
 642            DpathExtractorModel: self.create_dpath_extractor,
 643            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
 644            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
 645            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
 646            GroupByKeyMergeStrategyModel: self.create_group_by_key,
 647            HttpRequesterModel: self.create_http_requester,
 648            HttpResponseFilterModel: self.create_http_response_filter,
 649            InlineSchemaLoaderModel: self.create_inline_schema_loader,
 650            JsonDecoderModel: self.create_json_decoder,
 651            JsonlDecoderModel: self.create_jsonl_decoder,
 652            GzipDecoderModel: self.create_gzip_decoder,
 653            KeysToLowerModel: self.create_keys_to_lower_transformation,
 654            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
 655            KeysReplaceModel: self.create_keys_replace_transformation,
 656            FlattenFieldsModel: self.create_flatten_fields,
 657            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
 658            IterableDecoderModel: self.create_iterable_decoder,
 659            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
 660            XmlDecoderModel: self.create_xml_decoder,
 661            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
 662            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
 663            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
 664            TypesMapModel: self.create_types_map,
 665            ComplexFieldTypeModel: self.create_complex_field_type,
 666            JwtAuthenticatorModel: self.create_jwt_authenticator,
 667            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
 668            ListPartitionRouterModel: self.create_list_partition_router,
 669            MinMaxDatetimeModel: self.create_min_max_datetime,
 670            NoAuthModel: self.create_no_auth,
 671            NoPaginationModel: self.create_no_pagination,
 672            OAuthAuthenticatorModel: self.create_oauth_authenticator,
 673            OffsetIncrementModel: self.create_offset_increment,
 674            PageIncrementModel: self.create_page_increment,
 675            ParentStreamConfigModel: self.create_parent_stream_config,
 676            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
 677            PropertyChunkingModel: self.create_property_chunking,
 678            QueryPropertiesModel: self.create_query_properties,
 679            RecordFilterModel: self.create_record_filter,
 680            RecordSelectorModel: self.create_record_selector,
 681            RemoveFieldsModel: self.create_remove_fields,
 682            RequestPathModel: self.create_request_path,
 683            RequestOptionModel: self.create_request_option,
 684            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
 685            SelectiveAuthenticatorModel: self.create_selective_authenticator,
 686            SimpleRetrieverModel: self.create_simple_retriever,
 687            StateDelegatingStreamModel: self.create_state_delegating_stream,
 688            SpecModel: self.create_spec,
 689            SubstreamPartitionRouterModel: self.create_substream_partition_router,
 690            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
 691            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
 692            AsyncRetrieverModel: self.create_async_retriever,
 693            HttpComponentsResolverModel: self.create_http_components_resolver,
 694            ConfigComponentsResolverModel: self.create_config_components_resolver,
 695            StreamConfigModel: self.create_stream_config,
 696            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
 697            ZipfileDecoderModel: self.create_zipfile_decoder,
 698            HTTPAPIBudgetModel: self.create_http_api_budget,
 699            FileUploaderModel: self.create_file_uploader,
 700            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
 701            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
 702            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
 703            RateModel: self.create_rate,
 704            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
 705            GroupingPartitionRouterModel: self.create_grouping_partition_router,
 706        }
 707
 708        # Needed for the case where we need to perform a second parse on the fields of a custom component
 709        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}
 710
 711    def create_component(
 712        self,
 713        model_type: Type[BaseModel],
 714        component_definition: ComponentDefinition,
 715        config: Config,
 716        **kwargs: Any,
 717    ) -> Any:
 718        """
 719        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
 720        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model
 721        and then creating declarative components from that model.
 722
 723        :param model_type: The type of declarative component that is being initialized
 724        :param component_definition: The mapping that represents a declarative component
 725        :param config: The connector config that is provided by the customer
 726        :return: The declarative component to be used at runtime
 727        """
 728
 729        component_type = component_definition.get("type")
 730        if component_type != model_type.__name__:
 731            raise ValueError(
 732                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
 733            )
 734
 735        declarative_component_model = model_type.parse_obj(component_definition)
 736
 737        if not isinstance(declarative_component_model, model_type):
 738            raise ValueError(
 739                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
 740            )
 741
 742        return self._create_component_from_model(
 743            model=declarative_component_model, config=config, **kwargs
 744        )
 745
 746    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
 747        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
 748            raise ValueError(
 749                f"{model.__class__} with attributes {model} is not a valid component type"
 750            )
 751        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
 752        if not component_constructor:
 753            raise ValueError(f"Could not find constructor for {model.__class__}")
 754
 755        # collect deprecation warnings for supported models.
 756        if isinstance(model, BaseModelWithDeprecations):
 757            self._collect_model_deprecations(model)
 758
 759        return component_constructor(model=model, config=config, **kwargs)
 760
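# Illustration: a condensed standalone sketch of the dispatch pattern used by
# create_component and _create_component_from_model above -- validate the "type"
# tag, parse the mapping into a Pydantic model, then look the model class up in
# a model-to-constructor registry. The CheckStream model and constructor below
# are toy stand-ins (in the CDK, generated model classes carry the manifest type
# name and are imported under a `...Model` alias).
from typing import Any, Callable, List, Mapping, Type
from pydantic.v1 import BaseModel

class CheckStream(BaseModel):
    type: str
    stream_names: List[str]

def build_check_stream(model: CheckStream, config: Mapping[str, Any]) -> Any:
    return {"component": "check_stream", "streams": model.stream_names}

REGISTRY: Mapping[Type[BaseModel], Callable[..., Any]] = {
    CheckStream: build_check_stream,
}

def create_component_sketch(
    model_type: Type[BaseModel],
    definition: Mapping[str, Any],
    config: Mapping[str, Any],
) -> Any:
    if definition.get("type") != model_type.__name__:
        raise ValueError(f"Expected {model_type.__name__}, got {definition.get('type')}")
    model = model_type.parse_obj(definition)
    return REGISTRY[model.__class__](model, config)

component = create_component_sketch(
    CheckStream, {"type": "CheckStream", "stream_names": ["users"]}, config={}
)
assert component["streams"] == ["users"]
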
 761    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
 762        """
 763        Returns the deprecation warnings that were collected during the creation of components.
 764        """
 765        return self._collected_deprecation_logs
 766
 767    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
 768        """
 769        Collects deprecation logs from the given model and appends any new logs to the internal collection.
 770
 771        This method checks if the provided model has deprecation logs (identified by the presence of the DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not already been collected, ensuring that duplicate logs are avoided.
 772
 773        Args:
 774            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
 775        """
 776        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
 777            for log in model._deprecation_logs:
 778                # avoid duplicate deprecation log entries.
 779                if log not in self._collected_deprecation_logs:
 780                    self._collected_deprecation_logs.append(log)
 781
 782    @staticmethod
 783    def create_added_field_definition(
 784        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
 785    ) -> AddedFieldDefinition:
 786        interpolated_value = InterpolatedString.create(
 787            model.value, parameters=model.parameters or {}
 788        )
 789        return AddedFieldDefinition(
 790            path=model.path,
 791            value=interpolated_value,
 792            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
 793            parameters=model.parameters or {},
 794        )
 795
 796    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
 797        added_field_definitions = [
 798            self._create_component_from_model(
 799                model=added_field_definition_model,
 800                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
 801                    added_field_definition_model.value_type
 802                ),
 803                config=config,
 804            )
 805            for added_field_definition_model in model.fields
 806        ]
 807        return AddFields(
 808            fields=added_field_definitions,
 809            condition=model.condition or "",
 810            parameters=model.parameters or {},
 811        )
 812
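# Illustration: a hedged example (as a Python dict) of the kind of manifest
# mapping create_add_fields consumes. The exact interpolation context available
# to `value` is part of the declarative framework, so treat the
# "{{ record['id'] }}" expression and the field names here as illustrative.
add_fields_definition = {
    "type": "AddFields",
    "fields": [
        {
            "type": "AddedFieldDefinition",
            "path": ["copied_id"],
            "value": "{{ record['id'] }}",
            "value_type": "string",
        }
    ],
}
# Fed through create_component(AddFieldsModel, add_fields_definition, config),
# this would yield an AddFields transformation that writes each record's "id"
# into a new "copied_id" field.
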
 813    def create_keys_to_lower_transformation(
 814        self, model: KeysToLowerModel, config: Config, **kwargs: Any
 815    ) -> KeysToLowerTransformation:
 816        return KeysToLowerTransformation()
 817
 818    def create_keys_to_snake_transformation(
 819        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
 820    ) -> KeysToSnakeCaseTransformation:
 821        return KeysToSnakeCaseTransformation()
 822
 823    def create_keys_replace_transformation(
 824        self, model: KeysReplaceModel, config: Config, **kwargs: Any
 825    ) -> KeysReplaceTransformation:
 826        return KeysReplaceTransformation(
 827            old=model.old, new=model.new, parameters=model.parameters or {}
 828        )
 829
 830    def create_flatten_fields(
 831        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
 832    ) -> FlattenFields:
 833        return FlattenFields(
 834            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
 835        )
 836
 837    def create_dpath_flatten_fields(
 838        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
 839    ) -> DpathFlattenFields:
 840        model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
 841        key_transformation = (
 842            KeyTransformation(
 843                config=config,
 844                prefix=model.key_transformation.prefix,
 845                suffix=model.key_transformation.suffix,
 846                parameters=model.parameters or {},
 847            )
 848            if model.key_transformation is not None
 849            else None
 850        )
 851        return DpathFlattenFields(
 852            config=config,
 853            field_path=model_field_path,
 854            delete_origin_value=model.delete_origin_value
 855            if model.delete_origin_value is not None
 856            else False,
 857            replace_record=model.replace_record if model.replace_record is not None else False,
 858            key_transformation=key_transformation,
 859            parameters=model.parameters or {},
 860        )
 861
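# Illustration: a standalone sketch of the flattening DpathFlattenFields is
# configured for above -- promote the keys found under `field_path` to the top
# level of the record, optionally prefixing/suffixing the promoted keys and
# deleting the original nested value. Conceptual only; the real component also
# supports dpath wildcards, interpolation, and replace_record.
from typing import Any, Dict, List, Optional

def flatten_field(
    record: Dict[str, Any],
    field_path: List[str],
    delete_origin_value: bool = False,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
) -> Dict[str, Any]:
    node: Any = record
    for key in field_path:
        node = node[key]
    flattened = dict(record)
    for key, value in node.items():
        flattened[f"{prefix or ''}{key}{suffix or ''}"] = value
    if delete_origin_value:
        # Drop the top-level entry that anchored the nested value.
        flattened.pop(field_path[0], None)
    return flattened

record = {"id": 1, "meta": {"source": "api", "page": 2}}
assert flatten_field(record, ["meta"], delete_origin_value=True, prefix="meta_") == {
    "id": 1,
    "meta_source": "api",
    "meta_page": 2,
}
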
 862    @staticmethod
 863    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
 864        if not value_type:
 865            return None
 866        names_to_types = {
 867            ValueType.string: str,
 868            ValueType.number: float,
 869            ValueType.integer: int,
 870            ValueType.boolean: bool,
 871        }
 872        return names_to_types[value_type]
 873
 874    def create_api_key_authenticator(
 875        self,
 876        model: ApiKeyAuthenticatorModel,
 877        config: Config,
 878        token_provider: Optional[TokenProvider] = None,
 879        **kwargs: Any,
 880    ) -> ApiKeyAuthenticator:
 881        if model.inject_into is None and model.header is None:
 882            raise ValueError(
 883                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
 884            )
 885
 886        if model.inject_into is not None and model.header is not None:
 887            raise ValueError(
 888                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
 889            )
 890
 891        if token_provider is not None and model.api_token != "":
 892            raise ValueError(
 893                "If token_provider is set, api_token is ignored and must be set to an empty string."
 894            )
 895
 896        request_option = (
 897            self._create_component_from_model(
 898                model.inject_into, config, parameters=model.parameters or {}
 899            )
 900            if model.inject_into
 901            else RequestOption(
 902                inject_into=RequestOptionType.header,
 903                field_name=model.header or "",
 904                parameters=model.parameters or {},
 905            )
 906        )
 907
 908        return ApiKeyAuthenticator(
 909            token_provider=(
 910                token_provider
 911                if token_provider is not None
 912                else InterpolatedStringTokenProvider(
 913                    api_token=model.api_token or "",
 914                    config=config,
 915                    parameters=model.parameters or {},
 916                )
 917            ),
 918            request_option=request_option,
 919            config=config,
 920            parameters=model.parameters or {},
 921        )
 922
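# Illustration: a hedged example (as a Python dict) of an ApiKeyAuthenticator
# definition using the non-deprecated `inject_into` form that the validation
# above requires. The header name and config key are invented for the example.
api_key_authenticator_definition = {
    "type": "ApiKeyAuthenticator",
    "api_token": "{{ config['api_key'] }}",
    "inject_into": {
        "type": "RequestOption",
        "inject_into": "header",
        "field_name": "X-API-Key",
    },
}
# Setting both `inject_into` and the deprecated top-level `header`, or neither,
# raises the ValueErrors shown above.
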
 923    def create_legacy_to_per_partition_state_migration(
 924        self,
 925        model: LegacyToPerPartitionStateMigrationModel,
 926        config: Mapping[str, Any],
 927        declarative_stream: DeclarativeStreamModel,
 928    ) -> LegacyToPerPartitionStateMigration:
 929        retriever = declarative_stream.retriever
 930        if not isinstance(retriever, SimpleRetrieverModel):
 931            raise ValueError(
 932                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
 933            )
 934        partition_router = retriever.partition_router
 935        if not isinstance(
 936            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
 937        ):
 938            raise ValueError(
 939                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
 940            )
 941        if not hasattr(partition_router, "parent_stream_configs"):
 942            raise ValueError(
 943                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
 944            )
 945
 946        if not hasattr(declarative_stream, "incremental_sync"):
 947            raise ValueError(
 948                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
 949            )
 950
 951        return LegacyToPerPartitionStateMigration(
 952            partition_router,  # type: ignore # was already checked above
 953            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
 954            config,
 955            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
 956        )
 957
 958    def create_session_token_authenticator(
 959        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
 960    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
 961        decoder = (
 962            self._create_component_from_model(model=model.decoder, config=config)
 963            if model.decoder
 964            else JsonDecoder(parameters={})
 965        )
 966        login_requester = self._create_component_from_model(
 967            model=model.login_requester,
 968            config=config,
 969            name=f"{name}_login_requester",
 970            decoder=decoder,
 971        )
 972        token_provider = SessionTokenProvider(
 973            login_requester=login_requester,
 974            session_token_path=model.session_token_path,
 975            expiration_duration=parse_duration(model.expiration_duration)
 976            if model.expiration_duration
 977            else None,
 978            parameters=model.parameters or {},
 979            message_repository=self._message_repository,
 980            decoder=decoder,
 981        )
 982        if model.request_authentication.type == "Bearer":
 983            return ModelToComponentFactory.create_bearer_authenticator(
 984                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
 985                config,
 986                token_provider=token_provider,
 987            )
 988        else:
 989            return self.create_api_key_authenticator(
 990                ApiKeyAuthenticatorModel(
 991                    type="ApiKeyAuthenticator",
 992                    api_token="",
 993                    inject_into=model.request_authentication.inject_into,
 994                ),  # type: ignore # $parameters and headers default to None
 995                config=config,
 996                token_provider=token_provider,
 997            )
 998
 999    @staticmethod
1000    def create_basic_http_authenticator(
1001        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
1002    ) -> BasicHttpAuthenticator:
1003        return BasicHttpAuthenticator(
1004            password=model.password or "",
1005            username=model.username,
1006            config=config,
1007            parameters=model.parameters or {},
1008        )
1009
1010    @staticmethod
1011    def create_bearer_authenticator(
1012        model: BearerAuthenticatorModel,
1013        config: Config,
1014        token_provider: Optional[TokenProvider] = None,
1015        **kwargs: Any,
1016    ) -> BearerAuthenticator:
1017        if token_provider is not None and model.api_token != "":
1018            raise ValueError(
1019                "If token_provider is set, api_token is ignored and must be set to an empty string."
1020            )
1021        return BearerAuthenticator(
1022            token_provider=(
1023                token_provider
1024                if token_provider is not None
1025                else InterpolatedStringTokenProvider(
1026                    api_token=model.api_token or "",
1027                    config=config,
1028                    parameters=model.parameters or {},
1029                )
1030            ),
1031            config=config,
1032            parameters=model.parameters or {},
1033        )
1034
1035    @staticmethod
1036    def create_dynamic_stream_check_config(
1037        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
1038    ) -> DynamicStreamCheckConfig:
1039        return DynamicStreamCheckConfig(
1040            dynamic_stream_name=model.dynamic_stream_name,
1041            stream_count=model.stream_count or 0,
1042        )
1043
1044    def create_check_stream(
1045        self, model: CheckStreamModel, config: Config, **kwargs: Any
1046    ) -> CheckStream:
1047        if model.dynamic_streams_check_configs is None and model.stream_names is None:
1048            raise ValueError(
1049                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
1050            )
1051
1052        dynamic_streams_check_configs = (
1053            [
1054                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
1055                for dynamic_stream_check_config in model.dynamic_streams_check_configs
1056            ]
1057            if model.dynamic_streams_check_configs
1058            else []
1059        )
1060
1061        return CheckStream(
1062            stream_names=model.stream_names or [],
1063            dynamic_streams_check_configs=dynamic_streams_check_configs,
1064            parameters={},
1065        )
1066
1067    @staticmethod
1068    def create_check_dynamic_stream(
1069        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
1070    ) -> CheckDynamicStream:
1071        assert model.use_check_availability is not None  # for mypy
1072
1073        use_check_availability = model.use_check_availability
1074
1075        return CheckDynamicStream(
1076            stream_count=model.stream_count,
1077            use_check_availability=use_check_availability,
1078            parameters={},
1079        )
1080
1081    def create_composite_error_handler(
1082        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
1083    ) -> CompositeErrorHandler:
1084        error_handlers = [
1085            self._create_component_from_model(model=error_handler_model, config=config)
1086            for error_handler_model in model.error_handlers
1087        ]
1088        return CompositeErrorHandler(
1089            error_handlers=error_handlers, parameters=model.parameters or {}
1090        )
1091
1092    @staticmethod
1093    def create_concurrency_level(
1094        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
1095    ) -> ConcurrencyLevel:
1096        return ConcurrencyLevel(
1097            default_concurrency=model.default_concurrency,
1098            max_concurrency=model.max_concurrency,
1099            config=config,
1100            parameters={},
1101        )
1102
1103    @staticmethod
1104    def apply_stream_state_migrations(
1105        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
1106    ) -> MutableMapping[str, Any]:
1107        if stream_state_migrations:
1108            for state_migration in stream_state_migrations:
1109                if state_migration.should_migrate(stream_state):
1110                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
1111                    stream_state = dict(state_migration.migrate(stream_state))
1112        return stream_state
1113
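# Illustration: a standalone sketch of the migration protocol that
# apply_stream_state_migrations relies on -- each migration exposes
# should_migrate(state) and migrate(state), migrations run in order, and each
# result is re-wrapped in a mutable dict. The legacy/new state shapes below are
# invented for the example.
from typing import Any, List, MutableMapping

class SketchLegacyCursorMigration:
    def should_migrate(self, stream_state: MutableMapping[str, Any]) -> bool:
        return "legacy_cursor" in stream_state

    def migrate(self, stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
        return {"cursor": stream_state["legacy_cursor"]}

def apply_migrations(
    migrations: List[Any], stream_state: MutableMapping[str, Any]
) -> MutableMapping[str, Any]:
    for migration in migrations:
        if migration.should_migrate(stream_state):
            stream_state = dict(migration.migrate(stream_state))
    return stream_state

assert apply_migrations(
    [SketchLegacyCursorMigration()], {"legacy_cursor": "2024-01-01"}
) == {"cursor": "2024-01-01"}
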
1114    def create_concurrent_cursor_from_datetime_based_cursor(
1115        self,
1116        model_type: Type[BaseModel],
1117        component_definition: ComponentDefinition,
1118        stream_name: str,
1119        stream_namespace: Optional[str],
1120        config: Config,
1121        message_repository: Optional[MessageRepository] = None,
1122        runtime_lookback_window: Optional[datetime.timedelta] = None,
1123        stream_state_migrations: Optional[List[Any]] = None,
1124        **kwargs: Any,
1125    ) -> ConcurrentCursor:
1126        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1127        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1128        # incoming state and connector_state_manager that is initialized when the component factory is created
1129        stream_state = (
1130            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1131            if "stream_state" not in kwargs
1132            else kwargs["stream_state"]
1133        )
1134        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1135
1136        component_type = component_definition.get("type")
1137        if component_type != model_type.__name__:
1138            raise ValueError(
1139                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1140            )
1141
1142        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1143
1144        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1145            raise ValueError(
1146                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1147            )
1148
1149        interpolated_cursor_field = InterpolatedString.create(
1150            datetime_based_cursor_model.cursor_field,
1151            parameters=datetime_based_cursor_model.parameters or {},
1152        )
1153        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1154
1155        interpolated_partition_field_start = InterpolatedString.create(
1156            datetime_based_cursor_model.partition_field_start or "start_time",
1157            parameters=datetime_based_cursor_model.parameters or {},
1158        )
1159        interpolated_partition_field_end = InterpolatedString.create(
1160            datetime_based_cursor_model.partition_field_end or "end_time",
1161            parameters=datetime_based_cursor_model.parameters or {},
1162        )
1163
1164        slice_boundary_fields = (
1165            interpolated_partition_field_start.eval(config=config),
1166            interpolated_partition_field_end.eval(config=config),
1167        )
1168
1169        datetime_format = datetime_based_cursor_model.datetime_format
1170
1171        cursor_granularity = (
1172            parse_duration(datetime_based_cursor_model.cursor_granularity)
1173            if datetime_based_cursor_model.cursor_granularity
1174            else None
1175        )
1176
1177        lookback_window = None
1178        interpolated_lookback_window = (
1179            InterpolatedString.create(
1180                datetime_based_cursor_model.lookback_window,
1181                parameters=datetime_based_cursor_model.parameters or {},
1182            )
1183            if datetime_based_cursor_model.lookback_window
1184            else None
1185        )
1186        if interpolated_lookback_window:
1187            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
1188            if evaluated_lookback_window:
1189                lookback_window = parse_duration(evaluated_lookback_window)
1190
1191        connector_state_converter: DateTimeStreamStateConverter
1192        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1193            datetime_format=datetime_format,
1194            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1195            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1196            cursor_granularity=cursor_granularity,
1197        )
1198
1199        # Adjusts the stream state by applying the runtime lookback window.
1200        # This is used to ensure correct state handling in case of failed partitions.
1201        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1202        if runtime_lookback_window and stream_state_value:
1203            new_stream_state = (
1204                connector_state_converter.parse_timestamp(stream_state_value)
1205                - runtime_lookback_window
1206            )
1207            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1208                new_stream_state
1209            )
1210
1211        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
1212        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
1213            start_date_runtime_value = self.create_min_max_datetime(
1214                model=datetime_based_cursor_model.start_datetime, config=config
1215            )
1216        else:
1217            start_date_runtime_value = datetime_based_cursor_model.start_datetime
1218
1219        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
1220        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
1221            end_date_runtime_value = self.create_min_max_datetime(
1222                model=datetime_based_cursor_model.end_datetime, config=config
1223            )
1224        else:
1225            end_date_runtime_value = datetime_based_cursor_model.end_datetime
1226
1227        interpolated_start_date = MinMaxDatetime.create(
1228            interpolated_string_or_min_max_datetime=start_date_runtime_value,
1229            parameters=datetime_based_cursor_model.parameters,
1230        )
1231        interpolated_end_date = (
1232            None
1233            if not end_date_runtime_value
1234            else MinMaxDatetime.create(
1235                end_date_runtime_value, datetime_based_cursor_model.parameters
1236            )
1237        )
1238
1239        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
1240        if not interpolated_start_date.datetime_format:
1241            interpolated_start_date.datetime_format = datetime_format
1242        if interpolated_end_date and not interpolated_end_date.datetime_format:
1243            interpolated_end_date.datetime_format = datetime_format
1244
1245        start_date = interpolated_start_date.get_datetime(config=config)
1246        end_date_provider = (
1247            partial(interpolated_end_date.get_datetime, config)
1248            if interpolated_end_date
1249            else connector_state_converter.get_end_provider()
1250        )
1251
1252        if (
1253            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
1254        ) or (
1255            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
1256        ):
1257            raise ValueError(
1258                f"If step is defined, cursor_granularity should be defined as well, and vice versa. "
1259                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
1260            )
1261
1262        # When step is not defined, default to a step size from the starting date to the present moment
1263        step_length = datetime.timedelta.max
1264        interpolated_step = (
1265            InterpolatedString.create(
1266                datetime_based_cursor_model.step,
1267                parameters=datetime_based_cursor_model.parameters or {},
1268            )
1269            if datetime_based_cursor_model.step
1270            else None
1271        )
1272        if interpolated_step:
1273            evaluated_step = interpolated_step.eval(config)
1274            if evaluated_step:
1275                step_length = parse_duration(evaluated_step)
1276
1277        clamping_strategy: ClampingStrategy = NoClamping()
1278        if datetime_based_cursor_model.clamping:
1279            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1280            # it is still better than shifting the low-code concept of interpolation into the ConcurrentCursor
1281            # runtime object, which we want to keep agnostic of low-code.
1282            target = InterpolatedString(
1283                string=datetime_based_cursor_model.clamping.target,
1284                parameters=datetime_based_cursor_model.parameters or {},
1285            )
1286            evaluated_target = target.eval(config=config)
1287            match evaluated_target:
1288                case "DAY":
1289                    clamping_strategy = DayClampingStrategy()
1290                    end_date_provider = ClampingEndProvider(
1291                        DayClampingStrategy(is_ceiling=False),
1292                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1293                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
1294                    )
1295                case "WEEK":
1296                    if (
1297                        not datetime_based_cursor_model.clamping.target_details
1298                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
1299                    ):
1300                        raise ValueError(
1301                            "Given WEEK clamping, weekday needs to be provided as target_details"
1302                        )
1303                    weekday = self._assemble_weekday(
1304                        datetime_based_cursor_model.clamping.target_details["weekday"]
1305                    )
1306                    clamping_strategy = WeekClampingStrategy(weekday)
1307                    end_date_provider = ClampingEndProvider(
1308                        WeekClampingStrategy(weekday, is_ceiling=False),
1309                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1310                        granularity=cursor_granularity or datetime.timedelta(days=1),
1311                    )
1312                case "MONTH":
1313                    clamping_strategy = MonthClampingStrategy()
1314                    end_date_provider = ClampingEndProvider(
1315                        MonthClampingStrategy(is_ceiling=False),
1316                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1317                        granularity=cursor_granularity or datetime.timedelta(days=1),
1318                    )
1319                case _:
1320                    raise ValueError(
1321                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1322                    )
1323
1324        return ConcurrentCursor(
1325            stream_name=stream_name,
1326            stream_namespace=stream_namespace,
1327            stream_state=stream_state,
1328            message_repository=message_repository or self._message_repository,
1329            connector_state_manager=self._connector_state_manager,
1330            connector_state_converter=connector_state_converter,
1331            cursor_field=cursor_field,
1332            slice_boundary_fields=slice_boundary_fields,
1333            start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1334            end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1335            lookback_window=lookback_window,
1336            slice_range=step_length,
1337            cursor_granularity=cursor_granularity,
1338            clamping_strategy=clamping_strategy,
1339        )
1340
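# Illustration: a standalone sketch of why `step` and `cursor_granularity` must
# be defined together (validated above) -- slices are `step` wide, and
# consecutive slice boundaries sit one `cursor_granularity` apart so ranges
# neither overlap nor leave gaps. Simplified; the real ConcurrentCursor also
# handles lookback windows, clamping, and open-ended end dates.
import datetime
from typing import Iterator, Tuple

def generate_slices(
    start: datetime.datetime,
    end: datetime.datetime,
    step: datetime.timedelta,
    cursor_granularity: datetime.timedelta,
) -> Iterator[Tuple[datetime.datetime, datetime.datetime]]:
    lower = start
    while lower <= end:
        upper = min(lower + step - cursor_granularity, end)
        yield (lower, upper)
        lower = upper + cursor_granularity

day = datetime.timedelta(days=1)
second = datetime.timedelta(seconds=1)
slices = list(
    generate_slices(
        datetime.datetime(2024, 1, 1),
        datetime.datetime(2024, 1, 2, 23, 59, 59),
        step=day,
        cursor_granularity=second,
    )
)
# Two day-wide slices whose boundaries abut without overlapping:
assert slices == [
    (datetime.datetime(2024, 1, 1), datetime.datetime(2024, 1, 1, 23, 59, 59)),
    (datetime.datetime(2024, 1, 2), datetime.datetime(2024, 1, 2, 23, 59, 59)),
]
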
1341    def create_concurrent_cursor_from_incrementing_count_cursor(
1342        self,
1343        model_type: Type[BaseModel],
1344        component_definition: ComponentDefinition,
1345        stream_name: str,
1346        stream_namespace: Optional[str],
1347        config: Config,
1348        message_repository: Optional[MessageRepository] = None,
1349        **kwargs: Any,
1350    ) -> ConcurrentCursor:
1351        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1352        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1353        # incoming state and connector_state_manager that is initialized when the component factory is created
1354        stream_state = (
1355            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1356            if "stream_state" not in kwargs
1357            else kwargs["stream_state"]
1358        )
1359
1360        component_type = component_definition.get("type")
1361        if component_type != model_type.__name__:
1362            raise ValueError(
1363                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1364            )
1365
1366        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
1367
1368        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
1369            raise ValueError(
1370                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
1371            )
1372
1373        interpolated_start_value = (
1374            InterpolatedString.create(
1375                incrementing_count_cursor_model.start_value,  # type: ignore
1376                parameters=incrementing_count_cursor_model.parameters or {},
1377            )
1378            if incrementing_count_cursor_model.start_value
1379            else 0
1380        )
1381
1382        interpolated_cursor_field = InterpolatedString.create(
1383            incrementing_count_cursor_model.cursor_field,
1384            parameters=incrementing_count_cursor_model.parameters or {},
1385        )
1386        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1387
1388        connector_state_converter = IncrementingCountStreamStateConverter(
1389            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1390        )
1391
1392        return ConcurrentCursor(
1393            stream_name=stream_name,
1394            stream_namespace=stream_namespace,
1395            stream_state=stream_state,
1396            message_repository=message_repository or self._message_repository,
1397            connector_state_manager=self._connector_state_manager,
1398            connector_state_converter=connector_state_converter,
1399            cursor_field=cursor_field,
1400            slice_boundary_fields=None,
1401            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1402            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1403        )
1404
1405    def _assemble_weekday(self, weekday: str) -> Weekday:
1406        match weekday:
1407            case "MONDAY":
1408                return Weekday.MONDAY
1409            case "TUESDAY":
1410                return Weekday.TUESDAY
1411            case "WEDNESDAY":
1412                return Weekday.WEDNESDAY
1413            case "THURSDAY":
1414                return Weekday.THURSDAY
1415            case "FRIDAY":
1416                return Weekday.FRIDAY
1417            case "SATURDAY":
1418                return Weekday.SATURDAY
1419            case "SUNDAY":
1420                return Weekday.SUNDAY
1421            case _:
1422                raise ValueError(f"Unknown weekday {weekday}")
1423
1424    def create_concurrent_cursor_from_perpartition_cursor(
1425        self,
1426        state_manager: ConnectorStateManager,
1427        model_type: Type[BaseModel],
1428        component_definition: ComponentDefinition,
1429        stream_name: str,
1430        stream_namespace: Optional[str],
1431        config: Config,
1432        stream_state: MutableMapping[str, Any],
1433        partition_router: PartitionRouter,
1434        stream_state_migrations: Optional[List[Any]] = None,
1435        **kwargs: Any,
1436    ) -> ConcurrentPerPartitionCursor:
1437        component_type = component_definition.get("type")
1438        if component_definition.get("type") != model_type.__name__:
1439            raise ValueError(
1440                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1441            )
1442
1443        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1444
1445        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1446            raise ValueError(
1447                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1448            )
1449
1450        interpolated_cursor_field = InterpolatedString.create(
1451            datetime_based_cursor_model.cursor_field,
1452            parameters=datetime_based_cursor_model.parameters or {},
1453        )
1454        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1455
1456        datetime_format = datetime_based_cursor_model.datetime_format
1457
1458        cursor_granularity = (
1459            parse_duration(datetime_based_cursor_model.cursor_granularity)
1460            if datetime_based_cursor_model.cursor_granularity
1461            else None
1462        )
1463
1464        connector_state_converter: DateTimeStreamStateConverter
1465        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1466            datetime_format=datetime_format,
1467            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1468            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1469            cursor_granularity=cursor_granularity,
1470        )
1471
1472        # Create the cursor factory
1473        cursor_factory = ConcurrentCursorFactory(
1474            partial(
1475                self.create_concurrent_cursor_from_datetime_based_cursor,
1476                state_manager=state_manager,
1477                model_type=model_type,
1478                component_definition=component_definition,
1479                stream_name=stream_name,
1480                stream_namespace=stream_namespace,
1481                config=config,
1482                message_repository=NoopMessageRepository(),
1483                stream_state_migrations=stream_state_migrations,
1484            )
1485        )
1486        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1487        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
1488        use_global_cursor = isinstance(
1489            partition_router, GroupingPartitionRouter
1490        ) or component_definition.get("global_substream_cursor", False)
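        # For example (illustrative manifest fragment, not from any specific connector):
        #   incremental_sync:
        #     type: DatetimeBasedCursor
        #     global_substream_cursor: true
        # forces a single global cursor even when a per-partition router is present.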
1491
1492        # Return the concurrent per-partition cursor
1493        return ConcurrentPerPartitionCursor(
1494            cursor_factory=cursor_factory,
1495            partition_router=partition_router,
1496            stream_name=stream_name,
1497            stream_namespace=stream_namespace,
1498            stream_state=stream_state,
1499            message_repository=self._message_repository,  # type: ignore
1500            connector_state_manager=state_manager,
1501            connector_state_converter=connector_state_converter,
1502            cursor_field=cursor_field,
1503            use_global_cursor=use_global_cursor,
1504        )
1505
1506    @staticmethod
1507    def create_constant_backoff_strategy(
1508        model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
1509    ) -> ConstantBackoffStrategy:
1510        return ConstantBackoffStrategy(
1511            backoff_time_in_seconds=model.backoff_time_in_seconds,
1512            config=config,
1513            parameters=model.parameters or {},
1514        )
1515
1516    def create_cursor_pagination(
1517        self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
1518    ) -> CursorPaginationStrategy:
1519        if isinstance(decoder, PaginationDecoderDecorator):
1520            inner_decoder = decoder.decoder
1521        else:
1522            inner_decoder = decoder
1523            decoder = PaginationDecoderDecorator(decoder=decoder)
1524
1525        if self._is_supported_decoder_for_pagination(inner_decoder):
1526            decoder_to_use = decoder
1527        else:
1528            raise ValueError(
1529                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1530            )
1531
1532        return CursorPaginationStrategy(
1533            cursor_value=model.cursor_value,
1534            decoder=decoder_to_use,
1535            page_size=model.page_size,
1536            stop_condition=model.stop_condition,
1537            config=config,
1538            parameters=model.parameters or {},
1539        )
1540
1541    def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
1542        """
1543        Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
1544        instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor.
1545        :param model: The Pydantic model of the custom component being created
1546        :param config: The custom-defined connector config
1547        :return: The declarative component built from the Pydantic model to be used at runtime
1548        """
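        # Hedged sketch (hypothetical names): a manifest entry such as
        #   {"type": "CustomPaginationStrategy",
        #    "class_name": "source_acme.components.MyPaginationStrategy",
        #    "page_size": 10}
        # is parsed into `model`; its fields and kwargs are merged below, and only keys
        # that appear in MyPaginationStrategy's type hints reach the constructor.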
1549        custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1550        component_fields = get_type_hints(custom_component_class)
1551        model_args = model.dict()
1552        model_args["config"] = config
1553
1554        # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions
1555        # we defer to these arguments over the component's definition
1556        for key, arg in kwargs.items():
1557            model_args[key] = arg
1558
1559        # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
1560        # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
1561        # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
1562        for model_field, model_value in model_args.items():
1563            # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
1564            if (
1565                isinstance(model_value, dict)
1566                and "type" not in model_value
1567                and model_field in component_fields
1568            ):
1569                derived_type = self._derive_component_type_from_type_hints(
1570                    component_fields.get(model_field)
1571                )
1572                if derived_type:
1573                    model_value["type"] = derived_type
1574
1575            if self._is_component(model_value):
1576                model_args[model_field] = self._create_nested_component(
1577                    model, model_field, model_value, config
1578                )
1579            elif isinstance(model_value, list):
1580                vals = []
1581                for v in model_value:
1582                    if isinstance(v, dict) and "type" not in v and model_field in component_fields:
1583                        derived_type = self._derive_component_type_from_type_hints(
1584                            component_fields.get(model_field)
1585                        )
1586                        if derived_type:
1587                            v["type"] = derived_type
1588                    if self._is_component(v):
1589                        vals.append(self._create_nested_component(model, model_field, v, config))
1590                    else:
1591                        vals.append(v)
1592                model_args[model_field] = vals
1593
1594        kwargs = {
1595            class_field: model_args[class_field]
1596            for class_field in component_fields.keys()
1597            if class_field in model_args
1598        }
1599        return custom_component_class(**kwargs)
1600
1601    @staticmethod
1602    def _get_class_from_fully_qualified_class_name(
1603        full_qualified_class_name: str,
1604    ) -> Any:
1605        """Get a class from its fully qualified name.
1606
1607        If a custom components module is needed, we assume it is already registered - probably
1608        as `source_declarative_manifest.components` or `components`.
1609
1610        Args:
1611            full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1612
1613        Returns:
1614            Any: The class object.
1615
1616        Raises:
1617            ValueError: If the class cannot be loaded.
1618        """
1619        split = full_qualified_class_name.split(".")
1620        module_name_full = ".".join(split[:-1])
1621        class_name = split[-1]
1622
1623        try:
1624            module_ref = importlib.import_module(module_name_full)
1625        except ModuleNotFoundError as e:
1626            if split[0] == "source_declarative_manifest":
1627                # During testing, the modules containing the custom components are not moved to source_declarative_manifest. To run the tests, add the source folder to your PYTHONPATH or append it at runtime using sys.path.append, as shown below.
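                # For example (hypothetical path):
                #   import sys
                #   sys.path.append("airbyte-integrations/connectors/source-acme")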
1628                try:
1631                    module_name_with_source_declarative_manifest = ".".join(split[1:-1])
1632                    module_ref = importlib.import_module(
1633                        module_name_with_source_declarative_manifest
1634                    )
1635                except ModuleNotFoundError:
1636                    raise ValueError(f"Could not load module `{module_name_full}`.") from e
1637            else:
1638                raise ValueError(f"Could not load module `{module_name_full}`.") from e
1639
1640        try:
1641            return getattr(module_ref, class_name)
1642        except AttributeError as e:
1643            raise ValueError(
1644                f"Could not load class `{class_name}` from module `{module_name_full}`.",
1645            ) from e
1646
1647    @staticmethod
1648    def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
1649        interface = field_type
1650        while True:
1651            origin = get_origin(interface)
1652            if origin:
1653                # Unnest types until we reach the raw type
1654                # List[T] -> T
1655                # Optional[List[T]] -> T
1656                args = get_args(interface)
1657                interface = args[0]
1658            else:
1659                break
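        # Hedged examples: Optional[List[DpathExtractor]] unnests to DpathExtractor and
        # yields "DpathExtractor"; Optional[str] unnests to the builtin str and yields None.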
1660        if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
1661            return interface.__name__
1662        return None
1663
1664    @staticmethod
1665    def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1666        if not cls:
1667            return False
1668        return cls.__module__ == "builtins"
1669
1670    @staticmethod
1671    def _extract_missing_parameters(error: TypeError) -> List[str]:
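        # Hedged example: a TypeError whose message ends with
        #   "missing 2 required keyword-only arguments: 'region' and 'api_key'"
        # yields ["region", "api_key"].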
1672        parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
1673        if parameter_search:
1674            return re.findall(r"\'(.+?)\'", parameter_search.group(1))
1675        else:
1676            return []
1677
1678    def _create_nested_component(
1679        self, model: Any, model_field: str, model_value: Any, config: Config
1680    ) -> Any:
1681        type_name = model_value.get("type", None)
1682        if not type_name:
1683            # If no type is specified, we assume this is a plain dictionary that can be returned as-is rather than built into a subcomponent
1684            return model_value
1685
1686        model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
1687        if model_type:
1688            parsed_model = model_type.parse_obj(model_value)
1689            try:
1690                # To improve usability of the language, certain fields are shared between components. This can come in the form of
1691                # a parent component passing some of its fields to a child component, or the parent extracting fields from other child
1692                # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base
1693                # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created
1694                # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that
1695                # are needed by a component and would otherwise not be shared.
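                # Hedged illustration: a paginator nested under a custom retriever could declare
                #   paginator:
                #     type: DefaultPaginator
                #     $parameters:
                #       url_base: "https://api.example.com"  # hypothetical value
                # so that the keyword-only `url_base` expected by its constructor is satisfied.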
1696                model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
1697                constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
1698                model_parameters = model_value.get("$parameters", {})
1699                matching_parameters = {
1700                    kwarg: model_parameters[kwarg]
1701                    for kwarg in constructor_kwargs
1702                    if kwarg in model_parameters
1703                }
1704                return self._create_component_from_model(
1705                    model=parsed_model, config=config, **matching_parameters
1706                )
1707            except TypeError as error:
1708                missing_parameters = self._extract_missing_parameters(error)
1709                if missing_parameters:
1710                    raise ValueError(
1711                        f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
1712                        + ", ".join(
1713                            (
1714                                f"{type_name}.$parameters.{parameter}"
1715                                for parameter in missing_parameters
1716                            )
1717                        )
1718                    )
1719                raise TypeError(
1720                    f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
1721                )
1722        else:
1723            raise ValueError(
1724                f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
1725            )
1726
1727    @staticmethod
1728    def _is_component(model_value: Any) -> bool:
1729        return isinstance(model_value, dict) and model_value.get("type") is not None
1730
1731    def create_datetime_based_cursor(
1732        self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
1733    ) -> DatetimeBasedCursor:
1734        start_datetime: Union[str, MinMaxDatetime] = (
1735            model.start_datetime
1736            if isinstance(model.start_datetime, str)
1737            else self.create_min_max_datetime(model.start_datetime, config)
1738        )
1739        end_datetime: Union[str, MinMaxDatetime, None] = None
1740        if model.is_data_feed and model.end_datetime:
1741            raise ValueError("Data feed does not support end_datetime")
1742        if model.is_data_feed and model.is_client_side_incremental:
1743            raise ValueError(
1744                "`Client side incremental` cannot be applied together with `data feed`. Choose only one of them."
1745            )
1746        if model.end_datetime:
1747            end_datetime = (
1748                model.end_datetime
1749                if isinstance(model.end_datetime, str)
1750                else self.create_min_max_datetime(model.end_datetime, config)
1751            )
1752
1753        end_time_option = (
1754            self._create_component_from_model(
1755                model.end_time_option, config, parameters=model.parameters or {}
1756            )
1757            if model.end_time_option
1758            else None
1759        )
1760        start_time_option = (
1761            self._create_component_from_model(
1762                model.start_time_option, config, parameters=model.parameters or {}
1763            )
1764            if model.start_time_option
1765            else None
1766        )
1767
1768        return DatetimeBasedCursor(
1769            cursor_field=model.cursor_field,
1770            cursor_datetime_formats=model.cursor_datetime_formats
1771            if model.cursor_datetime_formats
1772            else [],
1773            cursor_granularity=model.cursor_granularity,
1774            datetime_format=model.datetime_format,
1775            end_datetime=end_datetime,
1776            start_datetime=start_datetime,
1777            step=model.step,
1778            end_time_option=end_time_option,
1779            lookback_window=model.lookback_window,
1780            start_time_option=start_time_option,
1781            partition_field_end=model.partition_field_end,
1782            partition_field_start=model.partition_field_start,
1783            message_repository=self._message_repository,
1784            is_compare_strictly=model.is_compare_strictly,
1785            config=config,
1786            parameters=model.parameters or {},
1787        )
1788
1789    def create_declarative_stream(
1790        self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
1791    ) -> DeclarativeStream:
1792        # When constructing a declarative stream, we merge the incremental_sync component and the retriever's partition_router
1793        # components, if they exist, into a single stream slicer. The result is then passed back as an argument when constructing the
1794        # Retriever. This is done in the declarative stream, not the retriever, to support custom retrievers: the factory's custom
1795        # create methods only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
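        # Hedged sketch of the outcome: a stream declaring both incremental_sync and a
        # partition_router typically ends up with a PerPartitionWithGlobalCursor (one inner
        # cursor per partition); see _build_incremental_cursor below.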
1796        combined_slicers = self._merge_stream_slicers(model=model, config=config)
1797
1798        primary_key = model.primary_key.__root__ if model.primary_key else None
1799        stop_condition_on_cursor = (
1800            model.incremental_sync
1801            and hasattr(model.incremental_sync, "is_data_feed")
1802            and model.incremental_sync.is_data_feed
1803        )
1804        client_side_incremental_sync = None
1805        if (
1806            model.incremental_sync
1807            and hasattr(model.incremental_sync, "is_client_side_incremental")
1808            and model.incremental_sync.is_client_side_incremental
1809        ):
1810            supported_slicers = (
1811                DatetimeBasedCursor,
1812                GlobalSubstreamCursor,
1813                PerPartitionWithGlobalCursor,
1814            )
1815            if combined_slicers and not isinstance(combined_slicers, supported_slicers):
1816                raise ValueError(
1817                    "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1818                )
1819            cursor = (
1820                combined_slicers
1821                if isinstance(
1822                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1823                )
1824                else self._create_component_from_model(model=model.incremental_sync, config=config)
1825            )
1826
1827            client_side_incremental_sync = {"cursor": cursor}
1828
1829        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1830            cursor_model = model.incremental_sync
1831
1832            end_time_option = (
1833                self._create_component_from_model(
1834                    cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1835                )
1836                if cursor_model.end_time_option
1837                else None
1838            )
1839            start_time_option = (
1840                self._create_component_from_model(
1841                    cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1842                )
1843                if cursor_model.start_time_option
1844                else None
1845            )
1846
1847            request_options_provider = DatetimeBasedRequestOptionsProvider(
1848                start_time_option=start_time_option,
1849                end_time_option=end_time_option,
1850                partition_field_start=cursor_model.partition_field_start,
1851                partition_field_end=cursor_model.partition_field_end,
1852                config=config,
1853                parameters=model.parameters or {},
1854            )
1855        elif model.incremental_sync and isinstance(
1856            model.incremental_sync, IncrementingCountCursorModel
1857        ):
1858            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
1859
1860            start_time_option = (
1861                self._create_component_from_model(
1862                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1863                    config,
1864                    parameters=cursor_model.parameters or {},
1865                )
1866                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1867                else None
1868            )
1869
1870            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
1871            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
1872            partition_field_start = "start"
1873
1874            request_options_provider = DatetimeBasedRequestOptionsProvider(
1875                start_time_option=start_time_option,
1876                partition_field_start=partition_field_start,
1877                config=config,
1878                parameters=model.parameters or {},
1879            )
1880        else:
1881            request_options_provider = None
1882
1883        transformations = []
1884        if model.transformations:
1885            for transformation_model in model.transformations:
1886                transformations.append(
1887                    self._create_component_from_model(model=transformation_model, config=config)
1888                )
1889        file_uploader = None
1890        if model.file_uploader:
1891            file_uploader = self._create_component_from_model(
1892                model=model.file_uploader, config=config
1893            )
1894
1895        retriever = self._create_component_from_model(
1896            model=model.retriever,
1897            config=config,
1898            name=model.name,
1899            primary_key=primary_key,
1900            stream_slicer=combined_slicers,
1901            request_options_provider=request_options_provider,
1902            stop_condition_on_cursor=stop_condition_on_cursor,
1903            client_side_incremental_sync=client_side_incremental_sync,
1904            transformations=transformations,
1905            file_uploader=file_uploader,
1906            incremental_sync=model.incremental_sync,
1907        )
1908        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
1909
1910        if model.state_migrations:
1911            state_transformations = [
1912                self._create_component_from_model(state_migration, config, declarative_stream=model)
1913                for state_migration in model.state_migrations
1914            ]
1915        else:
1916            state_transformations = []
1917
1918        schema_loader: Union[
1919            CompositeSchemaLoader,
1920            DefaultSchemaLoader,
1921            DynamicSchemaLoader,
1922            InlineSchemaLoader,
1923            JsonFileSchemaLoader,
1924        ]
1925        if model.schema_loader and isinstance(model.schema_loader, list):
1926            nested_schema_loaders = [
1927                self._create_component_from_model(model=nested_schema_loader, config=config)
1928                for nested_schema_loader in model.schema_loader
1929            ]
1930            schema_loader = CompositeSchemaLoader(
1931                schema_loaders=nested_schema_loaders, parameters={}
1932            )
1933        elif model.schema_loader:
1934            schema_loader = self._create_component_from_model(
1935                model=model.schema_loader,  # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models
1936                config=config,
1937            )
1938        else:
1939            options = model.parameters or {}
1940            if "name" not in options:
1941                options["name"] = model.name
1942            schema_loader = DefaultSchemaLoader(config=config, parameters=options)
1943
1944        return DeclarativeStream(
1945            name=model.name or "",
1946            primary_key=primary_key,
1947            retriever=retriever,
1948            schema_loader=schema_loader,
1949            stream_cursor_field=cursor_field or "",
1950            state_migrations=state_transformations,
1951            config=config,
1952            parameters=model.parameters or {},
1953        )
1954
1955    def _build_stream_slicer_from_partition_router(
1956        self,
1957        model: Union[
1958            AsyncRetrieverModel,
1959            CustomRetrieverModel,
1960            SimpleRetrieverModel,
1961        ],
1962        config: Config,
1963        stream_name: Optional[str] = None,
1964    ) -> Optional[PartitionRouter]:
1965        if (
1966            hasattr(model, "partition_router")
1967            and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
1968            and model.partition_router
1969        ):
1970            stream_slicer_model = model.partition_router
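            # Illustrative manifest fragment (hypothetical values): declaring several routers,
            #   partition_router:
            #     - type: ListPartitionRouter
            #       values: ["US", "CA"]
            #       cursor_field: country
            #     - type: ListPartitionRouter
            #       values: ["2023", "2024"]
            #       cursor_field: year
            # yields the cartesian product of both routers' slices.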
1971            if isinstance(stream_slicer_model, list):
1972                return CartesianProductStreamSlicer(
1973                    [
1974                        self._create_component_from_model(
1975                            model=slicer, config=config, stream_name=stream_name or ""
1976                        )
1977                        for slicer in stream_slicer_model
1978                    ],
1979                    parameters={},
1980                )
1981            else:
1982                return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
1983                    model=stream_slicer_model, config=config, stream_name=stream_name or ""
1984                )
1985        return None
1986
1987    def _build_incremental_cursor(
1988        self,
1989        model: DeclarativeStreamModel,
1990        stream_slicer: Optional[PartitionRouter],
1991        config: Config,
1992    ) -> Optional[StreamSlicer]:
1993        if model.incremental_sync and stream_slicer:
1994            if model.retriever.type == "AsyncRetriever":
1995                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1996                    state_manager=self._connector_state_manager,
1997                    model_type=DatetimeBasedCursorModel,
1998                    component_definition=model.incremental_sync.__dict__,
1999                    stream_name=model.name or "",
2000                    stream_namespace=None,
2001                    config=config or {},
2002                    stream_state={},
2003                    partition_router=stream_slicer,
2004                )
2005
2006            incremental_sync_model = model.incremental_sync
2007            cursor_component = self._create_component_from_model(
2008                model=incremental_sync_model, config=config
2009            )
2010            is_global_cursor = (
2011                hasattr(incremental_sync_model, "global_substream_cursor")
2012                and incremental_sync_model.global_substream_cursor
2013            )
2014
2015            if is_global_cursor:
2016                return GlobalSubstreamCursor(
2017                    stream_cursor=cursor_component, partition_router=stream_slicer
2018                )
2019            return PerPartitionWithGlobalCursor(
2020                cursor_factory=CursorFactory(
2021                    lambda: self._create_component_from_model(
2022                        model=incremental_sync_model, config=config
2023                    ),
2024                ),
2025                partition_router=stream_slicer,
2026                stream_cursor=cursor_component,
2027            )
2028        elif model.incremental_sync:
2029            if model.retriever.type == "AsyncRetriever":
2030                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2031                    model_type=DatetimeBasedCursorModel,
2032                    component_definition=model.incremental_sync.__dict__,
2033                    stream_name=model.name or "",
2034                    stream_namespace=None,
2035                    config=config or {},
2036                    stream_state_migrations=model.state_migrations,
2037                )
2038            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return]  # Will be created Cursor as stream_slicer_model is model.incremental_sync
2039        return None
2040
2041    def _build_resumable_cursor(
2042        self,
2043        model: Union[
2044            AsyncRetrieverModel,
2045            CustomRetrieverModel,
2046            SimpleRetrieverModel,
2047        ],
2048        stream_slicer: Optional[PartitionRouter],
2049    ) -> Optional[StreamSlicer]:
2050        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
2051            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
2052            return ResumableFullRefreshCursor(parameters={})
2053        elif stream_slicer:
2054            # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
2055            return PerPartitionCursor(
2056                cursor_factory=CursorFactory(
2057                    create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
2058                ),
2059                partition_router=stream_slicer,
2060            )
2061        return None
2062
2063    def _merge_stream_slicers(
2064        self, model: DeclarativeStreamModel, config: Config
2065    ) -> Optional[StreamSlicer]:
2066        retriever_model = model.retriever
2067
2068        stream_slicer = self._build_stream_slicer_from_partition_router(
2069            retriever_model, config, stream_name=model.name
2070        )
2071
2072        if retriever_model.type == "AsyncRetriever":
2073            is_not_datetime_cursor = (
2074                model.incremental_sync.type != "DatetimeBasedCursor"
2075                if model.incremental_sync
2076                else None
2077            )
2078            is_partition_router = (
2079                bool(retriever_model.partition_router) if model.incremental_sync else None
2080            )
2081
2082            if is_not_datetime_cursor:
2083                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with cursors
2084                # that support unordered slices (for example, when we trigger reports for January and February, the
2085                # February report can be completed first). Once we have support for custom concurrent cursors or a new
2086                # implementation is available in the CDK, we can enable more cursors here.
2087                raise ValueError(
2088                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
2089                )
2090
2091            if is_partition_router and not stream_slicer:
2092                # Note that this development is also done in parallel to the per partition development which once merged
2093                # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
2094                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
2095
2096        if model.incremental_sync:
2097            return self._build_incremental_cursor(model, stream_slicer, config)
2098
2099        return (
2100            stream_slicer
2101            if self._disable_resumable_full_refresh
2102            else self._build_resumable_cursor(retriever_model, stream_slicer)
2103        )
2104
2105    def create_default_error_handler(
2106        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
2107    ) -> DefaultErrorHandler:
2108        backoff_strategies = []
2109        if model.backoff_strategies:
2110            for backoff_strategy_model in model.backoff_strategies:
2111                backoff_strategies.append(
2112                    self._create_component_from_model(model=backoff_strategy_model, config=config)
2113                )
2114
2115        response_filters = []
2116        if model.response_filters:
2117            for response_filter_model in model.response_filters:
2118                response_filters.append(
2119                    self._create_component_from_model(model=response_filter_model, config=config)
2120                )
2121        response_filters.append(
2122            HttpResponseFilter(config=config, parameters=model.parameters or {})
2123        )
2124
2125        return DefaultErrorHandler(
2126            backoff_strategies=backoff_strategies,
2127            max_retries=model.max_retries,
2128            response_filters=response_filters,
2129            config=config,
2130            parameters=model.parameters or {},
2131        )
2132
2133    def create_default_paginator(
2134        self,
2135        model: DefaultPaginatorModel,
2136        config: Config,
2137        *,
2138        url_base: str,
2139        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2140        decoder: Optional[Decoder] = None,
2141        cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
2142    ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
2143        if decoder:
2144            if self._is_supported_decoder_for_pagination(decoder):
2145                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
2146            else:
2147                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
2148        else:
2149            decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
2150        page_size_option = (
2151            self._create_component_from_model(model=model.page_size_option, config=config)
2152            if model.page_size_option
2153            else None
2154        )
2155        page_token_option = (
2156            self._create_component_from_model(model=model.page_token_option, config=config)
2157            if model.page_token_option
2158            else None
2159        )
2160        pagination_strategy = self._create_component_from_model(
2161            model=model.pagination_strategy,
2162            config=config,
2163            decoder=decoder_to_use,
2164            extractor_model=extractor_model,
2165        )
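        # For data-feed style streams, wrap the strategy so that pagination halts once the
        # cursor reports a record no longer needs to be synced (see CursorStopCondition).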
2166        if cursor_used_for_stop_condition:
2167            pagination_strategy = StopConditionPaginationStrategyDecorator(
2168                pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
2169            )
2170        paginator = DefaultPaginator(
2171            decoder=decoder_to_use,
2172            page_size_option=page_size_option,
2173            page_token_option=page_token_option,
2174            pagination_strategy=pagination_strategy,
2175            url_base=url_base,
2176            config=config,
2177            parameters=model.parameters or {},
2178        )
2179        if self._limit_pages_fetched_per_slice:
2180            return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
2181        return paginator
2182
2183    def create_dpath_extractor(
2184        self,
2185        model: DpathExtractorModel,
2186        config: Config,
2187        decoder: Optional[Decoder] = None,
2188        **kwargs: Any,
2189    ) -> DpathExtractor:
2190        if decoder:
2191            decoder_to_use = decoder
2192        else:
2193            decoder_to_use = JsonDecoder(parameters={})
2194        model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
2195        return DpathExtractor(
2196            decoder=decoder_to_use,
2197            field_path=model_field_path,
2198            config=config,
2199            parameters=model.parameters or {},
2200        )
2201
2202    @staticmethod
2203    def create_response_to_file_extractor(
2204        model: ResponseToFileExtractorModel,
2205        **kwargs: Any,
2206    ) -> ResponseToFileExtractor:
2207        return ResponseToFileExtractor(parameters=model.parameters or {})
2208
2209    @staticmethod
2210    def create_exponential_backoff_strategy(
2211        model: ExponentialBackoffStrategyModel, config: Config
2212    ) -> ExponentialBackoffStrategy:
2213        return ExponentialBackoffStrategy(
2214            factor=model.factor or 5, parameters=model.parameters or {}, config=config
2215        )
2216
2217    @staticmethod
2218    def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey:
2219        return GroupByKey(model.key, config=config, parameters=model.parameters or {})
2220
2221    def create_http_requester(
2222        self,
2223        model: HttpRequesterModel,
2224        config: Config,
2225        decoder: Decoder = JsonDecoder(parameters={}),
2226        query_properties_key: Optional[str] = None,
2227        use_cache: Optional[bool] = None,
2228        *,
2229        name: str,
2230    ) -> HttpRequester:
2231        authenticator = (
2232            self._create_component_from_model(
2233                model=model.authenticator,
2234                config=config,
2235                url_base=model.url or model.url_base,
2236                name=name,
2237                decoder=decoder,
2238            )
2239            if model.authenticator
2240            else None
2241        )
2242        error_handler = (
2243            self._create_component_from_model(model=model.error_handler, config=config)
2244            if model.error_handler
2245            else DefaultErrorHandler(
2246                backoff_strategies=[],
2247                response_filters=[],
2248                config=config,
2249                parameters=model.parameters or {},
2250            )
2251        )
2252
2253        api_budget = self._api_budget
2254
2255        # Removes QueryProperties components from the interpolated mappings because they are designed
2256        # to be used by the SimpleRetriever and are resolved by the provider directly from the slice
2257        # instead of through Jinja interpolation.
2258        request_parameters: Optional[Union[str, Mapping[str, str]]]
2259        if isinstance(model.request_parameters, Mapping):
2260            request_parameters = self._remove_query_properties(model.request_parameters)
2261        else:
2262            request_parameters = model.request_parameters
2263
2264        request_options_provider = InterpolatedRequestOptionsProvider(
2265            request_body=model.request_body,
2266            request_body_data=model.request_body_data,
2267            request_body_json=model.request_body_json,
2268            request_headers=model.request_headers,
2269            request_parameters=request_parameters,
2270            query_properties_key=query_properties_key,
2271            config=config,
2272            parameters=model.parameters or {},
2273        )
2274
2275        assert model.use_cache is not None  # for mypy
2276        assert model.http_method is not None  # for mypy
2277
2278        should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
2279
2280        return HttpRequester(
2281            name=name,
2282            url=model.url,
2283            url_base=model.url_base,
2284            path=model.path,
2285            authenticator=authenticator,
2286            error_handler=error_handler,
2287            api_budget=api_budget,
2288            http_method=HttpMethod[model.http_method.value],
2289            request_options_provider=request_options_provider,
2290            config=config,
2291            disable_retries=self._disable_retries,
2292            parameters=model.parameters or {},
2293            message_repository=self._message_repository,
2294            use_cache=should_use_cache,
2295            decoder=decoder,
2296            stream_response=decoder.is_stream_response() if decoder else False,
2297        )
2298
2299    @staticmethod
2300    def create_http_response_filter(
2301        model: HttpResponseFilterModel, config: Config, **kwargs: Any
2302    ) -> HttpResponseFilter:
2303        if model.action:
2304            action = ResponseAction(model.action.value)
2305        else:
2306            action = None
2307
2308        failure_type = FailureType(model.failure_type.value) if model.failure_type else None
2309
2310        http_codes = (
2311            set(model.http_codes) if model.http_codes else set()
2312        )  # JSON schema notation has no set data type. The schema enforces an array of unique elements
2313
2314        return HttpResponseFilter(
2315            action=action,
2316            failure_type=failure_type,
2317            error_message=model.error_message or "",
2318            error_message_contains=model.error_message_contains or "",
2319            http_codes=http_codes,
2320            predicate=model.predicate or "",
2321            config=config,
2322            parameters=model.parameters or {},
2323        )
2324
2325    @staticmethod
2326    def create_inline_schema_loader(
2327        model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
2328    ) -> InlineSchemaLoader:
2329        return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
2330
2331    def create_complex_field_type(
2332        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
2333    ) -> ComplexFieldType:
2334        items = (
2335            self._create_component_from_model(model=model.items, config=config)
2336            if isinstance(model.items, ComplexFieldTypeModel)
2337            else model.items
2338        )
2339
2340        return ComplexFieldType(field_type=model.field_type, items=items)
2341
2342    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
2343        target_type = (
2344            self._create_component_from_model(model=model.target_type, config=config)
2345            if isinstance(model.target_type, ComplexFieldTypeModel)
2346            else model.target_type
2347        )
2348
2349        return TypesMap(
2350            target_type=target_type,
2351            current_type=model.current_type,
2352            condition=model.condition if model.condition is not None else "True",
2353        )
2354
2355    def create_schema_type_identifier(
2356        self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
2357    ) -> SchemaTypeIdentifier:
2358        types_mapping = []
2359        if model.types_mapping:
2360            types_mapping.extend(
2361                [
2362                    self._create_component_from_model(types_map, config=config)
2363                    for types_map in model.types_mapping
2364                ]
2365            )
2366        model_schema_pointer: List[Union[InterpolatedString, str]] = (
2367            list(model.schema_pointer) if model.schema_pointer else []
2368        )
2369        model_key_pointer: List[Union[InterpolatedString, str]] = list(model.key_pointer)
2370        model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
2371            list(model.type_pointer) if model.type_pointer else None
2372        )
2373
2374        return SchemaTypeIdentifier(
2375            schema_pointer=model_schema_pointer,
2376            key_pointer=model_key_pointer,
2377            type_pointer=model_type_pointer,
2378            types_mapping=types_mapping,
2379            parameters=model.parameters or {},
2380        )
2381
2382    def create_dynamic_schema_loader(
2383        self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2384    ) -> DynamicSchemaLoader:
2385        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2386        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2387
2388        schema_transformations = []
2389        if model.schema_transformations:
2390            for transformation_model in model.schema_transformations:
2391                schema_transformations.append(
2392                    self._create_component_from_model(model=transformation_model, config=config)
2393                )
2394
2395        retriever = self._create_component_from_model(
2396            model=model.retriever,
2397            config=config,
2398            name="dynamic_properties",
2399            primary_key=None,
2400            stream_slicer=combined_slicers,
2401            transformations=[],
2402            use_cache=True,
2403        )
2404        schema_type_identifier = self._create_component_from_model(
2405            model.schema_type_identifier, config=config, parameters=model.parameters or {}
2406        )
2407        return DynamicSchemaLoader(
2408            retriever=retriever,
2409            config=config,
2410            schema_transformations=schema_transformations,
2411            schema_type_identifier=schema_type_identifier,
2412            parameters=model.parameters or {},
2413        )
2414
2415    @staticmethod
2416    def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2417        return JsonDecoder(parameters={})
2418
2419    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2420        return CompositeRawDecoder(
2421            parser=ModelToComponentFactory._get_parser(model, config),
2422            stream_response=not self._emit_connector_builder_messages,
2423        )
2424
2425    def create_jsonl_decoder(
2426        self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2427    ) -> Decoder:
2428        return CompositeRawDecoder(
2429            parser=ModelToComponentFactory._get_parser(model, config),
2430            stream_response=not self._emit_connector_builder_messages,
2431        )
2432
2433    def create_gzip_decoder(
2434        self, model: GzipDecoderModel, config: Config, **kwargs: Any
2435    ) -> Decoder:
2436        _compressed_response_types = {
2437            "gzip",
2438            "x-gzip",
2439            "gzip, deflate",
2440            "x-gzip, deflate",
2441            "application/zip",
2442            "application/gzip",
2443            "application/x-gzip",
2444            "application/x-zip-compressed",
2445        }
2446
2447        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser
2448
2449        if self._emit_connector_builder_messages:
2450            # This is very surprising, but if the response is not streamed,
2451            # CompositeRawDecoder calls response.content, and the requests library actually uncompresses the data there,
2452            # as opposed to response.raw, which uses urllib3 directly and does not uncompress the data.
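            # e.g. for a response with `Content-Encoding: gzip`, requests transparently
            # decodes `response.content`, while `response.raw.read()` returns the
            # still-compressed bytes.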
2453            return CompositeRawDecoder(gzip_parser.inner_parser, False)
2454
2455        return CompositeRawDecoder.by_headers(
2456            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2457            stream_response=True,
2458            fallback_parser=gzip_parser.inner_parser,
2459        )
2460
2461    @staticmethod
2462    def create_incrementing_count_cursor(
2463        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
2464    ) -> DatetimeBasedCursor:
2465        # This should not actually get used anywhere at runtime, but it is needed to pass checks since
2466        # we still parse models into components. The issue is that there is no runtime implementation of an
2467        # IncrementingCountCursor.
2468        # A known and expected limitation of this stub: running a check with a declared IncrementingCountCursor, because the check runs without a ConcurrentCursor.
2469        return DatetimeBasedCursor(
2470            cursor_field=model.cursor_field,
2471            datetime_format="%Y-%m-%d",
2472            start_datetime="2024-12-12",
2473            config=config,
2474            parameters={},
2475        )
2476
2477    @staticmethod
2478    def create_iterable_decoder(
2479        model: IterableDecoderModel, config: Config, **kwargs: Any
2480    ) -> IterableDecoder:
2481        return IterableDecoder(parameters={})
2482
2483    @staticmethod
2484    def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2485        return XmlDecoder(parameters={})
2486
2487    def create_zipfile_decoder(
2488        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2489    ) -> ZipfileDecoder:
2490        return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2491
2492    @staticmethod
2493    def _get_parser(model: BaseModel, config: Config) -> Parser:
2494        if isinstance(model, JsonDecoderModel):
2495            # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases
2496            return JsonParser()
2497        elif isinstance(model, JsonlDecoderModel):
2498            return JsonLineParser()
2499        elif isinstance(model, CsvDecoderModel):
2500            return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
2501        elif isinstance(model, GzipDecoderModel):
2502            return GzipParser(
2503                inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
2504            )
2505        elif isinstance(
2506            model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
2507        ):
2508            raise ValueError(f"Decoder type {model} does not have a parser associated with it")
2509
2510        raise ValueError(f"Unknown decoder type {model}")
2511
2512    @staticmethod
2513    def create_json_file_schema_loader(
2514        model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
2515    ) -> JsonFileSchemaLoader:
2516        return JsonFileSchemaLoader(
2517            file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2518        )
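        # The strategy expects its decoder wrapped in a PaginationDecoderDecorator; wrap a
        # bare decoder here and validate that the inner decoder supports pagination.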
2519
2520    @staticmethod
2521    def create_jwt_authenticator(
2522        model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2523    ) -> JwtAuthenticator:
2524        jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2525        jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2526        return JwtAuthenticator(
2527            config=config,
2528            parameters=model.parameters or {},
2529            algorithm=JwtAlgorithm(model.algorithm.value),
2530            secret_key=model.secret_key,
2531            base64_encode_secret_key=model.base64_encode_secret_key,
2532            token_duration=model.token_duration,
2533            header_prefix=model.header_prefix,
2534            kid=jwt_headers.kid,
2535            typ=jwt_headers.typ,
2536            cty=jwt_headers.cty,
2537            iss=jwt_payload.iss,
2538            sub=jwt_payload.sub,
2539            aud=jwt_payload.aud,
2540            additional_jwt_headers=model.additional_jwt_headers,
2541            additional_jwt_payload=model.additional_jwt_payload,
2542        )
2543
2544    def create_list_partition_router(
2545        self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2546    ) -> ListPartitionRouter:
2547        request_option = (
2548            self._create_component_from_model(model.request_option, config)
2549            if model.request_option
2550            else None
2551        )
2552        return ListPartitionRouter(
2553            cursor_field=model.cursor_field,
2554            request_option=request_option,
2555            values=model.values,
2556            config=config,
2557            parameters=model.parameters or {},
2558        )
2559
2560    @staticmethod
2561    def create_min_max_datetime(
2562        model: MinMaxDatetimeModel, config: Config, **kwargs: Any
2563    ) -> MinMaxDatetime:
2564        return MinMaxDatetime(
2565            datetime=model.datetime,
2566            datetime_format=model.datetime_format or "",
2567            max_datetime=model.max_datetime or "",
2568            min_datetime=model.min_datetime or "",
2569            parameters=model.parameters or {},
2570        )
2571
2572    @staticmethod
2573    def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
2574        return NoAuth(parameters=model.parameters or {})
2575
2576    @staticmethod
2577    def create_no_pagination(
2578        model: NoPaginationModel, config: Config, **kwargs: Any
2579    ) -> NoPagination:
2580        return NoPagination(parameters={})
2581
2582    def create_oauth_authenticator(
2583        self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
2584    ) -> DeclarativeOauth2Authenticator:
2585        profile_assertion = (
2586            self._create_component_from_model(model.profile_assertion, config=config)
2587            if model.profile_assertion
2588            else None
2589        )
2590
2591        if model.refresh_token_updater:
2592            # ignore type error because fixing it would have a lot of dependencies, revisit later
2593            return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
2594                config,
2595                InterpolatedString.create(
2596                    model.token_refresh_endpoint,  # type: ignore
2597                    parameters=model.parameters or {},
2598                ).eval(config),
2599                access_token_name=InterpolatedString.create(
2600                    model.access_token_name or "access_token", parameters=model.parameters or {}
2601                ).eval(config),
2602                refresh_token_name=model.refresh_token_updater.refresh_token_name,
2603                expires_in_name=InterpolatedString.create(
2604                    model.expires_in_name or "expires_in", parameters=model.parameters or {}
2605                ).eval(config),
2606                client_id_name=InterpolatedString.create(
2607                    model.client_id_name or "client_id", parameters=model.parameters or {}
2608                ).eval(config),
2609                client_id=InterpolatedString.create(
2610                    model.client_id, parameters=model.parameters or {}
2611                ).eval(config)
2612                if model.client_id
2613                else model.client_id,
2614                client_secret_name=InterpolatedString.create(
2615                    model.client_secret_name or "client_secret", parameters=model.parameters or {}
2616                ).eval(config),
2617                client_secret=InterpolatedString.create(
2618                    model.client_secret, parameters=model.parameters or {}
2619                ).eval(config)
2620                if model.client_secret
2621                else model.client_secret,
2622                access_token_config_path=model.refresh_token_updater.access_token_config_path,
2623                refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
2624                token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
2625                grant_type_name=InterpolatedString.create(
2626                    model.grant_type_name or "grant_type", parameters=model.parameters or {}
2627                ).eval(config),
2628                grant_type=InterpolatedString.create(
2629                    model.grant_type or "refresh_token", parameters=model.parameters or {}
2630                ).eval(config),
2631                refresh_request_body=InterpolatedMapping(
2632                    model.refresh_request_body or {}, parameters=model.parameters or {}
2633                ).eval(config),
2634                refresh_request_headers=InterpolatedMapping(
2635                    model.refresh_request_headers or {}, parameters=model.parameters or {}
2636                ).eval(config),
2637                scopes=model.scopes,
2638                token_expiry_date_format=model.token_expiry_date_format,
2639                message_repository=self._message_repository,
2640                refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
2641                refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
2642                refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
2643            )
2644        # ignore type error because fixing it would have a lot of dependencies, revisit later
2645        return DeclarativeOauth2Authenticator(  # type: ignore
2646            access_token_name=model.access_token_name or "access_token",
2647            access_token_value=model.access_token_value,
2648            client_id_name=model.client_id_name or "client_id",
2649            client_id=model.client_id,
2650            client_secret_name=model.client_secret_name or "client_secret",
2651            client_secret=model.client_secret,
2652            expires_in_name=model.expires_in_name or "expires_in",
2653            grant_type_name=model.grant_type_name or "grant_type",
2654            grant_type=model.grant_type or "refresh_token",
2655            refresh_request_body=model.refresh_request_body,
2656            refresh_request_headers=model.refresh_request_headers,
2657            refresh_token_name=model.refresh_token_name or "refresh_token",
2658            refresh_token=model.refresh_token,
2659            scopes=model.scopes,
2660            token_expiry_date=model.token_expiry_date,
2661            token_expiry_date_format=model.token_expiry_date_format,
2662            token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2663            token_refresh_endpoint=model.token_refresh_endpoint,
2664            config=config,
2665            parameters=model.parameters or {},
2666            message_repository=self._message_repository,
2667            profile_assertion=profile_assertion,
2668            use_profile_assertion=model.use_profile_assertion,
2669        )
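        # Illustrative manifest sketch (hypothetical values, not part of the source): without a
        # refresh_token_updater the plain DeclarativeOauth2Authenticator branch above is used;
        # adding one switches to DeclarativeSingleUseRefreshTokenOauth2Authenticator, which
        # persists rotated refresh tokens back into the config paths it is given.
        #
        #   authenticator:
        #     type: OAuthAuthenticator
        #     token_refresh_endpoint: "https://api.example.com/oauth/token"
        #     client_id: "{{ config['client_id'] }}"
        #     client_secret: "{{ config['client_secret'] }}"
        #     refresh_token: "{{ config['refresh_token'] }}"
        #     refresh_token_updater:
        #       refresh_token_name: refresh_token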
2670
2671    def create_offset_increment(
2672        self,
2673        model: OffsetIncrementModel,
2674        config: Config,
2675        decoder: Decoder,
2676        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2677        **kwargs: Any,
2678    ) -> OffsetIncrement:
2679        if isinstance(decoder, PaginationDecoderDecorator):
2680            inner_decoder = decoder.decoder
2681        else:
2682            inner_decoder = decoder
2683            decoder = PaginationDecoderDecorator(decoder=decoder)
2684
2685        if self._is_supported_decoder_for_pagination(inner_decoder):
2686            decoder_to_use = decoder
2687        else:
2688            raise ValueError(
2689                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
2690            )
2691
2692        # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
2693        # so that it could be shared by OffsetIncrement and RecordSelector. However, because we instantiate the
2694        # decoder with various decorators here, but not in create_record_selector, it is simpler to retain the
2695        # existing behavior of two separate extractors with identical behavior, since they are built from the same
2696        # extractor model. When we have more time to investigate, we can look into reusing the same component.
2697        extractor = (
2698            self._create_component_from_model(
2699                model=extractor_model, config=config, decoder=decoder_to_use
2700            )
2701            if extractor_model
2702            else None
2703        )
2704
2705        return OffsetIncrement(
2706            page_size=model.page_size,
2707            config=config,
2708            decoder=decoder_to_use,
2709            extractor=extractor,
2710            inject_on_first_request=model.inject_on_first_request or False,
2711            parameters=model.parameters or {},
2712        )
2713
2714    @staticmethod
2715    def create_page_increment(
2716        model: PageIncrementModel, config: Config, **kwargs: Any
2717    ) -> PageIncrement:
2718        return PageIncrement(
2719            page_size=model.page_size,
2720            config=config,
2721            start_from_page=model.start_from_page or 0,
2722            inject_on_first_request=model.inject_on_first_request or False,
2723            parameters=model.parameters or {},
2724        )
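        # Example (illustrative values, not part of the source): a PageIncrement model such as
        #   {"page_size": 100, "start_from_page": 1, "inject_on_first_request": true}
        # pages through 1, 2, 3, ... with 100 records per page, sending the page number
        # on the very first request as well.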
2725
2726    def create_parent_stream_config(
2727        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2728    ) -> ParentStreamConfig:
2729        declarative_stream = self._create_component_from_model(
2730            model.stream, config=config, **kwargs
2731        )
2732        request_option = (
2733            self._create_component_from_model(model.request_option, config=config)
2734            if model.request_option
2735            else None
2736        )
2737
2738        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
2739            raise ValueError(
2740                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
2741            )
2742
2743        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
2744            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
2745        )
2746
2747        return ParentStreamConfig(
2748            parent_key=model.parent_key,
2749            request_option=request_option,
2750            stream=declarative_stream,
2751            partition_field=model.partition_field,
2752            config=config,
2753            incremental_dependency=model.incremental_dependency or False,
2754            parameters=model.parameters or {},
2755            extra_fields=model.extra_fields,
2756            lazy_read_pointer=model_lazy_read_pointer,
2757        )
2758
2759    def create_properties_from_endpoint(
2760        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
2761    ) -> PropertiesFromEndpoint:
2762        retriever = self._create_component_from_model(
2763            model=model.retriever,
2764            config=config,
2765            name="dynamic_properties",
2766            primary_key=None,
2767            stream_slicer=None,
2768            transformations=[],
2769            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ
2770        )
2771        return PropertiesFromEndpoint(
2772            property_field_path=model.property_field_path,
2773            retriever=retriever,
2774            config=config,
2775            parameters=model.parameters or {},
2776        )
2777
2778    def create_property_chunking(
2779        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2780    ) -> PropertyChunking:
2781        record_merge_strategy = (
2782            self._create_component_from_model(
2783                model=model.record_merge_strategy, config=config, **kwargs
2784            )
2785            if model.record_merge_strategy
2786            else None
2787        )
2788
2789        property_limit_type: PropertyLimitType
2790        match model.property_limit_type:
2791            case PropertyLimitTypeModel.property_count:
2792                property_limit_type = PropertyLimitType.property_count
2793            case PropertyLimitTypeModel.characters:
2794                property_limit_type = PropertyLimitType.characters
2795            case _:
2796                raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")
2797
2798        return PropertyChunking(
2799            property_limit_type=property_limit_type,
2800            property_limit=model.property_limit,
2801            record_merge_strategy=record_merge_strategy,
2802            config=config,
2803            parameters=model.parameters or {},
2804        )
2805
2806    def create_query_properties(
2807        self, model: QueryPropertiesModel, config: Config, **kwargs: Any
2808    ) -> QueryProperties:
2809        if isinstance(model.property_list, list):
2810            property_list = model.property_list
2811        else:
2812            property_list = self._create_component_from_model(
2813                model=model.property_list, config=config, **kwargs
2814            )
2815
2816        property_chunking = (
2817            self._create_component_from_model(
2818                model=model.property_chunking, config=config, **kwargs
2819            )
2820            if model.property_chunking
2821            else None
2822        )
2823
2824        return QueryProperties(
2825            property_list=property_list,
2826            always_include_properties=model.always_include_properties,
2827            property_chunking=property_chunking,
2828            config=config,
2829            parameters=model.parameters or {},
2830        )
2831
2832    @staticmethod
2833    def create_record_filter(
2834        model: RecordFilterModel, config: Config, **kwargs: Any
2835    ) -> RecordFilter:
2836        return RecordFilter(
2837            condition=model.condition or "", config=config, parameters=model.parameters or {}
2838        )
2839
2840    @staticmethod
2841    def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
2842        return RequestPath(parameters={})
2843
2844    @staticmethod
2845    def create_request_option(
2846        model: RequestOptionModel, config: Config, **kwargs: Any
2847    ) -> RequestOption:
2848        inject_into = RequestOptionType(model.inject_into.value)
2849        field_path: Optional[List[Union[InterpolatedString, str]]] = (
2850            [
2851                InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
2852                for segment in model.field_path
2853            ]
2854            if model.field_path
2855            else None
2856        )
2857        field_name = (
2858            InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
2859            if model.field_name
2860            else None
2861        )
2862        return RequestOption(
2863            field_name=field_name,
2864            field_path=field_path,
2865            inject_into=inject_into,
2866            parameters=kwargs.get("parameters", {}),
2867        )
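        # Example (illustrative values, not part of the source): a RequestOption such as
        #   {"inject_into": "body_json", "field_path": ["data", "query"]}
        # interpolates each field_path segment for nested injection, while a flat
        #   {"inject_into": "request_parameter", "field_name": "page"}
        # uses field_name instead.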
2868
2869    def create_record_selector(
2870        self,
2871        model: RecordSelectorModel,
2872        config: Config,
2873        *,
2874        name: str,
2875        transformations: List[RecordTransformation] | None = None,
2876        decoder: Decoder | None = None,
2877        client_side_incremental_sync: Dict[str, Any] | None = None,
2878        file_uploader: Optional[DefaultFileUploader] = None,
2879        **kwargs: Any,
2880    ) -> RecordSelector:
2881        extractor = self._create_component_from_model(
2882            model=model.extractor, decoder=decoder, config=config
2883        )
2884        record_filter = (
2885            self._create_component_from_model(model.record_filter, config=config)
2886            if model.record_filter
2887            else None
2888        )
2889
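                # transform_before_filtering defaults to False, but when client_side_incremental_sync
                # is configured and the field was left unset, the default flips to True (see below).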
2890        transform_before_filtering = (
2891            False if model.transform_before_filtering is None else model.transform_before_filtering
2892        )
2893        if client_side_incremental_sync:
2894            record_filter = ClientSideIncrementalRecordFilterDecorator(
2895                config=config,
2896                parameters=model.parameters,
2897                condition=model.record_filter.condition
2898                if (model.record_filter and hasattr(model.record_filter, "condition"))
2899                else None,
2900                **client_side_incremental_sync,
2901            )
2902            transform_before_filtering = (
2903                True
2904                if model.transform_before_filtering is None
2905                else model.transform_before_filtering
2906            )
2907
2908        if model.schema_normalization is None:
2909            # default to no schema normalization if not set
2910            model.schema_normalization = SchemaNormalizationModel.None_
2911
2912        schema_normalization = (
2913            TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
2914            if isinstance(model.schema_normalization, SchemaNormalizationModel)
2915            else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
2916        )
2917
2918        return RecordSelector(
2919            extractor=extractor,
2920            name=name,
2921            config=config,
2922            record_filter=record_filter,
2923            transformations=transformations or [],
2924            file_uploader=file_uploader,
2925            schema_normalization=schema_normalization,
2926            parameters=model.parameters or {},
2927            transform_before_filtering=transform_before_filtering,
2928        )
2929
2930    @staticmethod
2931    def create_remove_fields(
2932        model: RemoveFieldsModel, config: Config, **kwargs: Any
2933    ) -> RemoveFields:
2934        return RemoveFields(
2935            field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
2936        )
2937
2938    def create_selective_authenticator(
2939        self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
2940    ) -> DeclarativeAuthenticator:
2941        authenticators = {
2942            name: self._create_component_from_model(model=auth, config=config)
2943            for name, auth in model.authenticators.items()
2944        }
2945        # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
2946        return SelectiveAuthenticator(  # type: ignore[abstract]
2947            config=config,
2948            authenticators=authenticators,
2949            authenticator_selection_path=model.authenticator_selection_path,
2950            **kwargs,
2951        )
2952
2953    @staticmethod
2954    def create_legacy_session_token_authenticator(
2955        model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
2956    ) -> LegacySessionTokenAuthenticator:
2957        return LegacySessionTokenAuthenticator(
2958            api_url=url_base,
2959            header=model.header,
2960            login_url=model.login_url,
2961            password=model.password or "",
2962            session_token=model.session_token or "",
2963            session_token_response_key=model.session_token_response_key or "",
2964            username=model.username or "",
2965            validate_session_url=model.validate_session_url,
2966            config=config,
2967            parameters=model.parameters or {},
2968        )
2969
2970    def create_simple_retriever(
2971        self,
2972        model: SimpleRetrieverModel,
2973        config: Config,
2974        *,
2975        name: str,
2976        primary_key: Optional[Union[str, List[str], List[List[str]]]],
2977        stream_slicer: Optional[StreamSlicer],
2978        request_options_provider: Optional[RequestOptionsProvider] = None,
2979        stop_condition_on_cursor: bool = False,
2980        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
2981        transformations: List[RecordTransformation],
2982        file_uploader: Optional[DefaultFileUploader] = None,
2983        incremental_sync: Optional[
2984            Union[
2985                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
2986            ]
2987        ] = None,
2988        use_cache: Optional[bool] = None,
2989        **kwargs: Any,
2990    ) -> SimpleRetriever:
2991        def _get_url() -> str:
2992            """
2993            Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever.
2994            This is needed because the URL is not set until the requester is created.
2995            """
2996
2997            _url = (
2998                model.requester.url
2999                if hasattr(model.requester, "url") and model.requester.url is not None
3000                else requester.get_url()
3001            )
3002            _url_base = (
3003                model.requester.url_base
3004                if hasattr(model.requester, "url_base") and model.requester.url_base is not None
3005                else requester.get_url_base()
3006            )
3007
3008            return _url or _url_base
3009
3010        decoder = (
3011            self._create_component_from_model(model=model.decoder, config=config)
3012            if model.decoder
3013            else JsonDecoder(parameters={})
3014        )
3015        record_selector = self._create_component_from_model(
3016            model=model.record_selector,
3017            name=name,
3018            config=config,
3019            decoder=decoder,
3020            transformations=transformations,
3021            client_side_incremental_sync=client_side_incremental_sync,
3022            file_uploader=file_uploader,
3023        )
3024
3025        query_properties: Optional[QueryProperties] = None
3026        query_properties_key: Optional[str] = None
3027        if self._query_properties_in_request_parameters(model.requester):
3028            # It is better to raise an explicit error if PropertiesFromEndpoint is defined in multiple
3029            # places than to silently default to request_parameters, which isn't clearly documented
3030            if (
3031                hasattr(model.requester, "fetch_properties_from_endpoint")
3032                and model.requester.fetch_properties_from_endpoint
3033            ):
3034                raise ValueError(
3035                    f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters"
3036                )
3037
3038            query_properties_definitions = []
3039            for key, request_parameter in model.requester.request_parameters.items():  # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters()
3040                if isinstance(request_parameter, QueryPropertiesModel):
3041                    query_properties_key = key
3042                    query_properties_definitions.append(request_parameter)
3043
3044            if len(query_properties_definitions) > 1:
3045                raise ValueError(
3046                    f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
3047                )
3048
3049            if len(query_properties_definitions) == 1:
3050                query_properties = self._create_component_from_model(
3051                    model=query_properties_definitions[0], config=config
3052                )
3053        elif (
3054            hasattr(model.requester, "fetch_properties_from_endpoint")
3055            and model.requester.fetch_properties_from_endpoint
3056        ):
3057            query_properties_definition = QueryPropertiesModel(
3058                type="QueryProperties",
3059                property_list=model.requester.fetch_properties_from_endpoint,
3060                always_include_properties=None,
3061                property_chunking=None,
3062            )  # type: ignore # $parameters has a default value
3063
3064            query_properties = self.create_query_properties(
3065                model=query_properties_definition,
3066                config=config,
3067            )
3068
3069        requester = self._create_component_from_model(
3070            model=model.requester,
3071            decoder=decoder,
3072            name=name,
3073            query_properties_key=query_properties_key,
3074            use_cache=use_cache,
3075            config=config,
3076        )
3077
3078        # Define cursor only if per partition or common incremental support is needed
3079        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
3080
3081        if type(stream_slicer) is not DatetimeBasedCursor:
3085            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
3086            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
3087            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
3088            # request_options_provider
3089            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
3090        elif not request_options_provider:
3091            request_options_provider = DefaultRequestOptionsProvider(parameters={})
3092
3093        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3094
3095        cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
3096        paginator = (
3097            self._create_component_from_model(
3098                model=model.paginator,
3099                config=config,
3100                url_base=_get_url(),
3101                extractor_model=model.record_selector.extractor,
3102                decoder=decoder,
3103                cursor_used_for_stop_condition=cursor_used_for_stop_condition,
3104            )
3105            if model.paginator
3106            else NoPagination(parameters={})
3107        )
3108
3109        ignore_stream_slicer_parameters_on_paginated_requests = (
3110            model.ignore_stream_slicer_parameters_on_paginated_requests or False
3111        )
3112
3113        if (
3114            model.partition_router
3115            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
3116            and not bool(self._connector_state_manager.get_stream_state(name, None))
3117            and any(
3118                parent_stream_config.lazy_read_pointer
3119                for parent_stream_config in model.partition_router.parent_stream_configs
3120            )
3121        ):
3122            if incremental_sync:
3123                if incremental_sync.type != "DatetimeBasedCursor":
3124                    raise ValueError(
3125                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
3126                    )
3127
3128                elif incremental_sync.step or incremental_sync.cursor_granularity:
3129                    raise ValueError(
3130                        f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}."
3131                    )
3132
3133            if model.decoder and model.decoder.type != "JsonDecoder":
3134                raise ValueError(
3135                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
3136                )
3137
3138            return LazySimpleRetriever(
3139                name=name,
3140                paginator=paginator,
3141                primary_key=primary_key,
3142                requester=requester,
3143                record_selector=record_selector,
3144                stream_slicer=stream_slicer,
3145                request_option_provider=request_options_provider,
3146                cursor=cursor,
3147                config=config,
3148                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3149                parameters=model.parameters or {},
3150            )
3151
3152        if self._limit_slices_fetched or self._emit_connector_builder_messages:
3153            return SimpleRetrieverTestReadDecorator(
3154                name=name,
3155                paginator=paginator,
3156                primary_key=primary_key,
3157                requester=requester,
3158                record_selector=record_selector,
3159                stream_slicer=stream_slicer,
3160                request_option_provider=request_options_provider,
3161                cursor=cursor,
3162                config=config,
3163                maximum_number_of_slices=self._limit_slices_fetched or 5,
3164                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3165                parameters=model.parameters or {},
3166            )
3167        return SimpleRetriever(
3168            name=name,
3169            paginator=paginator,
3170            primary_key=primary_key,
3171            requester=requester,
3172            record_selector=record_selector,
3173            stream_slicer=stream_slicer,
3174            request_option_provider=request_options_provider,
3175            cursor=cursor,
3176            config=config,
3177            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3178            additional_query_properties=query_properties,
3179            parameters=model.parameters or {},
3180        )
3181
3182    @staticmethod
3183    def _query_properties_in_request_parameters(
3184        requester: Union[HttpRequesterModel, CustomRequesterModel],
3185    ) -> bool:
3186        if not hasattr(requester, "request_parameters"):
3187            return False
3188        request_parameters = requester.request_parameters
3189        if request_parameters and isinstance(request_parameters, Mapping):
3190            for request_parameter in request_parameters.values():
3191                if isinstance(request_parameter, QueryPropertiesModel):
3192                    return True
3193        return False
3194
3195    @staticmethod
3196    def _remove_query_properties(
3197        request_parameters: Mapping[str, Union[str, QueryPropertiesModel]],
3198    ) -> Mapping[str, str]:
3199        return {
3200            parameter_field: request_parameter
3201            for parameter_field, request_parameter in request_parameters.items()
3202            if not isinstance(request_parameter, QueryPropertiesModel)
3203        }
3204
3205    def create_state_delegating_stream(
3206        self,
3207        model: StateDelegatingStreamModel,
3208        config: Config,
3209        has_parent_state: Optional[bool] = None,
3210        **kwargs: Any,
3211    ) -> DeclarativeStream:
3212        if (
3213            model.full_refresh_stream.name != model.name
3214            or model.name != model.incremental_stream.name
3215        ):
3216            raise ValueError(
3217                f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
3218            )
3219
3220        stream_model = (
3221            model.incremental_stream
3222            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
3223            else model.full_refresh_stream
3224        )
3225
3226        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # Will be created DeclarativeStream as stream_model is stream description
3227
3228    def _create_async_job_status_mapping(
3229        self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
3230    ) -> Mapping[str, AsyncJobStatus]:
3231        api_status_to_cdk_status = {}
3232        for cdk_status, api_statuses in model.dict().items():
3233            if cdk_status == "type":
3234                # "type" is an element of the dict because of the CDK model's typing, but it is not a CDK status
3235                continue
3236
3237            for status in api_statuses:
3238                if status in api_status_to_cdk_status:
3239                    raise ValueError(
3240                        f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once"
3241                    )
3242                api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status)
3243        return api_status_to_cdk_status
3244
3245    def _get_async_job_status(self, status: str) -> AsyncJobStatus:
3246        match status:
3247            case "running":
3248                return AsyncJobStatus.RUNNING
3249            case "completed":
3250                return AsyncJobStatus.COMPLETED
3251            case "failed":
3252                return AsyncJobStatus.FAILED
3253            case "timeout":
3254                return AsyncJobStatus.TIMED_OUT
3255            case _:
3256                raise ValueError(f"Unsupported CDK status {status}")
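        # Example (illustrative statuses, not part of the source): a status_mapping model of
        #   {"running": ["PENDING", "IN_PROGRESS"], "completed": ["DONE"], "failed": ["ERROR"], "timeout": []}
        # is inverted by _create_async_job_status_mapping into
        #   {"PENDING": AsyncJobStatus.RUNNING, "IN_PROGRESS": AsyncJobStatus.RUNNING,
        #    "DONE": AsyncJobStatus.COMPLETED, "ERROR": AsyncJobStatus.FAILED}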
3257
3258    def create_async_retriever(
3259        self,
3260        model: AsyncRetrieverModel,
3261        config: Config,
3262        *,
3263        name: str,
3264        primary_key: Optional[
3265            Union[str, List[str], List[List[str]]]
3266        ],  # this seems to be needed to match create_simple_retriever
3267        stream_slicer: Optional[StreamSlicer],
3268        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3269        transformations: List[RecordTransformation],
3270        **kwargs: Any,
3271    ) -> AsyncRetriever:
3272        def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
3273            record_selector = RecordSelector(
3274                extractor=download_extractor,
3275                name=name,
3276                record_filter=None,
3277                transformations=transformations,
3278                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
3279                config=config,
3280                parameters={},
3281            )
3282            paginator = (
3283                self._create_component_from_model(
3284                    model=model.download_paginator,
3285                    decoder=decoder,
3286                    config=config,
3287                    url_base="",
3288                )
3289                if model.download_paginator
3290                else NoPagination(parameters={})
3291            )
3292            maximum_number_of_slices = self._limit_slices_fetched or 5
3293
3294            if self._limit_slices_fetched or self._emit_connector_builder_messages:
3295                return SimpleRetrieverTestReadDecorator(
3296                    requester=download_requester,
3297                    record_selector=record_selector,
3298                    primary_key=None,
3299                    name=job_download_components_name,
3300                    paginator=paginator,
3301                    config=config,
3302                    parameters={},
3303                    maximum_number_of_slices=maximum_number_of_slices,
3304                )
3305
3306            return SimpleRetriever(
3307                requester=download_requester,
3308                record_selector=record_selector,
3309                primary_key=None,
3310                name=job_download_components_name,
3311                paginator=paginator,
3312                config=config,
3313                parameters={},
3314            )
3315
3316        def _get_job_timeout() -> datetime.timedelta:
3317            user_defined_timeout: Optional[int] = (
3318                int(
3319                    InterpolatedString.create(
3320                        str(model.polling_job_timeout),
3321                        parameters={},
3322                    ).eval(config)
3323                )
3324                if model.polling_job_timeout
3325                else None
3326            )
3327
3328            # use the user-defined timeout for test reads, defaulting to 15 minutes
3329            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
3330            # for regular (non-Connector Builder) syncs, the default is 60 minutes
3331            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
3332
3333            return (
3334                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
3335            )
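                # Worked example (illustrative): polling_job_timeout=30 yields a 30-minute timeout in
                # both modes; when unset, test reads default to 15 minutes and regular syncs to 60.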
3336
3337        decoder = (
3338            self._create_component_from_model(model=model.decoder, config=config)
3339            if model.decoder
3340            else JsonDecoder(parameters={})
3341        )
3342        record_selector = self._create_component_from_model(
3343            model=model.record_selector,
3344            config=config,
3345            decoder=decoder,
3346            name=name,
3347            transformations=transformations,
3348            client_side_incremental_sync=client_side_incremental_sync,
3349        )
3350        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3351        creation_requester = self._create_component_from_model(
3352            model=model.creation_requester,
3353            decoder=decoder,
3354            config=config,
3355            name=f"job creation - {name}",
3356        )
3357        polling_requester = self._create_component_from_model(
3358            model=model.polling_requester,
3359            decoder=decoder,
3360            config=config,
3361            name=f"job polling - {name}",
3362        )
3363        job_download_components_name = f"job download - {name}"
3364        download_decoder = (
3365            self._create_component_from_model(model=model.download_decoder, config=config)
3366            if model.download_decoder
3367            else JsonDecoder(parameters={})
3368        )
3369        download_extractor = (
3370            self._create_component_from_model(
3371                model=model.download_extractor,
3372                config=config,
3373                decoder=download_decoder,
3374                parameters=model.parameters,
3375            )
3376            if model.download_extractor
3377            else DpathExtractor(
3378                [],
3379                config=config,
3380                decoder=download_decoder,
3381                parameters=model.parameters or {},
3382            )
3383        )
3384        download_requester = self._create_component_from_model(
3385            model=model.download_requester,
3386            decoder=download_decoder,
3387            config=config,
3388            name=job_download_components_name,
3389        )
3390        download_retriever = _get_download_retriever()
3391        abort_requester = (
3392            self._create_component_from_model(
3393                model=model.abort_requester,
3394                decoder=decoder,
3395                config=config,
3396                name=f"job abort - {name}",
3397            )
3398            if model.abort_requester
3399            else None
3400        )
3401        delete_requester = (
3402            self._create_component_from_model(
3403                model=model.delete_requester,
3404                decoder=decoder,
3405                config=config,
3406                name=f"job delete - {name}",
3407            )
3408            if model.delete_requester
3409            else None
3410        )
3411        download_target_requester = (
3412            self._create_component_from_model(
3413                model=model.download_target_requester,
3414                decoder=decoder,
3415                config=config,
3416                name=f"job extract_url - {name}",
3417            )
3418            if model.download_target_requester
3419            else None
3420        )
3421        status_extractor = self._create_component_from_model(
3422            model=model.status_extractor, decoder=decoder, config=config, name=name
3423        )
3424        download_target_extractor = self._create_component_from_model(
3425            model=model.download_target_extractor,
3426            decoder=decoder,
3427            config=config,
3428            name=name,
3429        )
3430
3431        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
3432            creation_requester=creation_requester,
3433            polling_requester=polling_requester,
3434            download_retriever=download_retriever,
3435            download_target_requester=download_target_requester,
3436            abort_requester=abort_requester,
3437            delete_requester=delete_requester,
3438            status_extractor=status_extractor,
3439            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3440            download_target_extractor=download_target_extractor,
3441            job_timeout=_get_job_timeout(),
3442        )
3443
3444        async_job_partition_router = AsyncJobPartitionRouter(
3445            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3446                job_repository,
3447                stream_slices,
3448                self._job_tracker,
3449                self._message_repository,
3450                # FIXME: work would need to be done here to detect whether a stream has a parent stream that is bulk
3451                has_bulk_parent=False,
3452                # set `job_max_retry` to 1 for the Connector Builder use case.
3453                # `None` means the default of 3 retry attempts is used under the hood.
3454                job_max_retry=1 if self._emit_connector_builder_messages else None,
3455            ),
3456            stream_slicer=stream_slicer,
3457            config=config,
3458            parameters=model.parameters or {},
3459        )
3460
3461        return AsyncRetriever(
3462            record_selector=record_selector,
3463            stream_slicer=async_job_partition_router,
3464            config=config,
3465            parameters=model.parameters or {},
3466        )
3467
3468    @staticmethod
3469    def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
3470        return Spec(
3471            connection_specification=model.connection_specification,
3472            documentation_url=model.documentation_url,
3473            advanced_auth=model.advanced_auth,
3474            parameters={},
3475        )
3476
3477    def create_substream_partition_router(
3478        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
3479    ) -> SubstreamPartitionRouter:
3480        parent_stream_configs = []
3481        if model.parent_stream_configs:
3482            parent_stream_configs.extend(
3483                [
3484                    self._create_message_repository_substream_wrapper(
3485                        model=parent_stream_config, config=config, **kwargs
3486                    )
3487                    for parent_stream_config in model.parent_stream_configs
3488                ]
3489            )
3490
3491        return SubstreamPartitionRouter(
3492            parent_stream_configs=parent_stream_configs,
3493            parameters=model.parameters or {},
3494            config=config,
3495        )
3496
3497    def _create_message_repository_substream_wrapper(
3498        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
3499    ) -> Any:
3500        substream_factory = ModelToComponentFactory(
3501            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
3502            limit_slices_fetched=self._limit_slices_fetched,
3503            emit_connector_builder_messages=self._emit_connector_builder_messages,
3504            disable_retries=self._disable_retries,
3505            disable_cache=self._disable_cache,
3506            message_repository=LogAppenderMessageRepositoryDecorator(
3507                {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}},
3508                self._message_repository,
3509                self._evaluate_log_level(self._emit_connector_builder_messages),
3510            ),
3511        )
3512
3513        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
3514        has_parent_state = bool(
3515            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
3516            if model.incremental_dependency
3517            else False
3518        )
3519        return substream_factory._create_component_from_model(
3520            model=model, config=config, has_parent_state=has_parent_state, **kwargs
3521        )
3522
3523    @staticmethod
3524    def create_wait_time_from_header(
3525        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
3526    ) -> WaitTimeFromHeaderBackoffStrategy:
3527        return WaitTimeFromHeaderBackoffStrategy(
3528            header=model.header,
3529            parameters=model.parameters or {},
3530            config=config,
3531            regex=model.regex,
3532            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
3535        )
3536
3537    @staticmethod
3538    def create_wait_until_time_from_header(
3539        model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
3540    ) -> WaitUntilTimeFromHeaderBackoffStrategy:
3541        return WaitUntilTimeFromHeaderBackoffStrategy(
3542            header=model.header,
3543            parameters=model.parameters or {},
3544            config=config,
3545            min_wait=model.min_wait,
3546            regex=model.regex,
3547        )
3548
3549    def get_message_repository(self) -> MessageRepository:
3550        return self._message_repository
3551
3552    def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
3553        return Level.DEBUG if emit_connector_builder_messages else Level.INFO
3554
3555    @staticmethod
3556    def create_components_mapping_definition(
3557        model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
3558    ) -> ComponentMappingDefinition:
3559        interpolated_value = InterpolatedString.create(
3560            model.value, parameters=model.parameters or {}
3561        )
3562        field_path = [
3563            InterpolatedString.create(path, parameters=model.parameters or {})
3564            for path in model.field_path
3565        ]
3566        return ComponentMappingDefinition(
3567            field_path=field_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
3568            value=interpolated_value,
3569            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
3570            parameters=model.parameters or {},
3571        )
3572
3573    def create_http_components_resolver(
3574        self, model: HttpComponentsResolverModel, config: Config
3575    ) -> Any:
3576        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3577        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3578
3579        retriever = self._create_component_from_model(
3580            model=model.retriever,
3581            config=config,
3582            name="",
3583            primary_key=None,
3584            stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3585            transformations=[],
3586        )
3587
3588        components_mapping = [
3589            self._create_component_from_model(
3590                model=components_mapping_definition_model,
3591                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3592                    components_mapping_definition_model.value_type
3593                ),
3594                config=config,
3595            )
3596            for components_mapping_definition_model in model.components_mapping
3597        ]
3598
3599        return HttpComponentsResolver(
3600            retriever=retriever,
3601            config=config,
3602            components_mapping=components_mapping,
3603            parameters=model.parameters or {},
3604        )
3605
3606    @staticmethod
3607    def create_stream_config(
3608        model: StreamConfigModel, config: Config, **kwargs: Any
3609    ) -> StreamConfig:
3610        model_configs_pointer: List[Union[InterpolatedString, str]] = (
3611            [x for x in model.configs_pointer] if model.configs_pointer else []
3612        )
3613
3614        return StreamConfig(
3615            configs_pointer=model_configs_pointer,
3616            parameters=model.parameters or {},
3617        )
3618
3619    def create_config_components_resolver(
3620        self, model: ConfigComponentsResolverModel, config: Config
3621    ) -> Any:
3622        stream_config = self._create_component_from_model(
3623            model.stream_config, config=config, parameters=model.parameters or {}
3624        )
3625
3626        components_mapping = [
3627            self._create_component_from_model(
3628                model=components_mapping_definition_model,
3629                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3630                    components_mapping_definition_model.value_type
3631                ),
3632                config=config,
3633            )
3634            for components_mapping_definition_model in model.components_mapping
3635        ]
3636
3637        return ConfigComponentsResolver(
3638            stream_config=stream_config,
3639            config=config,
3640            components_mapping=components_mapping,
3641            parameters=model.parameters or {},
3642        )
3643
3644    _UNSUPPORTED_DECODER_ERROR = (
3645        "Specified decoder of {decoder_type} is not supported for pagination."
3646        "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead."
3647        "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
3648    )
3649
3650    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
3651        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
3652            return True
3653        elif isinstance(decoder, CompositeRawDecoder):
3654            return self._is_supported_parser_for_pagination(decoder.parser)
3655        else:
3656            return False
3657
3658    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
3659        if isinstance(parser, JsonParser):
3660            return True
3661        elif isinstance(parser, GzipParser):
3662            return isinstance(parser.inner_parser, JsonParser)
3663        else:
3664            return False
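        # Examples (illustrative, not part of the source): JsonDecoder and XmlDecoder are accepted
        # directly; CompositeRawDecoder(parser=JsonParser()) and
        # CompositeRawDecoder(parser=GzipParser(inner_parser=JsonParser())) are accepted via their
        # parsers, while e.g. a CsvParser-based decoder raises _UNSUPPORTED_DECODER_ERROR.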
3665
3666    def create_http_api_budget(
3667        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
3668    ) -> HttpAPIBudget:
3669        policies = [
3670            self._create_component_from_model(model=policy, config=config)
3671            for policy in model.policies
3672        ]
3673
3674        return HttpAPIBudget(
3675            policies=policies,
3676            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
3677            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
3678            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
3679        )
3680
3681    def create_fixed_window_call_rate_policy(
3682        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
3683    ) -> FixedWindowCallRatePolicy:
3684        matchers = [
3685            self._create_component_from_model(model=matcher, config=config)
3686            for matcher in model.matchers
3687        ]
3688
3689        # Set the initial reset timestamp to 10 days from now.
3690        # This value will be updated by the first request.
3691        return FixedWindowCallRatePolicy(
3692            next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
3693            period=parse_duration(model.period),
3694            call_limit=model.call_limit,
3695            matchers=matchers,
3696        )
3697
3698    def create_file_uploader(
3699        self, model: FileUploaderModel, config: Config, **kwargs: Any
3700    ) -> FileUploader:
3701        name = "File Uploader"
3702        requester = self._create_component_from_model(
3703            model=model.requester,
3704            config=config,
3705            name=name,
3706            **kwargs,
3707        )
3708        download_target_extractor = self._create_component_from_model(
3709            model=model.download_target_extractor,
3710            config=config,
3711            name=name,
3712            **kwargs,
3713        )
3714        emit_connector_builder_messages = self._emit_connector_builder_messages
3715        file_uploader = DefaultFileUploader(
3716            requester=requester,
3717            download_target_extractor=download_target_extractor,
3718            config=config,
3719            file_writer=NoopFileWriter()
3720            if emit_connector_builder_messages
3721            else LocalFileSystemFileWriter(),
3722            parameters=model.parameters or {},
3723            filename_extractor=model.filename_extractor if model.filename_extractor else None,
3724        )
3725
3726        return (
3727            ConnectorBuilderFileUploader(file_uploader)
3728            if emit_connector_builder_messages
3729            else file_uploader
3730        )
3731
3732    def create_moving_window_call_rate_policy(
3733        self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3734    ) -> MovingWindowCallRatePolicy:
3735        rates = [
3736            self._create_component_from_model(model=rate, config=config) for rate in model.rates
3737        ]
3738        matchers = [
3739            self._create_component_from_model(model=matcher, config=config)
3740            for matcher in model.matchers
3741        ]
3742        return MovingWindowCallRatePolicy(
3743            rates=rates,
3744            matchers=matchers,
3745        )
3746
3747    def create_unlimited_call_rate_policy(
3748        self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
3749    ) -> UnlimitedCallRatePolicy:
3750        matchers = [
3751            self._create_component_from_model(model=matcher, config=config)
3752            for matcher in model.matchers
3753        ]
3754
3755        return UnlimitedCallRatePolicy(
3756            matchers=matchers,
3757        )
3758
3759    def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
3760        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
3761        return Rate(
3762            limit=int(interpolated_limit.eval(config=config)),
3763            interval=parse_duration(model.interval),
3764        )
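        # Example (illustrative values, not part of the source): a Rate model of
        #   {"limit": "{{ config['api_call_limit'] }}", "interval": "PT1M"}
        # evaluates the interpolated limit against the config and parses the ISO 8601
        # interval into a one-minute duration.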
3765
3766    def create_http_request_matcher(
3767        self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
3768    ) -> HttpRequestRegexMatcher:
3769        return HttpRequestRegexMatcher(
3770            method=model.method,
3771            url_base=model.url_base,
3772            url_path_pattern=model.url_path_pattern,
3773            params=model.params,
3774            headers=model.headers,
3775        )
3776
3777    def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
3778        self._api_budget = self.create_component(
3779            model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
3780        )
3781
3782    def create_grouping_partition_router(
3783        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
3784    ) -> GroupingPartitionRouter:
3785        underlying_router = self._create_component_from_model(
3786            model=model.underlying_partition_router, config=config
3787        )
3788        if model.group_size < 1:
3789            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
3790
3791        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
3792        # because they are specific to individual partitions and cannot be aggregated or handled
3793        # when grouping, potentially leading to incorrect API calls. Any request customization
3794        # should be managed at the stream level through the requester's configuration.
3795        if isinstance(underlying_router, SubstreamPartitionRouter):
3796            if any(
3797                parent_config.request_option
3798                for parent_config in underlying_router.parent_stream_configs
3799            ):
3800                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3801
3802        if isinstance(underlying_router, ListPartitionRouter):
3803            if underlying_router.request_option:
3804                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3805
3806        return GroupingPartitionRouter(
3807            group_size=model.group_size,
3808            underlying_partition_router=underlying_router,
3809            deduplicate=model.deduplicate if model.deduplicate is not None else True,
3810            config=config,
3811        )
ModelToComponentFactory( limit_pages_fetched_per_slice: Optional[int] = None, limit_slices_fetched: Optional[int] = None, emit_connector_builder_messages: bool = False, disable_retries: bool = False, disable_cache: bool = False, disable_resumable_full_refresh: bool = False, message_repository: Optional[airbyte_cdk.MessageRepository] = None, connector_state_manager: Optional[airbyte_cdk.ConnectorStateManager] = None, max_concurrent_async_job_count: Optional[int] = None)
580    def __init__(
581        self,
582        limit_pages_fetched_per_slice: Optional[int] = None,
583        limit_slices_fetched: Optional[int] = None,
584        emit_connector_builder_messages: bool = False,
585        disable_retries: bool = False,
586        disable_cache: bool = False,
587        disable_resumable_full_refresh: bool = False,
588        message_repository: Optional[MessageRepository] = None,
589        connector_state_manager: Optional[ConnectorStateManager] = None,
590        max_concurrent_async_job_count: Optional[int] = None,
591    ):
592        self._init_mappings()
593        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
594        self._limit_slices_fetched = limit_slices_fetched
595        self._emit_connector_builder_messages = emit_connector_builder_messages
596        self._disable_retries = disable_retries
597        self._disable_cache = disable_cache
598        self._disable_resumable_full_refresh = disable_resumable_full_refresh
599        self._message_repository = message_repository or InMemoryMessageRepository(
600            self._evaluate_log_level(emit_connector_builder_messages)
601        )
602        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
603        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
604        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
605        # collects deprecation warnings emitted while creating components
606        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
EPOCH_DATETIME_FORMAT = '%s'
def create_component( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], config: Mapping[str, Any], **kwargs: Any) -> Any:
711    def create_component(
712        self,
713        model_type: Type[BaseModel],
714        component_definition: ComponentDefinition,
715        config: Config,
716        **kwargs: Any,
717    ) -> Any:
718        """
719        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
720        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
721        creating declarative components from that model.
722
723        :param model_type: The type of declarative component that is being initialized
724        :param component_definition: The mapping that represents a declarative component
725        :param config: The connector config that is provided by the customer
726        :return: The declarative component to be used at runtime
727        """
728
729        component_type = component_definition.get("type")
730        if component_definition.get("type") != model_type.__name__:
731            raise ValueError(
732                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
733            )
734
735        declarative_component_model = model_type.parse_obj(component_definition)
736
737        if not isinstance(declarative_component_model, model_type):
738            raise ValueError(
739                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
740            )
741
742        return self._create_component_from_model(
743            model=declarative_component_model, config=config, **kwargs
744        )

Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating declarative components from that model.

Parameters
  • model_type: The type of declarative component that is being initialized
  • component_definition: The mapping that represents a declarative component
  • config: The connector config that is provided by the customer
Returns

The declarative component to be used at runtime
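
As a hedged usage sketch: the factory first checks that the mapping's "type" matches the model class name, then parses the mapping with parse_obj and dispatches to the matching create method. The AddFields definition below is a hypothetical example, not taken from this module.

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        AddFields as AddFieldsModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    definition = {
        "type": "AddFields",  # must equal AddFieldsModel.__name__ or a ValueError is raised
        "fields": [
            {
                "type": "AddedFieldDefinition",
                "path": ["source"],
                "value": "{{ config['source_name'] }}",
            }
        ],
    }
    add_fields = factory.create_component(
        model_type=AddFieldsModel,
        component_definition=definition,
        config={"source_name": "demo"},
    )
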

def get_model_deprecations(self) -> List[airbyte_cdk.connector_builder.models.LogMessage]:
761    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
762        """
763        Returns the deprecation warnings that were collected during the creation of components.
764        """
765        return self._collected_deprecation_logs

Returns the deprecation warnings that were collected during the creation of components.

@staticmethod
def create_added_field_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddedFieldDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddedFieldDefinition:
782    @staticmethod
783    def create_added_field_definition(
784        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
785    ) -> AddedFieldDefinition:
786        interpolated_value = InterpolatedString.create(
787            model.value, parameters=model.parameters or {}
788        )
789        return AddedFieldDefinition(
790            path=model.path,
791            value=interpolated_value,
792            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
793            parameters=model.parameters or {},
794        )
def create_add_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddFields:
796    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
797        added_field_definitions = [
798            self._create_component_from_model(
799                model=added_field_definition_model,
800                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
801                    added_field_definition_model.value_type
802                ),
803                config=config,
804            )
805            for added_field_definition_model in model.fields
806        ]
807        return AddFields(
808            fields=added_field_definitions,
809            condition=model.condition or "",
810            parameters=model.parameters or {},
811        )
813    def create_keys_to_lower_transformation(
814        self, model: KeysToLowerModel, config: Config, **kwargs: Any
815    ) -> KeysToLowerTransformation:
816        return KeysToLowerTransformation()
818    def create_keys_to_snake_transformation(
819        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
820    ) -> KeysToSnakeCaseTransformation:
821        return KeysToSnakeCaseTransformation()
823    def create_keys_replace_transformation(
824        self, model: KeysReplaceModel, config: Config, **kwargs: Any
825    ) -> KeysReplaceTransformation:
826        return KeysReplaceTransformation(
827            old=model.old, new=model.new, parameters=model.parameters or {}
828        )
830    def create_flatten_fields(
831        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
832    ) -> FlattenFields:
833        return FlattenFields(
834            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
835        )
837    def create_dpath_flatten_fields(
838        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
839    ) -> DpathFlattenFields:
840        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
841        key_transformation = (
842            KeyTransformation(
843                config=config,
844                prefix=model.key_transformation.prefix,
845                suffix=model.key_transformation.suffix,
846                parameters=model.parameters or {},
847            )
848            if model.key_transformation is not None
849            else None
850        )
851        return DpathFlattenFields(
852            config=config,
853            field_path=model_field_path,
854            delete_origin_value=model.delete_origin_value
855            if model.delete_origin_value is not None
856            else False,
857            replace_record=model.replace_record if model.replace_record is not None else False,
858            key_transformation=key_transformation,
859            parameters=model.parameters or {},
860        )
def create_api_key_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ApiKeyAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.ApiKeyAuthenticator:
874    def create_api_key_authenticator(
875        self,
876        model: ApiKeyAuthenticatorModel,
877        config: Config,
878        token_provider: Optional[TokenProvider] = None,
879        **kwargs: Any,
880    ) -> ApiKeyAuthenticator:
881        if model.inject_into is None and model.header is None:
882            raise ValueError(
883                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
884            )
885
886        if model.inject_into is not None and model.header is not None:
887            raise ValueError(
888                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
889            )
890
891        if token_provider is not None and model.api_token != "":
892            raise ValueError(
893                "If token_provider is set, api_token is ignored and must be set to an empty string."
894            )
895
896        request_option = (
897            self._create_component_from_model(
898                model.inject_into, config, parameters=model.parameters or {}
899            )
900            if model.inject_into
901            else RequestOption(
902                inject_into=RequestOptionType.header,
903                field_name=model.header or "",
904                parameters=model.parameters or {},
905            )
906        )
907
908        return ApiKeyAuthenticator(
909            token_provider=(
910                token_provider
911                if token_provider is not None
912                else InterpolatedStringTokenProvider(
913                    api_token=model.api_token or "",
914                    config=config,
915                    parameters=model.parameters or {},
916                )
917            ),
918            request_option=request_option,
919            config=config,
920            parameters=model.parameters or {},
921        )
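
A hypothetical definition illustrating the validation above: exactly one of inject_into or the deprecated header may be set, and the RequestOption shape mirrors the fallback the code builds itself.

    api_key_definition = {
        "type": "ApiKeyAuthenticator",
        "api_token": "{{ config['api_key'] }}",
        "inject_into": {
            "type": "RequestOption",
            "inject_into": "header",
            "field_name": "X-API-Key",
        },
        # adding the deprecated top-level "header" alongside "inject_into"
        # would raise the ValueError above
    }
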
923    def create_legacy_to_per_partition_state_migration(
924        self,
925        model: LegacyToPerPartitionStateMigrationModel,
926        config: Mapping[str, Any],
927        declarative_stream: DeclarativeStreamModel,
928    ) -> LegacyToPerPartitionStateMigration:
929        retriever = declarative_stream.retriever
930        if not isinstance(retriever, SimpleRetrieverModel):
931            raise ValueError(
932                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
933            )
934        partition_router = retriever.partition_router
935        if not isinstance(
936            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
937        ):
938            raise ValueError(
939                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
940            )
941        if not hasattr(partition_router, "parent_stream_configs"):
942            raise ValueError(
943                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
944            )
945
946        if not hasattr(declarative_stream, "incremental_sync"):
947            raise ValueError(
948                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
949            )
950
951        return LegacyToPerPartitionStateMigration(
952            partition_router,  # type: ignore # was already checked above
953            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
954            config,
955            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
956        )
def create_session_token_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SessionTokenAuthenticator, config: Mapping[str, Any], name: str, **kwargs: Any) -> Union[airbyte_cdk.ApiKeyAuthenticator, airbyte_cdk.BearerAuthenticator]:
958    def create_session_token_authenticator(
959        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
960    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
961        decoder = (
962            self._create_component_from_model(model=model.decoder, config=config)
963            if model.decoder
964            else JsonDecoder(parameters={})
965        )
966        login_requester = self._create_component_from_model(
967            model=model.login_requester,
968            config=config,
969            name=f"{name}_login_requester",
970            decoder=decoder,
971        )
972        token_provider = SessionTokenProvider(
973            login_requester=login_requester,
974            session_token_path=model.session_token_path,
975            expiration_duration=parse_duration(model.expiration_duration)
976            if model.expiration_duration
977            else None,
978            parameters=model.parameters or {},
979            message_repository=self._message_repository,
980            decoder=decoder,
981        )
982        if model.request_authentication.type == "Bearer":
983            return ModelToComponentFactory.create_bearer_authenticator(
984                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
985                config,
986                token_provider=token_provider,
987            )
988        else:
989            return self.create_api_key_authenticator(
990                ApiKeyAuthenticatorModel(
991                    type="ApiKeyAuthenticator",
992                    api_token="",
993                    inject_into=model.request_authentication.inject_into,
994                ),  # type: ignore # $parameters and headers default to None
995                config=config,
996                token_provider=token_provider,
997            )
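
A sketch of a definition this method handles, under stated assumptions: session_token_path, expiration_duration (ISO 8601, parsed with isodate.parse_duration), and request_authentication come from the code above, while the url_base, path, and http_method fields on the nested HttpRequester are assumed from the usual declarative schema.

    session_auth_definition = {
        "type": "SessionTokenAuthenticator",
        "login_requester": {               # field names assumed
            "type": "HttpRequester",
            "url_base": "https://api.example.com",
            "path": "/login",
            "http_method": "POST",
        },
        "session_token_path": ["token"],   # where to find the token in the login response
        "expiration_duration": "PT1H",     # optional; omit to re-login on every request
        "request_authentication": {"type": "Bearer"},  # or "ApiKey" with an inject_into
    }
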
@staticmethod
def create_basic_http_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BasicHttpAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.BasicHttpAuthenticator:
 999    @staticmethod
1000    def create_basic_http_authenticator(
1001        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
1002    ) -> BasicHttpAuthenticator:
1003        return BasicHttpAuthenticator(
1004            password=model.password or "",
1005            username=model.username,
1006            config=config,
1007            parameters=model.parameters or {},
1008        )
@staticmethod
def create_bearer_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BearerAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.BearerAuthenticator:
1010    @staticmethod
1011    def create_bearer_authenticator(
1012        model: BearerAuthenticatorModel,
1013        config: Config,
1014        token_provider: Optional[TokenProvider] = None,
1015        **kwargs: Any,
1016    ) -> BearerAuthenticator:
1017        if token_provider is not None and model.api_token != "":
1018            raise ValueError(
1019                "If token_provider is set, api_token is ignored and must be set to an empty string."
1020            )
1021        return BearerAuthenticator(
1022            token_provider=(
1023                token_provider
1024                if token_provider is not None
1025                else InterpolatedStringTokenProvider(
1026                    api_token=model.api_token or "",
1027                    config=config,
1028                    parameters=model.parameters or {},
1029                )
1030            ),
1031            config=config,
1032            parameters=model.parameters or {},
1033        )
@staticmethod
def create_dynamic_stream_check_config( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DynamicStreamCheckConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.DynamicStreamCheckConfig:
1035    @staticmethod
1036    def create_dynamic_stream_check_config(
1037        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
1038    ) -> DynamicStreamCheckConfig:
1039        return DynamicStreamCheckConfig(
1040            dynamic_stream_name=model.dynamic_stream_name,
1041            stream_count=model.stream_count or 0,
1042        )
def create_check_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckStream:
1044    def create_check_stream(
1045        self, model: CheckStreamModel, config: Config, **kwargs: Any
1046    ) -> CheckStream:
1047        if model.dynamic_streams_check_configs is None and model.stream_names is None:
1048            raise ValueError(
1049                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
1050            )
1051
1052        dynamic_streams_check_configs = (
1053            [
1054                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
1055                for dynamic_stream_check_config in model.dynamic_streams_check_configs
1056            ]
1057            if model.dynamic_streams_check_configs
1058            else []
1059        )
1060
1061        return CheckStream(
1062            stream_names=model.stream_names or [],
1063            dynamic_streams_check_configs=dynamic_streams_check_configs,
1064            parameters={},
1065        )
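
Two hypothetical definitions showing the two accepted shapes; at least one of stream_names or dynamic_streams_check_configs must be present, otherwise the ValueError above is raised.

    check_by_name = {"type": "CheckStream", "stream_names": ["customers"]}

    check_by_dynamic_config = {
        "type": "CheckStream",
        "dynamic_streams_check_configs": [
            {
                "type": "DynamicStreamCheckConfig",
                "dynamic_stream_name": "tables",
                "stream_count": 1,
            }
        ],
    }
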
@staticmethod
def create_check_dynamic_stream( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckDynamicStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckDynamicStream:
1067    @staticmethod
1068    def create_check_dynamic_stream(
1069        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
1070    ) -> CheckDynamicStream:
1071        assert model.use_check_availability is not None  # for mypy
1072
1073        use_check_availability = model.use_check_availability
1074
1075        return CheckDynamicStream(
1076            stream_count=model.stream_count,
1077            use_check_availability=use_check_availability,
1078            parameters={},
1079        )
1081    def create_composite_error_handler(
1082        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
1083    ) -> CompositeErrorHandler:
1084        error_handlers = [
1085            self._create_component_from_model(model=error_handler_model, config=config)
1086            for error_handler_model in model.error_handlers
1087        ]
1088        return CompositeErrorHandler(
1089            error_handlers=error_handlers, parameters=model.parameters or {}
1090        )
@staticmethod
def create_concurrency_level( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConcurrencyLevel, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.concurrency_level.ConcurrencyLevel:
1092    @staticmethod
1093    def create_concurrency_level(
1094        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
1095    ) -> ConcurrencyLevel:
1096        return ConcurrencyLevel(
1097            default_concurrency=model.default_concurrency,
1098            max_concurrency=model.max_concurrency,
1099            config=config,
1100            parameters={},
1101        )
@staticmethod
def apply_stream_state_migrations( stream_state_migrations: Optional[List[Any]], stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
1103    @staticmethod
1104    def apply_stream_state_migrations(
1105        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
1106    ) -> MutableMapping[str, Any]:
1107        if stream_state_migrations:
1108            for state_migration in stream_state_migrations:
1109                if state_migration.should_migrate(stream_state):
1110                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
1111                    stream_state = dict(state_migration.migrate(stream_state))
1112        return stream_state
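
Any object exposing should_migrate and migrate works here; a minimal sketch with a hypothetical migration that stamps a version key onto the state:

    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    class AddVersionMigration:  # hypothetical; only these two methods are required
        def should_migrate(self, stream_state):
            return "version" not in stream_state

        def migrate(self, stream_state):
            # may return an immutable mapping; the caller copies it into a dict
            return {**stream_state, "version": 2}

    state = {"updated_at": "2024-01-01T00:00:00Z"}
    state = ModelToComponentFactory.apply_stream_state_migrations(
        [AddVersionMigration()], state
    )
    # {'updated_at': '2024-01-01T00:00:00Z', 'version': 2}
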
def create_concurrent_cursor_from_datetime_based_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, runtime_lookback_window: Optional[datetime.timedelta] = None, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
1114    def create_concurrent_cursor_from_datetime_based_cursor(
1115        self,
1116        model_type: Type[BaseModel],
1117        component_definition: ComponentDefinition,
1118        stream_name: str,
1119        stream_namespace: Optional[str],
1120        config: Config,
1121        message_repository: Optional[MessageRepository] = None,
1122        runtime_lookback_window: Optional[datetime.timedelta] = None,
1123        stream_state_migrations: Optional[List[Any]] = None,
1124        **kwargs: Any,
1125    ) -> ConcurrentCursor:
1126        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1127        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1128        # incoming state and connector_state_manager that is initialized when the component factory is created
1129        stream_state = (
1130            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1131            if "stream_state" not in kwargs
1132            else kwargs["stream_state"]
1133        )
1134        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1135
1136        component_type = component_definition.get("type")
1137        if component_definition.get("type") != model_type.__name__:
1138            raise ValueError(
1139                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1140            )
1141
1142        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1143
1144        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1145            raise ValueError(
1146                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1147            )
1148
1149        interpolated_cursor_field = InterpolatedString.create(
1150            datetime_based_cursor_model.cursor_field,
1151            parameters=datetime_based_cursor_model.parameters or {},
1152        )
1153        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1154
1155        interpolated_partition_field_start = InterpolatedString.create(
1156            datetime_based_cursor_model.partition_field_start or "start_time",
1157            parameters=datetime_based_cursor_model.parameters or {},
1158        )
1159        interpolated_partition_field_end = InterpolatedString.create(
1160            datetime_based_cursor_model.partition_field_end or "end_time",
1161            parameters=datetime_based_cursor_model.parameters or {},
1162        )
1163
1164        slice_boundary_fields = (
1165            interpolated_partition_field_start.eval(config=config),
1166            interpolated_partition_field_end.eval(config=config),
1167        )
1168
1169        datetime_format = datetime_based_cursor_model.datetime_format
1170
1171        cursor_granularity = (
1172            parse_duration(datetime_based_cursor_model.cursor_granularity)
1173            if datetime_based_cursor_model.cursor_granularity
1174            else None
1175        )
1176
1177        lookback_window = None
1178        interpolated_lookback_window = (
1179            InterpolatedString.create(
1180                datetime_based_cursor_model.lookback_window,
1181                parameters=datetime_based_cursor_model.parameters or {},
1182            )
1183            if datetime_based_cursor_model.lookback_window
1184            else None
1185        )
1186        if interpolated_lookback_window:
1187            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
1188            if evaluated_lookback_window:
1189                lookback_window = parse_duration(evaluated_lookback_window)
1190
1191        connector_state_converter: DateTimeStreamStateConverter
1192        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1193            datetime_format=datetime_format,
1194            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1195            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1196            cursor_granularity=cursor_granularity,
1197        )
1198
1199        # Adjusts the stream state by applying the runtime lookback window.
1200        # This is used to ensure correct state handling in case of failed partitions.
1201        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1202        if runtime_lookback_window and stream_state_value:
1203            new_stream_state = (
1204                connector_state_converter.parse_timestamp(stream_state_value)
1205                - runtime_lookback_window
1206            )
1207            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1208                new_stream_state
1209            )
1210
1211        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
1212        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
1213            start_date_runtime_value = self.create_min_max_datetime(
1214                model=datetime_based_cursor_model.start_datetime, config=config
1215            )
1216        else:
1217            start_date_runtime_value = datetime_based_cursor_model.start_datetime
1218
1219        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
1220        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
1221            end_date_runtime_value = self.create_min_max_datetime(
1222                model=datetime_based_cursor_model.end_datetime, config=config
1223            )
1224        else:
1225            end_date_runtime_value = datetime_based_cursor_model.end_datetime
1226
1227        interpolated_start_date = MinMaxDatetime.create(
1228            interpolated_string_or_min_max_datetime=start_date_runtime_value,
1229            parameters=datetime_based_cursor_model.parameters,
1230        )
1231        interpolated_end_date = (
1232            None
1233            if not end_date_runtime_value
1234            else MinMaxDatetime.create(
1235                end_date_runtime_value, datetime_based_cursor_model.parameters
1236            )
1237        )
1238
1239        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
1240        if not interpolated_start_date.datetime_format:
1241            interpolated_start_date.datetime_format = datetime_format
1242        if interpolated_end_date and not interpolated_end_date.datetime_format:
1243            interpolated_end_date.datetime_format = datetime_format
1244
1245        start_date = interpolated_start_date.get_datetime(config=config)
1246        end_date_provider = (
1247            partial(interpolated_end_date.get_datetime, config)
1248            if interpolated_end_date
1249            else connector_state_converter.get_end_provider()
1250        )
1251
1252        if (
1253            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
1254        ) or (
1255            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
1256        ):
1257            raise ValueError(
1258                f"If step is defined, cursor_granularity should be as well and vice-versa. "
1259                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
1260            )
1261
1262        # When step is not defined, default to a step size from the starting date to the present moment
1263        step_length = datetime.timedelta.max
1264        interpolated_step = (
1265            InterpolatedString.create(
1266                datetime_based_cursor_model.step,
1267                parameters=datetime_based_cursor_model.parameters or {},
1268            )
1269            if datetime_based_cursor_model.step
1270            else None
1271        )
1272        if interpolated_step:
1273            evaluated_step = interpolated_step.eval(config)
1274            if evaluated_step:
1275                step_length = parse_duration(evaluated_step)
1276
1277        clamping_strategy: ClampingStrategy = NoClamping()
1278        if datetime_based_cursor_model.clamping:
1279            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1280            # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
1281            # object which we want to keep agnostic of being low-code
1282            target = InterpolatedString(
1283                string=datetime_based_cursor_model.clamping.target,
1284                parameters=datetime_based_cursor_model.parameters or {},
1285            )
1286            evaluated_target = target.eval(config=config)
1287            match evaluated_target:
1288                case "DAY":
1289                    clamping_strategy = DayClampingStrategy()
1290                    end_date_provider = ClampingEndProvider(
1291                        DayClampingStrategy(is_ceiling=False),
1292                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1293                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
1294                    )
1295                case "WEEK":
1296                    if (
1297                        not datetime_based_cursor_model.clamping.target_details
1298                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
1299                    ):
1300                        raise ValueError(
1301                            "Given WEEK clamping, weekday needs to be provided in target_details"
1302                        )
1303                    weekday = self._assemble_weekday(
1304                        datetime_based_cursor_model.clamping.target_details["weekday"]
1305                    )
1306                    clamping_strategy = WeekClampingStrategy(weekday)
1307                    end_date_provider = ClampingEndProvider(
1308                        WeekClampingStrategy(weekday, is_ceiling=False),
1309                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1310                        granularity=cursor_granularity or datetime.timedelta(days=1),
1311                    )
1312                case "MONTH":
1313                    clamping_strategy = MonthClampingStrategy()
1314                    end_date_provider = ClampingEndProvider(
1315                        MonthClampingStrategy(is_ceiling=False),
1316                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1317                        granularity=cursor_granularity or datetime.timedelta(days=1),
1318                    )
1319                case _:
1320                    raise ValueError(
1321                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1322                    )
1323
1324        return ConcurrentCursor(
1325            stream_name=stream_name,
1326            stream_namespace=stream_namespace,
1327            stream_state=stream_state,
1328            message_repository=message_repository or self._message_repository,
1329            connector_state_manager=self._connector_state_manager,
1330            connector_state_converter=connector_state_converter,
1331            cursor_field=cursor_field,
1332            slice_boundary_fields=slice_boundary_fields,
1333            start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1334            end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1335            lookback_window=lookback_window,
1336            slice_range=step_length,
1337            cursor_granularity=cursor_granularity,
1338            clamping_strategy=clamping_strategy,
1339        )
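
A hedged sketch of a DatetimeBasedCursor definition exercising the rules enforced above: step and cursor_granularity must be set together, and clamping targets are DAY, WEEK, or MONTH, with WEEK additionally requiring a weekday in target_details. The exact weekday spelling accepted by _assemble_weekday is an assumption.

    datetime_cursor_definition = {
        "type": "DatetimeBasedCursor",
        "cursor_field": "updated_at",
        "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
        "start_datetime": "{{ config['start_date'] }}",
        "step": "P1W",                # must be paired with cursor_granularity
        "cursor_granularity": "PT1S",
        "clamping": {
            "target": "WEEK",
            "target_details": {"weekday": "MONDAY"},  # assumed spelling
        },
    }

This mapping would be passed as component_definition, with model_type=DatetimeBasedCursorModel, plus the stream name, namespace, and config.
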
def create_concurrent_cursor_from_incrementing_count_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
1341    def create_concurrent_cursor_from_incrementing_count_cursor(
1342        self,
1343        model_type: Type[BaseModel],
1344        component_definition: ComponentDefinition,
1345        stream_name: str,
1346        stream_namespace: Optional[str],
1347        config: Config,
1348        message_repository: Optional[MessageRepository] = None,
1349        **kwargs: Any,
1350    ) -> ConcurrentCursor:
1351        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1352        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1353        # incoming state and connector_state_manager that is initialized when the component factory is created
1354        stream_state = (
1355            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1356            if "stream_state" not in kwargs
1357            else kwargs["stream_state"]
1358        )
1359
1360        component_type = component_definition.get("type")
1361        if component_definition.get("type") != model_type.__name__:
1362            raise ValueError(
1363                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1364            )
1365
1366        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
1367
1368        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
1369            raise ValueError(
1370                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
1371            )
1372
1373        interpolated_start_value = (
1374            InterpolatedString.create(
1375                incrementing_count_cursor_model.start_value,  # type: ignore
1376                parameters=incrementing_count_cursor_model.parameters or {},
1377            )
1378            if incrementing_count_cursor_model.start_value
1379            else 0
1380        )
1381
1382        interpolated_cursor_field = InterpolatedString.create(
1383            incrementing_count_cursor_model.cursor_field,
1384            parameters=incrementing_count_cursor_model.parameters or {},
1385        )
1386        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1387
1388        connector_state_converter = IncrementingCountStreamStateConverter(
1389            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1390        )
1391
1392        return ConcurrentCursor(
1393            stream_name=stream_name,
1394            stream_namespace=stream_namespace,
1395            stream_state=stream_state,
1396            message_repository=message_repository or self._message_repository,
1397            connector_state_manager=self._connector_state_manager,
1398            connector_state_converter=connector_state_converter,
1399            cursor_field=cursor_field,
1400            slice_boundary_fields=None,
1401            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1402            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1403        )
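
The model here needs little beyond a cursor field; start_value is optional and the factory falls back to 0 when it is unset. A hypothetical definition:

    incrementing_definition = {
        "type": "IncrementingCountCursor",
        "cursor_field": "id",
        "start_value": 0,  # optional; interpolated when provided
    }
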
def create_concurrent_cursor_from_perpartition_cursor( self, state_manager: airbyte_cdk.ConnectorStateManager, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], stream_state: MutableMapping[str, Any], partition_router: airbyte_cdk.sources.declarative.partition_routers.PartitionRouter, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.sources.declarative.incremental.ConcurrentPerPartitionCursor:
1424    def create_concurrent_cursor_from_perpartition_cursor(
1425        self,
1426        state_manager: ConnectorStateManager,
1427        model_type: Type[BaseModel],
1428        component_definition: ComponentDefinition,
1429        stream_name: str,
1430        stream_namespace: Optional[str],
1431        config: Config,
1432        stream_state: MutableMapping[str, Any],
1433        partition_router: PartitionRouter,
1434        stream_state_migrations: Optional[List[Any]] = None,
1435        **kwargs: Any,
1436    ) -> ConcurrentPerPartitionCursor:
1437        component_type = component_definition.get("type")
1438        if component_definition.get("type") != model_type.__name__:
1439            raise ValueError(
1440                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1441            )
1442
1443        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1444
1445        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1446            raise ValueError(
1447                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1448            )
1449
1450        interpolated_cursor_field = InterpolatedString.create(
1451            datetime_based_cursor_model.cursor_field,
1452            parameters=datetime_based_cursor_model.parameters or {},
1453        )
1454        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1455
1456        datetime_format = datetime_based_cursor_model.datetime_format
1457
1458        cursor_granularity = (
1459            parse_duration(datetime_based_cursor_model.cursor_granularity)
1460            if datetime_based_cursor_model.cursor_granularity
1461            else None
1462        )
1463
1464        connector_state_converter: DateTimeStreamStateConverter
1465        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1466            datetime_format=datetime_format,
1467            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1468            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1469            cursor_granularity=cursor_granularity,
1470        )
1471
1472        # Create the cursor factory
1473        cursor_factory = ConcurrentCursorFactory(
1474            partial(
1475                self.create_concurrent_cursor_from_datetime_based_cursor,
1476                state_manager=state_manager,
1477                model_type=model_type,
1478                component_definition=component_definition,
1479                stream_name=stream_name,
1480                stream_namespace=stream_namespace,
1481                config=config,
1482                message_repository=NoopMessageRepository(),
1483                stream_state_migrations=stream_state_migrations,
1484            )
1485        )
1486        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1487        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
1488        use_global_cursor = isinstance(
1489            partition_router, GroupingPartitionRouter
1490        ) or component_definition.get("global_substream_cursor", False)
1491
1492        # Return the concurrent cursor and state converter
1493        return ConcurrentPerPartitionCursor(
1494            cursor_factory=cursor_factory,
1495            partition_router=partition_router,
1496            stream_name=stream_name,
1497            stream_namespace=stream_namespace,
1498            stream_state=stream_state,
1499            message_repository=self._message_repository,  # type: ignore
1500            connector_state_manager=state_manager,
1501            connector_state_converter=connector_state_converter,
1502            cursor_field=cursor_field,
1503            use_global_cursor=use_global_cursor,
1504        )
1506    @staticmethod
1507    def create_constant_backoff_strategy(
1508        model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
1509    ) -> ConstantBackoffStrategy:
1510        return ConstantBackoffStrategy(
1511            backoff_time_in_seconds=model.backoff_time_in_seconds,
1512            config=config,
1513            parameters=model.parameters or {},
1514        )
def create_cursor_pagination( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CursorPagination, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder, **kwargs: Any) -> airbyte_cdk.CursorPaginationStrategy:
1516    def create_cursor_pagination(
1517        self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
1518    ) -> CursorPaginationStrategy:
1519        if isinstance(decoder, PaginationDecoderDecorator):
1520            inner_decoder = decoder.decoder
1521        else:
1522            inner_decoder = decoder
1523            decoder = PaginationDecoderDecorator(decoder=decoder)
1524
1525        if self._is_supported_decoder_for_pagination(inner_decoder):
1526            decoder_to_use = decoder
1527        else:
1528            raise ValueError(
1529                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1530            )
1531
1532        return CursorPaginationStrategy(
1533            cursor_value=model.cursor_value,
1534            decoder=decoder_to_use,
1535            page_size=model.page_size,
1536            stop_condition=model.stop_condition,
1537            config=config,
1538            parameters=model.parameters or {},
1539        )
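
cursor_value and stop_condition are interpolated strings, and the decoder is wrapped in a PaginationDecoderDecorator unless it already is one; unsupported decoders raise the ValueError above. A hypothetical definition, with the response field name assumed:

    cursor_pagination_definition = {
        "type": "CursorPagination",
        "cursor_value": "{{ response['next_page_token'] }}",   # assumed field name
        "stop_condition": "{{ response['next_page_token'] is none }}",
        "page_size": 100,
    }
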
def create_custom_component(self, model: Any, config: Mapping[str, Any], **kwargs: Any) -> Any:
1541    def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
1542        """
1543        Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
1544        instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor
1545        :param model: The Pydantic model of the custom component being created
1546        :param config: The custom defined connector config
1547        :return: The declarative component built from the Pydantic model to be used at runtime
1548        """
1549        custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1550        component_fields = get_type_hints(custom_component_class)
1551        model_args = model.dict()
1552        model_args["config"] = config
1553
1554        # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions
1555        # we defer to these arguments over the component's definition
1556        for key, arg in kwargs.items():
1557            model_args[key] = arg
1558
1559        # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
1560        # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
1561        # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
1562        for model_field, model_value in model_args.items():
1563            # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
1564            if (
1565                isinstance(model_value, dict)
1566                and "type" not in model_value
1567                and model_field in component_fields
1568            ):
1569                derived_type = self._derive_component_type_from_type_hints(
1570                    component_fields.get(model_field)
1571                )
1572                if derived_type:
1573                    model_value["type"] = derived_type
1574
1575            if self._is_component(model_value):
1576                model_args[model_field] = self._create_nested_component(
1577                    model, model_field, model_value, config
1578                )
1579            elif isinstance(model_value, list):
1580                vals = []
1581                for v in model_value:
1582                    if isinstance(v, dict) and "type" not in v and model_field in component_fields:
1583                        derived_type = self._derive_component_type_from_type_hints(
1584                            component_fields.get(model_field)
1585                        )
1586                        if derived_type:
1587                            v["type"] = derived_type
1588                    if self._is_component(v):
1589                        vals.append(self._create_nested_component(model, model_field, v, config))
1590                    else:
1591                        vals.append(v)
1592                model_args[model_field] = vals
1593
1594        kwargs = {
1595            class_field: model_args[class_field]
1596            for class_field in component_fields.keys()
1597            if class_field in model_args
1598        }
1599        return custom_component_class(**kwargs)

Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor

Parameters
  • model: The Pydantic model of the custom component being created
  • config: The custom defined connector config
Returns

The declarative component built from the Pydantic model to be used at runtime
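
A hedged sketch of a custom component definition: only keys matching the custom class's type hints reach the constructor, and nested mappings without a "type" get one inferred from those hints. The class path and field names below are hypothetical.

    custom_definition = {
        "type": "CustomRecordExtractor",
        "class_name": "source_example.components.MyRecordExtractor",  # hypothetical class
        "field_path": ["data"],      # forwarded only if MyRecordExtractor declares it
        "unrelated_key": "ignored",  # filtered out: no matching type hint on the class
    }
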

@staticmethod
def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1664    @staticmethod
1665    def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1666        if not cls:
1667            return False
1668        return cls.__module__ == "builtins"
def create_datetime_based_cursor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DatetimeBasedCursor, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DatetimeBasedCursor:
1731    def create_datetime_based_cursor(
1732        self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
1733    ) -> DatetimeBasedCursor:
1734        start_datetime: Union[str, MinMaxDatetime] = (
1735            model.start_datetime
1736            if isinstance(model.start_datetime, str)
1737            else self.create_min_max_datetime(model.start_datetime, config)
1738        )
1739        end_datetime: Union[str, MinMaxDatetime, None] = None
1740        if model.is_data_feed and model.end_datetime:
1741            raise ValueError("Data feed does not support end_datetime")
1742        if model.is_data_feed and model.is_client_side_incremental:
1743            raise ValueError(
1744                "`Client side incremental` cannot be applied with `data feed`. Choose only one of them."
1745            )
1746        if model.end_datetime:
1747            end_datetime = (
1748                model.end_datetime
1749                if isinstance(model.end_datetime, str)
1750                else self.create_min_max_datetime(model.end_datetime, config)
1751            )
1752
1753        end_time_option = (
1754            self._create_component_from_model(
1755                model.end_time_option, config, parameters=model.parameters or {}
1756            )
1757            if model.end_time_option
1758            else None
1759        )
1760        start_time_option = (
1761            self._create_component_from_model(
1762                model.start_time_option, config, parameters=model.parameters or {}
1763            )
1764            if model.start_time_option
1765            else None
1766        )
1767
1768        return DatetimeBasedCursor(
1769            cursor_field=model.cursor_field,
1770            cursor_datetime_formats=model.cursor_datetime_formats
1771            if model.cursor_datetime_formats
1772            else [],
1773            cursor_granularity=model.cursor_granularity,
1774            datetime_format=model.datetime_format,
1775            end_datetime=end_datetime,
1776            start_datetime=start_datetime,
1777            step=model.step,
1778            end_time_option=end_time_option,
1779            lookback_window=model.lookback_window,
1780            start_time_option=start_time_option,
1781            partition_field_end=model.partition_field_end,
1782            partition_field_start=model.partition_field_start,
1783            message_repository=self._message_repository,
1784            is_compare_strictly=model.is_compare_strictly,
1785            config=config,
1786            parameters=model.parameters or {},
1787        )
def create_declarative_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DeclarativeStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
1789    def create_declarative_stream(
1790        self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
1791    ) -> DeclarativeStream:
1792        # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
1793        # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
1794        # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
1795        # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
1796        combined_slicers = self._merge_stream_slicers(model=model, config=config)
1797
1798        primary_key = model.primary_key.__root__ if model.primary_key else None
1799        stop_condition_on_cursor = (
1800            model.incremental_sync
1801            and hasattr(model.incremental_sync, "is_data_feed")
1802            and model.incremental_sync.is_data_feed
1803        )
1804        client_side_incremental_sync = None
1805        if (
1806            model.incremental_sync
1807            and hasattr(model.incremental_sync, "is_client_side_incremental")
1808            and model.incremental_sync.is_client_side_incremental
1809        ):
1810            supported_slicers = (
1811                DatetimeBasedCursor,
1812                GlobalSubstreamCursor,
1813                PerPartitionWithGlobalCursor,
1814            )
1815            if combined_slicers and not isinstance(combined_slicers, supported_slicers):
1816                raise ValueError(
1817                    "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1818                )
1819            cursor = (
1820                combined_slicers
1821                if isinstance(
1822                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1823                )
1824                else self._create_component_from_model(model=model.incremental_sync, config=config)
1825            )
1826
1827            client_side_incremental_sync = {"cursor": cursor}
1828
1829        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1830            cursor_model = model.incremental_sync
1831
1832            end_time_option = (
1833                self._create_component_from_model(
1834                    cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1835                )
1836                if cursor_model.end_time_option
1837                else None
1838            )
1839            start_time_option = (
1840                self._create_component_from_model(
1841                    cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1842                )
1843                if cursor_model.start_time_option
1844                else None
1845            )
1846
1847            request_options_provider = DatetimeBasedRequestOptionsProvider(
1848                start_time_option=start_time_option,
1849                end_time_option=end_time_option,
1850                partition_field_start=cursor_model.partition_field_start,
1851                partition_field_end=cursor_model.partition_field_end,
1852                config=config,
1853                parameters=model.parameters or {},
1854            )
1855        elif model.incremental_sync and isinstance(
1856            model.incremental_sync, IncrementingCountCursorModel
1857        ):
1858            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
1859
1860            start_time_option = (
1861                self._create_component_from_model(
1862                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1863                    config,
1864                    parameters=cursor_model.parameters or {},
1865                )
1866                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1867                else None
1868            )
1869
1870            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
1871            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
1872            partition_field_start = "start"
1873
1874            request_options_provider = DatetimeBasedRequestOptionsProvider(
1875                start_time_option=start_time_option,
1876                partition_field_start=partition_field_start,
1877                config=config,
1878                parameters=model.parameters or {},
1879            )
1880        else:
1881            request_options_provider = None
1882
1883        transformations = []
1884        if model.transformations:
1885            for transformation_model in model.transformations:
1886                transformations.append(
1887                    self._create_component_from_model(model=transformation_model, config=config)
1888                )
1889        file_uploader = None
1890        if model.file_uploader:
1891            file_uploader = self._create_component_from_model(
1892                model=model.file_uploader, config=config
1893            )
1894
1895        retriever = self._create_component_from_model(
1896            model=model.retriever,
1897            config=config,
1898            name=model.name,
1899            primary_key=primary_key,
1900            stream_slicer=combined_slicers,
1901            request_options_provider=request_options_provider,
1902            stop_condition_on_cursor=stop_condition_on_cursor,
1903            client_side_incremental_sync=client_side_incremental_sync,
1904            transformations=transformations,
1905            file_uploader=file_uploader,
1906            incremental_sync=model.incremental_sync,
1907        )
1908        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
1909
1910        if model.state_migrations:
1911            state_transformations = [
1912                self._create_component_from_model(state_migration, config, declarative_stream=model)
1913                for state_migration in model.state_migrations
1914            ]
1915        else:
1916            state_transformations = []
1917
1918        schema_loader: Union[
1919            CompositeSchemaLoader,
1920            DefaultSchemaLoader,
1921            DynamicSchemaLoader,
1922            InlineSchemaLoader,
1923            JsonFileSchemaLoader,
1924        ]
1925        if model.schema_loader and isinstance(model.schema_loader, list):
1926            nested_schema_loaders = [
1927                self._create_component_from_model(model=nested_schema_loader, config=config)
1928                for nested_schema_loader in model.schema_loader
1929            ]
1930            schema_loader = CompositeSchemaLoader(
1931                schema_loaders=nested_schema_loaders, parameters={}
1932            )
1933        elif model.schema_loader:
1934            schema_loader = self._create_component_from_model(
1935                model=model.schema_loader,  # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models
1936                config=config,
1937            )
1938        else:
1939            options = model.parameters or {}
1940            if "name" not in options:
1941                options["name"] = model.name
1942            schema_loader = DefaultSchemaLoader(config=config, parameters=options)
1943
1944        return DeclarativeStream(
1945            name=model.name or "",
1946            primary_key=primary_key,
1947            retriever=retriever,
1948            schema_loader=schema_loader,
1949            stream_cursor_field=cursor_field or "",
1950            state_migrations=state_transformations,
1951            config=config,
1952            parameters=model.parameters or {},
1953        )
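
The schema_loader resolution above follows a three-way precedence: a list of loaders is wrapped in a CompositeSchemaLoader, a single declared loader model is built directly, and the absence of any loader falls back to a DefaultSchemaLoader keyed by the stream name. A minimal standalone sketch of that dispatch (plain-Python stand-ins, not the CDK classes):

# Standalone sketch of the schema_loader dispatch (illustrative stand-ins, not the CDK classes).
from dataclasses import dataclass
from typing import Any, List, Optional, Union


@dataclass
class CompositeLoader:
    loaders: List[Any]


@dataclass
class DefaultLoader:
    name: Optional[str]


def resolve_schema_loader(declared: Union[List[Any], Any, None], stream_name: str) -> Any:
    if declared and isinstance(declared, list):
        # A list of loaders is wrapped in a composite that consults each in order.
        return CompositeLoader(loaders=list(declared))
    if declared:
        # A single declared loader is built as-is.
        return declared
    # Nothing declared: fall back to a default loader keyed by the stream name.
    return DefaultLoader(name=stream_name)


assert isinstance(resolve_schema_loader(None, "users"), DefaultLoader)
assert isinstance(resolve_schema_loader(["a", "b"], "users"), CompositeLoader)
assert resolve_schema_loader("inline", "users") == "inline"
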
2105    def create_default_error_handler(
2106        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
2107    ) -> DefaultErrorHandler:
2108        backoff_strategies = []
2109        if model.backoff_strategies:
2110            for backoff_strategy_model in model.backoff_strategies:
2111                backoff_strategies.append(
2112                    self._create_component_from_model(model=backoff_strategy_model, config=config)
2113                )
2114
2115        response_filters = []
2116        if model.response_filters:
2117            for response_filter_model in model.response_filters:
2118                response_filters.append(
2119                    self._create_component_from_model(model=response_filter_model, config=config)
2120                )
2121        response_filters.append(
2122            HttpResponseFilter(config=config, parameters=model.parameters or {})
2123        )
2124
2125        return DefaultErrorHandler(
2126            backoff_strategies=backoff_strategies,
2127            max_retries=model.max_retries,
2128            response_filters=response_filters,
2129            config=config,
2130            parameters=model.parameters or {},
2131        )
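
Note that create_default_error_handler always appends a bare, parameterless HttpResponseFilter after any user-declared filters, so responses that match none of the declared filters still receive the default handling. A hedged sketch of that ordering (string stand-ins for the filter objects):

# Sketch of the response-filter ordering (string stand-ins for the filter objects).
from typing import Any, List, Optional


def build_response_filters(declared: Optional[List[Any]]) -> List[Any]:
    filters: List[Any] = list(declared or [])
    # The parameterless catch-all filter is always appended last, so declared
    # filters are evaluated first and anything unmatched gets default handling.
    filters.append("catch-all HttpResponseFilter")
    return filters


assert build_response_filters(None) == ["catch-all HttpResponseFilter"]
assert build_response_filters(["retry-on-429"])[0] == "retry-on-429"
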
2133    def create_default_paginator(
2134        self,
2135        model: DefaultPaginatorModel,
2136        config: Config,
2137        *,
2138        url_base: str,
2139        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2140        decoder: Optional[Decoder] = None,
2141        cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
2142    ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
2143        if decoder:
2144            if self._is_supported_decoder_for_pagination(decoder):
2145                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
2146            else:
2147                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
2148        else:
2149            decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
2150        page_size_option = (
2151            self._create_component_from_model(model=model.page_size_option, config=config)
2152            if model.page_size_option
2153            else None
2154        )
2155        page_token_option = (
2156            self._create_component_from_model(model=model.page_token_option, config=config)
2157            if model.page_token_option
2158            else None
2159        )
2160        pagination_strategy = self._create_component_from_model(
2161            model=model.pagination_strategy,
2162            config=config,
2163            decoder=decoder_to_use,
2164            extractor_model=extractor_model,
2165        )
2166        if cursor_used_for_stop_condition:
2167            pagination_strategy = StopConditionPaginationStrategyDecorator(
2168                pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
2169            )
2170        paginator = DefaultPaginator(
2171            decoder=decoder_to_use,
2172            page_size_option=page_size_option,
2173            page_token_option=page_token_option,
2174            pagination_strategy=pagination_strategy,
2175            url_base=url_base,
2176            config=config,
2177            parameters=model.parameters or {},
2178        )
2179        if self._limit_pages_fetched_per_slice:
2180            return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
2181        return paginator
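
Two decorator layers are at work in create_default_paginator: a cursor used as a stop condition wraps the pagination strategy, and a configured per-slice page limit wraps the finished paginator for test reads. A small sketch of that decoration order (tuple stand-ins for the decorator classes):

# Sketch of the decoration order in create_default_paginator (tuple stand-ins for decorators).
from typing import Any, Optional


def finalize_pagination_strategy(strategy: Any, stop_cursor: Optional[Any]) -> Any:
    # A stop-condition cursor wraps the strategy so pagination halts once
    # records fall outside the cursor's window.
    if stop_cursor is not None:
        return ("StopConditionPaginationStrategyDecorator", strategy, stop_cursor)
    return strategy


def finalize_paginator(paginator: Any, pages_per_slice_limit: Optional[int]) -> Any:
    # For test reads, the whole paginator is wrapped so only N pages are fetched per slice.
    if pages_per_slice_limit:
        return ("PaginatorTestReadDecorator", paginator, pages_per_slice_limit)
    return paginator


assert finalize_pagination_strategy("strategy", None) == "strategy"
assert finalize_paginator("paginator", 3)[0] == "PaginatorTestReadDecorator"
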
2183    def create_dpath_extractor(
2184        self,
2185        model: DpathExtractorModel,
2186        config: Config,
2187        decoder: Optional[Decoder] = None,
2188        **kwargs: Any,
2189    ) -> DpathExtractor:
2190        if decoder:
2191            decoder_to_use = decoder
2192        else:
2193            decoder_to_use = JsonDecoder(parameters={})
2194        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
2195        return DpathExtractor(
2196            decoder=decoder_to_use,
2197            field_path=model_field_path,
2198            config=config,
2199            parameters=model.parameters or {},
2200        )
2202    @staticmethod
2203    def create_response_to_file_extractor(
2204        model: ResponseToFileExtractorModel,
2205        **kwargs: Any,
2206    ) -> ResponseToFileExtractor:
2207        return ResponseToFileExtractor(parameters=model.parameters or {})
2209    @staticmethod
2210    def create_exponential_backoff_strategy(
2211        model: ExponentialBackoffStrategyModel, config: Config
2212    ) -> ExponentialBackoffStrategy:
2213        return ExponentialBackoffStrategy(
2214            factor=model.factor or 5, parameters=model.parameters or {}, config=config
2215        )
2217    @staticmethod
2218    def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey:
2219        return GroupByKey(model.key, config=config, parameters=model.parameters or {})
2221    def create_http_requester(
2222        self,
2223        model: HttpRequesterModel,
2224        config: Config,
2225        decoder: Decoder = JsonDecoder(parameters={}),
2226        query_properties_key: Optional[str] = None,
2227        use_cache: Optional[bool] = None,
2228        *,
2229        name: str,
2230    ) -> HttpRequester:
2231        authenticator = (
2232            self._create_component_from_model(
2233                model=model.authenticator,
2234                config=config,
2235                url_base=model.url or model.url_base,
2236                name=name,
2237                decoder=decoder,
2238            )
2239            if model.authenticator
2240            else None
2241        )
2242        error_handler = (
2243            self._create_component_from_model(model=model.error_handler, config=config)
2244            if model.error_handler
2245            else DefaultErrorHandler(
2246                backoff_strategies=[],
2247                response_filters=[],
2248                config=config,
2249                parameters=model.parameters or {},
2250            )
2251        )
2252
2253        api_budget = self._api_budget
2254
2255        # Remove QueryProperties components from the interpolated mappings because they are designed
2256        # to be used by the SimpleRetriever and are resolved directly from the slice via the provider
2257        # instead of through jinja interpolation
2258        request_parameters: Optional[Union[str, Mapping[str, str]]]
2259        if isinstance(model.request_parameters, Mapping):
2260            request_parameters = self._remove_query_properties(model.request_parameters)
2261        else:
2262            request_parameters = model.request_parameters
2263
2264        request_options_provider = InterpolatedRequestOptionsProvider(
2265            request_body=model.request_body,
2266            request_body_data=model.request_body_data,
2267            request_body_json=model.request_body_json,
2268            request_headers=model.request_headers,
2269            request_parameters=request_parameters,
2270            query_properties_key=query_properties_key,
2271            config=config,
2272            parameters=model.parameters or {},
2273        )
2274
2275        assert model.use_cache is not None  # for mypy
2276        assert model.http_method is not None  # for mypy
2277
2278        should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
2279
2280        return HttpRequester(
2281            name=name,
2282            url=model.url,
2283            url_base=model.url_base,
2284            path=model.path,
2285            authenticator=authenticator,
2286            error_handler=error_handler,
2287            api_budget=api_budget,
2288            http_method=HttpMethod[model.http_method.value],
2289            request_options_provider=request_options_provider,
2290            config=config,
2291            disable_retries=self._disable_retries,
2292            parameters=model.parameters or {},
2293            message_repository=self._message_repository,
2294            use_cache=should_use_cache,
2295            decoder=decoder,
2296            stream_response=decoder.is_stream_response() if decoder else False,
2297        )
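
The effective caching flag in create_http_requester combines three signals: the model's own use_cache, the caller-supplied override, and the factory-wide cache disable switch, which always wins. A minimal truth-function sketch of that combination:

# Truth-function sketch of the should_use_cache computation above.
from typing import Optional


def should_use_cache(model_use_cache: bool, caller_use_cache: Optional[bool], disable_cache: bool) -> bool:
    # Caching is enabled if either the model or the caller asks for it,
    # but the factory-wide disable switch always wins.
    return (model_use_cache or bool(caller_use_cache)) and not disable_cache


assert should_use_cache(False, True, disable_cache=False) is True
assert should_use_cache(True, None, disable_cache=True) is False
assert should_use_cache(False, None, disable_cache=False) is False
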
2299    @staticmethod
2300    def create_http_response_filter(
2301        model: HttpResponseFilterModel, config: Config, **kwargs: Any
2302    ) -> HttpResponseFilter:
2303        if model.action:
2304            action = ResponseAction(model.action.value)
2305        else:
2306            action = None
2307
2308        failure_type = FailureType(model.failure_type.value) if model.failure_type else None
2309
2310        http_codes = (
2311            set(model.http_codes) if model.http_codes else set()
2312        )  # JSON schema notation has no set data type. The schema enforces an array of unique elements
2313
2314        return HttpResponseFilter(
2315            action=action,
2316            failure_type=failure_type,
2317            error_message=model.error_message or "",
2318            error_message_contains=model.error_message_contains or "",
2319            http_codes=http_codes,
2320            predicate=model.predicate or "",
2321            config=config,
2322            parameters=model.parameters or {},
2323        )
2325    @staticmethod
2326    def create_inline_schema_loader(
2327        model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
2328    ) -> InlineSchemaLoader:
2329        return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
2331    def create_complex_field_type(
2332        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
2333    ) -> ComplexFieldType:
2334        items = (
2335            self._create_component_from_model(model=model.items, config=config)
2336            if isinstance(model.items, ComplexFieldTypeModel)
2337            else model.items
2338        )
2339
2340        return ComplexFieldType(field_type=model.field_type, items=items)
2342    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
2343        target_type = (
2344            self._create_component_from_model(model=model.target_type, config=config)
2345            if isinstance(model.target_type, ComplexFieldTypeModel)
2346            else model.target_type
2347        )
2348
2349        return TypesMap(
2350            target_type=target_type,
2351            current_type=model.current_type,
2352            condition=model.condition if model.condition is not None else "True",
2353        )
2355    def create_schema_type_identifier(
2356        self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
2357    ) -> SchemaTypeIdentifier:
2358        types_mapping = []
2359        if model.types_mapping:
2360            types_mapping.extend(
2361                [
2362                    self._create_component_from_model(types_map, config=config)
2363                    for types_map in model.types_mapping
2364                ]
2365            )
2366        model_schema_pointer: List[Union[InterpolatedString, str]] = (
2367            [x for x in model.schema_pointer] if model.schema_pointer else []
2368        )
2369        model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
2370        model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
2371            [x for x in model.type_pointer] if model.type_pointer else None
2372        )
2373
2374        return SchemaTypeIdentifier(
2375            schema_pointer=model_schema_pointer,
2376            key_pointer=model_key_pointer,
2377            type_pointer=model_type_pointer,
2378            types_mapping=types_mapping,
2379            parameters=model.parameters or {},
2380        )
2382    def create_dynamic_schema_loader(
2383        self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2384    ) -> DynamicSchemaLoader:
2385        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2386        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2387
2388        schema_transformations = []
2389        if model.schema_transformations:
2390            for transformation_model in model.schema_transformations:
2391                schema_transformations.append(
2392                    self._create_component_from_model(model=transformation_model, config=config)
2393                )
2394
2395        retriever = self._create_component_from_model(
2396            model=model.retriever,
2397            config=config,
2398            name="dynamic_properties",
2399            primary_key=None,
2400            stream_slicer=combined_slicers,
2401            transformations=[],
2402            use_cache=True,
2403        )
2404        schema_type_identifier = self._create_component_from_model(
2405            model.schema_type_identifier, config=config, parameters=model.parameters or {}
2406        )
2407        return DynamicSchemaLoader(
2408            retriever=retriever,
2409            config=config,
2410            schema_transformations=schema_transformations,
2411            schema_type_identifier=schema_type_identifier,
2412            parameters=model.parameters or {},
2413        )
2415    @staticmethod
2416    def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2417        return JsonDecoder(parameters={})
2419    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2420        return CompositeRawDecoder(
2421            parser=ModelToComponentFactory._get_parser(model, config),
2422            stream_response=not self._emit_connector_builder_messages,
2423        )
2425    def create_jsonl_decoder(
2426        self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2427    ) -> Decoder:
2428        return CompositeRawDecoder(
2429            parser=ModelToComponentFactory._get_parser(model, config),
2430            stream_response=not self._emit_connector_builder_messages,
2431        )
2433    def create_gzip_decoder(
2434        self, model: GzipDecoderModel, config: Config, **kwargs: Any
2435    ) -> Decoder:
2436        _compressed_response_types = {
2437            "gzip",
2438            "x-gzip",
2439            "gzip, deflate",
2440            "x-gzip, deflate",
2441            "application/zip",
2442            "application/gzip",
2443            "application/x-gzip",
2444            "application/x-zip-compressed",
2445        }
2446
2447        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser
2448
2449        if self._emit_connector_builder_messages:
2450            # This is very surprising, but if the response is not streamed,
2451            # CompositeRawDecoder calls response.content, and the requests library actually uncompresses the data, as opposed to response.raw,
2452            # which uses urllib3 directly and does not uncompress the data.
2453            return CompositeRawDecoder(gzip_parser.inner_parser, False)
2454
2455        return CompositeRawDecoder.by_headers(
2456            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2457            stream_response=True,
2458            fallback_parser=gzip_parser.inner_parser,
2459        )
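
CompositeRawDecoder.by_headers, as used above, routes a response to the gzip parser when its Content-Encoding or Content-Type matches a known compressed type, and otherwise falls back to the inner parser. A standalone sketch of that header matching (a simplified subset of the types listed above; not the CDK implementation):

# Standalone sketch of header-based parser routing (simplified; not the CDK implementation).
from typing import Mapping

COMPRESSED_TYPES = {"gzip", "x-gzip", "application/gzip", "application/zip"}


def pick_parser(headers: Mapping[str, str]) -> str:
    # Check both Content-Encoding and Content-Type against known compressed values;
    # fall back to the plain inner parser when neither matches.
    for header in ("Content-Encoding", "Content-Type"):
        if headers.get(header, "").lower() in COMPRESSED_TYPES:
            return "gzip parser"
    return "inner (fallback) parser"


assert pick_parser({"Content-Encoding": "gzip"}) == "gzip parser"
assert pick_parser({"Content-Type": "application/json"}) == "inner (fallback) parser"
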
2461    @staticmethod
2462    def create_incrementing_count_cursor(
2463        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
2464    ) -> DatetimeBasedCursor:
2465        # This should not actually get used anywhere at runtime, but it is needed to pass checks since
2466        # we still parse models into components. The issue is that there is no runtime implementation of an
2467        # IncrementingCountCursor.
2468        # A known and expected limitation of this stub is running a check with the declared IncrementingCountCursor, because the check runs without a ConcurrentCursor.
2469        return DatetimeBasedCursor(
2470            cursor_field=model.cursor_field,
2471            datetime_format="%Y-%m-%d",
2472            start_datetime="2024-12-12",
2473            config=config,
2474            parameters={},
2475        )
2477    @staticmethod
2478    def create_iterable_decoder(
2479        model: IterableDecoderModel, config: Config, **kwargs: Any
2480    ) -> IterableDecoder:
2481        return IterableDecoder(parameters={})
2483    @staticmethod
2484    def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2485        return XmlDecoder(parameters={})
2487    def create_zipfile_decoder(
2488        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2489    ) -> ZipfileDecoder:
2490        return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2512    @staticmethod
2513    def create_json_file_schema_loader(
2514        model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
2515    ) -> JsonFileSchemaLoader:
2516        return JsonFileSchemaLoader(
2517            file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2518        )
2520    @staticmethod
2521    def create_jwt_authenticator(
2522        model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2523    ) -> JwtAuthenticator:
2524        jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2525        jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2526        return JwtAuthenticator(
2527            config=config,
2528            parameters=model.parameters or {},
2529            algorithm=JwtAlgorithm(model.algorithm.value),
2530            secret_key=model.secret_key,
2531            base64_encode_secret_key=model.base64_encode_secret_key,
2532            token_duration=model.token_duration,
2533            header_prefix=model.header_prefix,
2534            kid=jwt_headers.kid,
2535            typ=jwt_headers.typ,
2536            cty=jwt_headers.cty,
2537            iss=jwt_payload.iss,
2538            sub=jwt_payload.sub,
2539            aud=jwt_payload.aud,
2540            additional_jwt_headers=model.additional_jwt_headers,
2541            additional_jwt_payload=model.additional_jwt_payload,
2542        )
2544    def create_list_partition_router(
2545        self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2546    ) -> ListPartitionRouter:
2547        request_option = (
2548            self._create_component_from_model(model.request_option, config)
2549            if model.request_option
2550            else None
2551        )
2552        return ListPartitionRouter(
2553            cursor_field=model.cursor_field,
2554            request_option=request_option,
2555            values=model.values,
2556            config=config,
2557            parameters=model.parameters or {},
2558        )
2560    @staticmethod
2561    def create_min_max_datetime(
2562        model: MinMaxDatetimeModel, config: Config, **kwargs: Any
2563    ) -> MinMaxDatetime:
2564        return MinMaxDatetime(
2565            datetime=model.datetime,
2566            datetime_format=model.datetime_format or "",
2567            max_datetime=model.max_datetime or "",
2568            min_datetime=model.min_datetime or "",
2569            parameters=model.parameters or {},
2570        )
2572    @staticmethod
2573    def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
2574        return NoAuth(parameters=model.parameters or {})
2576    @staticmethod
2577    def create_no_pagination(
2578        model: NoPaginationModel, config: Config, **kwargs: Any
2579    ) -> NoPagination:
2580        return NoPagination(parameters={})
2582    def create_oauth_authenticator(
2583        self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
2584    ) -> DeclarativeOauth2Authenticator:
2585        profile_assertion = (
2586            self._create_component_from_model(model.profile_assertion, config=config)
2587            if model.profile_assertion
2588            else None
2589        )
2590
2591        if model.refresh_token_updater:
2592            # ignore type error because fixing it would have a lot of dependencies, revisit later
2593            return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
2594                config,
2595                InterpolatedString.create(
2596                    model.token_refresh_endpoint,  # type: ignore
2597                    parameters=model.parameters or {},
2598                ).eval(config),
2599                access_token_name=InterpolatedString.create(
2600                    model.access_token_name or "access_token", parameters=model.parameters or {}
2601                ).eval(config),
2602                refresh_token_name=model.refresh_token_updater.refresh_token_name,
2603                expires_in_name=InterpolatedString.create(
2604                    model.expires_in_name or "expires_in", parameters=model.parameters or {}
2605                ).eval(config),
2606                client_id_name=InterpolatedString.create(
2607                    model.client_id_name or "client_id", parameters=model.parameters or {}
2608                ).eval(config),
2609                client_id=InterpolatedString.create(
2610                    model.client_id, parameters=model.parameters or {}
2611                ).eval(config)
2612                if model.client_id
2613                else model.client_id,
2614                client_secret_name=InterpolatedString.create(
2615                    model.client_secret_name or "client_secret", parameters=model.parameters or {}
2616                ).eval(config),
2617                client_secret=InterpolatedString.create(
2618                    model.client_secret, parameters=model.parameters or {}
2619                ).eval(config)
2620                if model.client_secret
2621                else model.client_secret,
2622                access_token_config_path=model.refresh_token_updater.access_token_config_path,
2623                refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
2624                token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
2625                grant_type_name=InterpolatedString.create(
2626                    model.grant_type_name or "grant_type", parameters=model.parameters or {}
2627                ).eval(config),
2628                grant_type=InterpolatedString.create(
2629                    model.grant_type or "refresh_token", parameters=model.parameters or {}
2630                ).eval(config),
2631                refresh_request_body=InterpolatedMapping(
2632                    model.refresh_request_body or {}, parameters=model.parameters or {}
2633                ).eval(config),
2634                refresh_request_headers=InterpolatedMapping(
2635                    model.refresh_request_headers or {}, parameters=model.parameters or {}
2636                ).eval(config),
2637                scopes=model.scopes,
2638                token_expiry_date_format=model.token_expiry_date_format,
2639                message_repository=self._message_repository,
2640                refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
2641                refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
2642                refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
2643            )
2644        # ignore type error because fixing it would have a lot of dependencies, revisit later
2645        return DeclarativeOauth2Authenticator(  # type: ignore
2646            access_token_name=model.access_token_name or "access_token",
2647            access_token_value=model.access_token_value,
2648            client_id_name=model.client_id_name or "client_id",
2649            client_id=model.client_id,
2650            client_secret_name=model.client_secret_name or "client_secret",
2651            client_secret=model.client_secret,
2652            expires_in_name=model.expires_in_name or "expires_in",
2653            grant_type_name=model.grant_type_name or "grant_type",
2654            grant_type=model.grant_type or "refresh_token",
2655            refresh_request_body=model.refresh_request_body,
2656            refresh_request_headers=model.refresh_request_headers,
2657            refresh_token_name=model.refresh_token_name or "refresh_token",
2658            refresh_token=model.refresh_token,
2659            scopes=model.scopes,
2660            token_expiry_date=model.token_expiry_date,
2661            token_expiry_date_format=model.token_expiry_date_format,
2662            token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2663            token_refresh_endpoint=model.token_refresh_endpoint,
2664            config=config,
2665            parameters=model.parameters or {},
2666            message_repository=self._message_repository,
2667            profile_assertion=profile_assertion,
2668            use_profile_assertion=model.use_profile_assertion,
2669        )
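
create_oauth_authenticator applies one pattern repeatedly: take the model value or a fallback literal, wrap it in an InterpolatedString, and eval it against the config, so even defaults may contain interpolation. A toy sketch of that interpolate-or-default pattern (a naive {{ config.x }} substitution standing in for the CDK's Jinja interpolation):

# Toy sketch of the interpolate-or-default pattern (naive substitution, not Jinja).
from typing import Mapping, Optional


def eval_with_default(value: Optional[str], default: str, config: Mapping[str, str]) -> str:
    # Fall back to the default literal, then substitute any config references,
    # so defaults and user-supplied values go through the same evaluation path.
    template = value if value is not None else default
    for key, replacement in config.items():
        template = template.replace("{{ config." + key + " }}", replacement)
    return template


config = {"token_url": "https://example.com/oauth/token"}
assert eval_with_default(None, "grant_type", config) == "grant_type"
assert eval_with_default("{{ config.token_url }}", "", config) == "https://example.com/oauth/token"
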
2671    def create_offset_increment(
2672        self,
2673        model: OffsetIncrementModel,
2674        config: Config,
2675        decoder: Decoder,
2676        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2677        **kwargs: Any,
2678    ) -> OffsetIncrement:
2679        if isinstance(decoder, PaginationDecoderDecorator):
2680            inner_decoder = decoder.decoder
2681        else:
2682            inner_decoder = decoder
2683            decoder = PaginationDecoderDecorator(decoder=decoder)
2684
2685        if self._is_supported_decoder_for_pagination(inner_decoder):
2686            decoder_to_use = decoder
2687        else:
2688            raise ValueError(
2689                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
2690            )
2691
2692        # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
2693        # so that it could be shared by OffsetIncrement and RecordSelector. However, because we wrap the decoder in
2694        # various decorators here, but not in create_record_selector, it is simpler to retain the existing behavior
2695        # of two separate extractors with identical behavior, since they are built from the same extractor model.
2696        # When we have more time to investigate, we can look into reusing the same component.
2697        extractor = (
2698            self._create_component_from_model(
2699                model=extractor_model, config=config, decoder=decoder_to_use
2700            )
2701            if extractor_model
2702            else None
2703        )
2704
2705        return OffsetIncrement(
2706            page_size=model.page_size,
2707            config=config,
2708            decoder=decoder_to_use,
2709            extractor=extractor,
2710            inject_on_first_request=model.inject_on_first_request or False,
2711            parameters=model.parameters or {},
2712        )
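
create_offset_increment first normalizes its decoder: an already-wrapped PaginationDecoderDecorator is unwrapped to inspect the inner decoder, a bare decoder is wrapped, and only the inner decoder is validated for pagination support. A small sketch of that normalize-then-validate flow (dataclass stand-ins):

# Sketch of the decoder normalization in create_offset_increment (dataclass stand-ins).
from dataclasses import dataclass
from typing import Any


@dataclass
class PaginationWrapper:
    inner: Any


SUPPORTED_INNER = {"json"}  # stand-in for the factory's supported-decoder check


def normalize_decoder(decoder: Any) -> PaginationWrapper:
    # Unwrap to find the real decoder, wrapping bare decoders on the way in,
    # and validate pagination support against the inner decoder only.
    if isinstance(decoder, PaginationWrapper):
        inner, wrapped = decoder.inner, decoder
    else:
        inner, wrapped = decoder, PaginationWrapper(inner=decoder)
    if inner not in SUPPORTED_INNER:
        raise ValueError(f"Unsupported decoder for pagination: {inner!r}")
    return wrapped


assert normalize_decoder("json").inner == "json"
assert normalize_decoder(PaginationWrapper("json")).inner == "json"
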
2714    @staticmethod
2715    def create_page_increment(
2716        model: PageIncrementModel, config: Config, **kwargs: Any
2717    ) -> PageIncrement:
2718        return PageIncrement(
2719            page_size=model.page_size,
2720            config=config,
2721            start_from_page=model.start_from_page or 0,
2722            inject_on_first_request=model.inject_on_first_request or False,
2723            parameters=model.parameters or {},
2724        )
2726    def create_parent_stream_config(
2727        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2728    ) -> ParentStreamConfig:
2729        declarative_stream = self._create_component_from_model(
2730            model.stream, config=config, **kwargs
2731        )
2732        request_option = (
2733            self._create_component_from_model(model.request_option, config=config)
2734            if model.request_option
2735            else None
2736        )
2737
2738        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
2739            raise ValueError(
2740                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
2741            )
2742
2743        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
2744            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
2745        )
2746
2747        return ParentStreamConfig(
2748            parent_key=model.parent_key,
2749            request_option=request_option,
2750            stream=declarative_stream,
2751            partition_field=model.partition_field,
2752            config=config,
2753            incremental_dependency=model.incremental_dependency or False,
2754            parameters=model.parameters or {},
2755            extra_fields=model.extra_fields,
2756            lazy_read_pointer=model_lazy_read_pointer,
2757        )
2759    def create_properties_from_endpoint(
2760        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
2761    ) -> PropertiesFromEndpoint:
2762        retriever = self._create_component_from_model(
2763            model=model.retriever,
2764            config=config,
2765            name="dynamic_properties",
2766            primary_key=None,
2767            stream_slicer=None,
2768            transformations=[],
2769            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to be different
2770        )
2771        return PropertiesFromEndpoint(
2772            property_field_path=model.property_field_path,
2773            retriever=retriever,
2774            config=config,
2775            parameters=model.parameters or {},
2776        )
2778    def create_property_chunking(
2779        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2780    ) -> PropertyChunking:
2781        record_merge_strategy = (
2782            self._create_component_from_model(
2783                model=model.record_merge_strategy, config=config, **kwargs
2784            )
2785            if model.record_merge_strategy
2786            else None
2787        )
2788
2789        property_limit_type: PropertyLimitType
2790        match model.property_limit_type:
2791            case PropertyLimitTypeModel.property_count:
2792                property_limit_type = PropertyLimitType.property_count
2793            case PropertyLimitTypeModel.characters:
2794                property_limit_type = PropertyLimitType.characters
2795            case _:
2796                raise ValueError(f"Invalid PropertyLimitType {property_limit_type}")
2797
2798        return PropertyChunking(
2799            property_limit_type=property_limit_type,
2800            property_limit=model.property_limit,
2801            record_merge_strategy=record_merge_strategy,
2802            config=config,
2803            parameters=model.parameters or {},
2804        )
2806    def create_query_properties(
2807        self, model: QueryPropertiesModel, config: Config, **kwargs: Any
2808    ) -> QueryProperties:
2809        if isinstance(model.property_list, list):
2810            property_list = model.property_list
2811        else:
2812            property_list = self._create_component_from_model(
2813                model=model.property_list, config=config, **kwargs
2814            )
2815
2816        property_chunking = (
2817            self._create_component_from_model(
2818                model=model.property_chunking, config=config, **kwargs
2819            )
2820            if model.property_chunking
2821            else None
2822        )
2823
2824        return QueryProperties(
2825            property_list=property_list,
2826            always_include_properties=model.always_include_properties,
2827            property_chunking=property_chunking,
2828            config=config,
2829            parameters=model.parameters or {},
2830        )
2832    @staticmethod
2833    def create_record_filter(
2834        model: RecordFilterModel, config: Config, **kwargs: Any
2835    ) -> RecordFilter:
2836        return RecordFilter(
2837            condition=model.condition or "", config=config, parameters=model.parameters or {}
2838        )
2840    @staticmethod
2841    def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
2842        return RequestPath(parameters={})
2844    @staticmethod
2845    def create_request_option(
2846        model: RequestOptionModel, config: Config, **kwargs: Any
2847    ) -> RequestOption:
2848        inject_into = RequestOptionType(model.inject_into.value)
2849        field_path: Optional[List[Union[InterpolatedString, str]]] = (
2850            [
2851                InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
2852                for segment in model.field_path
2853            ]
2854            if model.field_path
2855            else None
2856        )
2857        field_name = (
2858            InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
2859            if model.field_name
2860            else None
2861        )
2862        return RequestOption(
2863            field_name=field_name,
2864            field_path=field_path,
2865            inject_into=inject_into,
2866            parameters=kwargs.get("parameters", {}),
2867        )
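
Each field_path segment in create_request_option becomes its own InterpolatedString, so individual path segments can reference config values independently, and an absent field_path stays None. A standalone sketch with a toy substitution in place of real interpolation:

# Sketch of per-segment field_path handling (toy substitution in place of interpolation).
from typing import List, Mapping, Optional


def interpolate_segment(segment: str, config: Mapping[str, str]) -> str:
    for key, value in config.items():
        segment = segment.replace("{{ config." + key + " }}", value)
    return segment


def build_field_path(field_path: Optional[List[str]], config: Mapping[str, str]) -> Optional[List[str]]:
    # Each segment is interpolated independently; an absent field_path stays None.
    if not field_path:
        return None
    return [interpolate_segment(segment, config) for segment in field_path]


assert build_field_path(None, {}) is None
assert build_field_path(["data", "{{ config.object }}"], {"object": "users"}) == ["data", "users"]
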
2869    def create_record_selector(
2870        self,
2871        model: RecordSelectorModel,
2872        config: Config,
2873        *,
2874        name: str,
2875        transformations: List[RecordTransformation] | None = None,
2876        decoder: Decoder | None = None,
2877        client_side_incremental_sync: Dict[str, Any] | None = None,
2878        file_uploader: Optional[DefaultFileUploader] = None,
2879        **kwargs: Any,
2880    ) -> RecordSelector:
2881        extractor = self._create_component_from_model(
2882            model=model.extractor, decoder=decoder, config=config
2883        )
2884        record_filter = (
2885            self._create_component_from_model(model.record_filter, config=config)
2886            if model.record_filter
2887            else None
2888        )
2889
2890        transform_before_filtering = (
2891            False if model.transform_before_filtering is None else model.transform_before_filtering
2892        )
2893        if client_side_incremental_sync:
2894            record_filter = ClientSideIncrementalRecordFilterDecorator(
2895                config=config,
2896                parameters=model.parameters,
2897                condition=model.record_filter.condition
2898                if (model.record_filter and hasattr(model.record_filter, "condition"))
2899                else None,
2900                **client_side_incremental_sync,
2901            )
2902            transform_before_filtering = (
2903                True
2904                if model.transform_before_filtering is None
2905                else model.transform_before_filtering
2906            )
2907
2908        if model.schema_normalization is None:
2909            # default to no schema normalization if not set
2910            model.schema_normalization = SchemaNormalizationModel.None_
2911
2912        schema_normalization = (
2913            TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
2914            if isinstance(model.schema_normalization, SchemaNormalizationModel)
2915            else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
2916        )
2917
2918        return RecordSelector(
2919            extractor=extractor,
2920            name=name,
2921            config=config,
2922            record_filter=record_filter,
2923            transformations=transformations or [],
2924            file_uploader=file_uploader,
2925            schema_normalization=schema_normalization,
2926            parameters=model.parameters or {},
2927            transform_before_filtering=transform_before_filtering,
2928        )
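
The transform_before_filtering default above is asymmetric: with no explicit model value it is False, but enabling client-side incremental sync flips that default to True so the cursor compares transformed records, while an explicit value always wins. A truth-function sketch of that resolution:

# Truth-function sketch of the transform_before_filtering resolution above.
from typing import Optional


def resolve_transform_before_filtering(explicit: Optional[bool], client_side_incremental: bool) -> bool:
    # An explicit model value always wins; otherwise the default is False,
    # flipped to True when client-side incremental filtering is active.
    if explicit is not None:
        return explicit
    return client_side_incremental


assert resolve_transform_before_filtering(None, False) is False
assert resolve_transform_before_filtering(None, True) is True
assert resolve_transform_before_filtering(False, True) is False
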
2930    @staticmethod
2931    def create_remove_fields(
2932        model: RemoveFieldsModel, config: Config, **kwargs: Any
2933    ) -> RemoveFields:
2934        return RemoveFields(
2935            field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
2936        )
2938    def create_selective_authenticator(
2939        self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
2940    ) -> DeclarativeAuthenticator:
2941        authenticators = {
2942            name: self._create_component_from_model(model=auth, config=config)
2943            for name, auth in model.authenticators.items()
2944        }
2945        # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error
2946        return SelectiveAuthenticator(  # type: ignore[abstract]
2947            config=config,
2948            authenticators=authenticators,
2949            authenticator_selection_path=model.authenticator_selection_path,
2950            **kwargs,
2951        )
2953    @staticmethod
2954    def create_legacy_session_token_authenticator(
2955        model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
2956    ) -> LegacySessionTokenAuthenticator:
2957        return LegacySessionTokenAuthenticator(
2958            api_url=url_base,
2959            header=model.header,
2960            login_url=model.login_url,
2961            password=model.password or "",
2962            session_token=model.session_token or "",
2963            session_token_response_key=model.session_token_response_key or "",
2964            username=model.username or "",
2965            validate_session_url=model.validate_session_url,
2966            config=config,
2967            parameters=model.parameters or {},
2968        )
2970    def create_simple_retriever(
2971        self,
2972        model: SimpleRetrieverModel,
2973        config: Config,
2974        *,
2975        name: str,
2976        primary_key: Optional[Union[str, List[str], List[List[str]]]],
2977        stream_slicer: Optional[StreamSlicer],
2978        request_options_provider: Optional[RequestOptionsProvider] = None,
2979        stop_condition_on_cursor: bool = False,
2980        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
2981        transformations: List[RecordTransformation],
2982        file_uploader: Optional[DefaultFileUploader] = None,
2983        incremental_sync: Optional[
2984            Union[
2985                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
2986            ]
2987        ] = None,
2988        use_cache: Optional[bool] = None,
2989        **kwargs: Any,
2990    ) -> SimpleRetriever:
2991        def _get_url() -> str:
2992            """
2993            Closure to resolve the URL from the requester. This is needed for the lazy retriever,
2994            because the URL is not set until the requester is created.
2995            """
2996
2997            _url = (
2998                model.requester.url
2999                if hasattr(model.requester, "url") and model.requester.url is not None
3000                else requester.get_url()
3001            )
3002            _url_base = (
3003                model.requester.url_base
3004                if hasattr(model.requester, "url_base") and model.requester.url_base is not None
3005                else requester.get_url_base()
3006            )
3007
3008            return _url or _url_base
3009
3010        decoder = (
3011            self._create_component_from_model(model=model.decoder, config=config)
3012            if model.decoder
3013            else JsonDecoder(parameters={})
3014        )
3015        record_selector = self._create_component_from_model(
3016            model=model.record_selector,
3017            name=name,
3018            config=config,
3019            decoder=decoder,
3020            transformations=transformations,
3021            client_side_incremental_sync=client_side_incremental_sync,
3022            file_uploader=file_uploader,
3023        )
3024
3025        query_properties: Optional[QueryProperties] = None
3026        query_properties_key: Optional[str] = None
3027        if self._query_properties_in_request_parameters(model.requester):
3028            # It is better to raise an explicit error if PropertiesFromEndpoint is defined in multiple
3029            # places than to silently default to request_parameters, which isn't clearly documented
3030            if (
3031                hasattr(model.requester, "fetch_properties_from_endpoint")
3032                and model.requester.fetch_properties_from_endpoint
3033            ):
3034                raise ValueError(
3035                    f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters"
3036                )
3037
3038            query_properties_definitions = []
3039            for key, request_parameter in model.requester.request_parameters.items():  # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters()
3040                if isinstance(request_parameter, QueryPropertiesModel):
3041                    query_properties_key = key
3042                    query_properties_definitions.append(request_parameter)
3043
3044            if len(query_properties_definitions) > 1:
3045                raise ValueError(
3046                    f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
3047                )
3048
3049            if len(query_properties_definitions) == 1:
3050                query_properties = self._create_component_from_model(
3051                    model=query_properties_definitions[0], config=config
3052                )
3053        elif (
3054            hasattr(model.requester, "fetch_properties_from_endpoint")
3055            and model.requester.fetch_properties_from_endpoint
3056        ):
3057            query_properties_definition = QueryPropertiesModel(
3058                type="QueryProperties",
3059                property_list=model.requester.fetch_properties_from_endpoint,
3060                always_include_properties=None,
3061                property_chunking=None,
3062            )  # type: ignore # $parameters has a default value
3063
3064            query_properties = self.create_query_properties(
3065                model=query_properties_definition,
3066                config=config,
3067            )
3068
3069        requester = self._create_component_from_model(
3070            model=model.requester,
3071            decoder=decoder,
3072            name=name,
3073            query_properties_key=query_properties_key,
3074            use_cache=use_cache,
3075            config=config,
3076        )
3077
3078        # Define cursor only if per partition or common incremental support is needed
3079        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
3080
3081        if type(stream_slicer) is not DatetimeBasedCursor:
3085            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
3086            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
3087            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
3088            # request_options_provider
3089            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
3090        elif not request_options_provider:
3091            request_options_provider = DefaultRequestOptionsProvider(parameters={})
3092
3093        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3094
3095        cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
3096        paginator = (
3097            self._create_component_from_model(
3098                model=model.paginator,
3099                config=config,
3100                url_base=_get_url(),
3101                extractor_model=model.record_selector.extractor,
3102                decoder=decoder,
3103                cursor_used_for_stop_condition=cursor_used_for_stop_condition,
3104            )
3105            if model.paginator
3106            else NoPagination(parameters={})
3107        )
3108
3109        ignore_stream_slicer_parameters_on_paginated_requests = (
3110            model.ignore_stream_slicer_parameters_on_paginated_requests or False
3111        )
3112
3113        if (
3114            model.partition_router
3115            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
3116            and not bool(self._connector_state_manager.get_stream_state(name, None))
3117            and any(
3118                parent_stream_config.lazy_read_pointer
3119                for parent_stream_config in model.partition_router.parent_stream_configs
3120            )
3121        ):
3122            if incremental_sync:
3123                if incremental_sync.type != "DatetimeBasedCursor":
3124                    raise ValueError(
3125                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
3126                    )
3127
3128                elif incremental_sync.step or incremental_sync.cursor_granularity:
3129                    raise ValueError(
3130                        f"Found more than one slice per parent. LazySimpleRetriever only supports a single-slice read for stream - {name}."
3131                    )
3132
3133            if model.decoder and model.decoder.type != "JsonDecoder":
3134                raise ValueError(
3135                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
3136                )
3137
3138            return LazySimpleRetriever(
3139                name=name,
3140                paginator=paginator,
3141                primary_key=primary_key,
3142                requester=requester,
3143                record_selector=record_selector,
3144                stream_slicer=stream_slicer,
3145                request_option_provider=request_options_provider,
3146                cursor=cursor,
3147                config=config,
3148                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3149                parameters=model.parameters or {},
3150            )
3151
3152        if self._limit_slices_fetched or self._emit_connector_builder_messages:
3153            return SimpleRetrieverTestReadDecorator(
3154                name=name,
3155                paginator=paginator,
3156                primary_key=primary_key,
3157                requester=requester,
3158                record_selector=record_selector,
3159                stream_slicer=stream_slicer,
3160                request_option_provider=request_options_provider,
3161                cursor=cursor,
3162                config=config,
3163                maximum_number_of_slices=self._limit_slices_fetched or 5,
3164                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3165                parameters=model.parameters or {},
3166            )
3167        return SimpleRetriever(
3168            name=name,
3169            paginator=paginator,
3170            primary_key=primary_key,
3171            requester=requester,
3172            record_selector=record_selector,
3173            stream_slicer=stream_slicer,
3174            request_option_provider=request_options_provider,
3175            cursor=cursor,
3176            config=config,
3177            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3178            additional_query_properties=query_properties,
3179            parameters=model.parameters or {},
3180        )
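Depending on context, this method returns one of three retrievers: a LazySimpleRetriever when a SubstreamPartitionRouter parent sets lazy_read_pointer and no stream state exists, a SimpleRetrieverTestReadDecorator for Connector Builder test reads, and a plain SimpleRetriever otherwise. Below is a minimal sketch of the kind of component definition this method consumes; the endpoint and field names are illustrative, not part of the CDK.

    # Hypothetical SimpleRetriever definition: GET https://api.example.com/v1/items
    # and extract records from the top-level "data" array.
    simple_retriever_definition = {
        "type": "SimpleRetriever",
        "requester": {
            "type": "HttpRequester",
            "url_base": "https://api.example.com/v1",
            "path": "/items",
            "http_method": "GET",
        },
        "record_selector": {
            "type": "RecordSelector",
            "extractor": {"type": "DpathExtractor", "field_path": ["data"]},
        },
        "paginator": {"type": "NoPagination"},
    }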
def create_state_delegating_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StateDelegatingStream, config: Mapping[str, Any], has_parent_state: Optional[bool] = None, **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
3205    def create_state_delegating_stream(
3206        self,
3207        model: StateDelegatingStreamModel,
3208        config: Config,
3209        has_parent_state: Optional[bool] = None,
3210        **kwargs: Any,
3211    ) -> DeclarativeStream:
3212        if (
3213            model.full_refresh_stream.name != model.name
3214            or model.name != model.incremental_stream.name
3215        ):
3216            raise ValueError(
3217                f"state_delegating_stream, full_refresh_stream and incremental_stream must have equal names. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
3218            )
3219
3220        stream_model = (
3221            model.incremental_stream
3222            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
3223            else model.full_refresh_stream
3224        )
3225
3226        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # A DeclarativeStream will be created since stream_model is a stream description
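The delegate is chosen at creation time: if the connector already has state for the stream (or a parent has state), the incremental_stream definition is built; otherwise the full_refresh_stream is used. A minimal sketch, with hypothetical stream content, showing the required matching names:

    # Both delegates must share the delegating stream's name; each delegate is a
    # complete DeclarativeStream definition (retriever and schema omitted here).
    state_delegating_definition = {
        "type": "StateDelegatingStream",
        "name": "orders",
        "full_refresh_stream": {"type": "DeclarativeStream", "name": "orders"},
        "incremental_stream": {"type": "DeclarativeStream", "name": "orders"},
    }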
def create_async_retriever( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AsyncRetriever, config: Mapping[str, Any], *, name: str, primary_key: Union[str, List[str], List[List[str]], NoneType], stream_slicer: Optional[airbyte_cdk.sources.declarative.stream_slicers.StreamSlicer], client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[airbyte_cdk.RecordTransformation], **kwargs: Any) -> airbyte_cdk.sources.declarative.retrievers.AsyncRetriever:
3258    def create_async_retriever(
3259        self,
3260        model: AsyncRetrieverModel,
3261        config: Config,
3262        *,
3263        name: str,
3264        primary_key: Optional[
3265            Union[str, List[str], List[List[str]]]
3266        ],  # this seems to be needed to match create_simple_retriever
3267        stream_slicer: Optional[StreamSlicer],
3268        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3269        transformations: List[RecordTransformation],
3270        **kwargs: Any,
3271    ) -> AsyncRetriever:
3272        def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
3273            record_selector = RecordSelector(
3274                extractor=download_extractor,
3275                name=name,
3276                record_filter=None,
3277                transformations=transformations,
3278                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
3279                config=config,
3280                parameters={},
3281            )
3282            paginator = (
3283                self._create_component_from_model(
3284                    model=model.download_paginator,
3285                    decoder=decoder,
3286                    config=config,
3287                    url_base="",
3288                )
3289                if model.download_paginator
3290                else NoPagination(parameters={})
3291            )
3292            maximum_number_of_slices = self._limit_slices_fetched or 5
3293
3294            if self._limit_slices_fetched or self._emit_connector_builder_messages:
3295                return SimpleRetrieverTestReadDecorator(
3296                    requester=download_requester,
3297                    record_selector=record_selector,
3298                    primary_key=None,
3299                    name=job_download_components_name,
3300                    paginator=paginator,
3301                    config=config,
3302                    parameters={},
3303                    maximum_number_of_slices=maximum_number_of_slices,
3304                )
3305
3306            return SimpleRetriever(
3307                requester=download_requester,
3308                record_selector=record_selector,
3309                primary_key=None,
3310                name=job_download_components_name,
3311                paginator=paginator,
3312                config=config,
3313                parameters={},
3314            )
3315
3316        def _get_job_timeout() -> datetime.timedelta:
3317            user_defined_timeout: Optional[int] = (
3318                int(
3319                    InterpolatedString.create(
3320                        str(model.polling_job_timeout),
3321                        parameters={},
3322                    ).eval(config)
3323                )
3324                if model.polling_job_timeout
3325                else None
3326            )
3327
3328            # for test reads, use the user-defined timeout or fall back to 15 minutes
3329            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
3330            # outside the Connector Builder, the default is 60 minutes
3331            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
3332
3333            return (
3334                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
3335            )
3336
3337        decoder = (
3338            self._create_component_from_model(model=model.decoder, config=config)
3339            if model.decoder
3340            else JsonDecoder(parameters={})
3341        )
3342        record_selector = self._create_component_from_model(
3343            model=model.record_selector,
3344            config=config,
3345            decoder=decoder,
3346            name=name,
3347            transformations=transformations,
3348            client_side_incremental_sync=client_side_incremental_sync,
3349        )
3350        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3351        creation_requester = self._create_component_from_model(
3352            model=model.creation_requester,
3353            decoder=decoder,
3354            config=config,
3355            name=f"job creation - {name}",
3356        )
3357        polling_requester = self._create_component_from_model(
3358            model=model.polling_requester,
3359            decoder=decoder,
3360            config=config,
3361            name=f"job polling - {name}",
3362        )
3363        job_download_components_name = f"job download - {name}"
3364        download_decoder = (
3365            self._create_component_from_model(model=model.download_decoder, config=config)
3366            if model.download_decoder
3367            else JsonDecoder(parameters={})
3368        )
3369        download_extractor = (
3370            self._create_component_from_model(
3371                model=model.download_extractor,
3372                config=config,
3373                decoder=download_decoder,
3374                parameters=model.parameters,
3375            )
3376            if model.download_extractor
3377            else DpathExtractor(
3378                [],
3379                config=config,
3380                decoder=download_decoder,
3381                parameters=model.parameters or {},
3382            )
3383        )
3384        download_requester = self._create_component_from_model(
3385            model=model.download_requester,
3386            decoder=download_decoder,
3387            config=config,
3388            name=job_download_components_name,
3389        )
3390        download_retriever = _get_download_retriever()
3391        abort_requester = (
3392            self._create_component_from_model(
3393                model=model.abort_requester,
3394                decoder=decoder,
3395                config=config,
3396                name=f"job abort - {name}",
3397            )
3398            if model.abort_requester
3399            else None
3400        )
3401        delete_requester = (
3402            self._create_component_from_model(
3403                model=model.delete_requester,
3404                decoder=decoder,
3405                config=config,
3406                name=f"job delete - {name}",
3407            )
3408            if model.delete_requester
3409            else None
3410        )
3411        download_target_requester = (
3412            self._create_component_from_model(
3413                model=model.download_target_requester,
3414                decoder=decoder,
3415                config=config,
3416                name=f"job extract_url - {name}",
3417            )
3418            if model.download_target_requester
3419            else None
3420        )
3421        status_extractor = self._create_component_from_model(
3422            model=model.status_extractor, decoder=decoder, config=config, name=name
3423        )
3424        download_target_extractor = self._create_component_from_model(
3425            model=model.download_target_extractor,
3426            decoder=decoder,
3427            config=config,
3428            name=name,
3429        )
3430
3431        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
3432            creation_requester=creation_requester,
3433            polling_requester=polling_requester,
3434            download_retriever=download_retriever,
3435            download_target_requester=download_target_requester,
3436            abort_requester=abort_requester,
3437            delete_requester=delete_requester,
3438            status_extractor=status_extractor,
3439            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3440            download_target_extractor=download_target_extractor,
3441            job_timeout=_get_job_timeout(),
3442        )
3443
3444        async_job_partition_router = AsyncJobPartitionRouter(
3445            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3446                job_repository,
3447                stream_slices,
3448                self._job_tracker,
3449                self._message_repository,
3450                # FIXME: work would need to be done here to detect whether a stream has a parent stream that is bulk
3451                has_bulk_parent=False,
3452                # set `job_max_retry` to 1 for the Connector Builder use-case.
3453                # `None` means the default of 3 retry attempts is used under the hood.
3454                job_max_retry=1 if self._emit_connector_builder_messages else None,
3455            ),
3456            stream_slicer=stream_slicer,
3457            config=config,
3458            parameters=model.parameters or {},
3459        )
3460
3461        return AsyncRetriever(
3462            record_selector=record_selector,
3463            stream_slicer=async_job_partition_router,
3464            config=config,
3465            parameters=model.parameters or {},
3466        )
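The assembled AsyncRetriever drives the full async job lifecycle: the creation requester starts a job, the polling requester checks it (status_extractor plus status_mapping translate API statuses into AsyncJobStatus values), and the download retriever fetches results, optionally via a download_target_requester. A rough sketch of such a definition follows; the endpoints, field paths, and interpolation variables are assumptions for illustration only.

    # Hypothetical AsyncRetriever definition wiring job creation, polling, and download.
    async_retriever_definition = {
        "type": "AsyncRetriever",
        "creation_requester": {
            "type": "HttpRequester",
            "url_base": "https://api.example.com",
            "path": "/jobs",
            "http_method": "POST",
        },
        "polling_requester": {
            "type": "HttpRequester",
            "url_base": "https://api.example.com",
            "path": "/jobs/{{ creation_response['id'] }}",  # assumed interpolation context
            "http_method": "GET",
        },
        "download_requester": {
            "type": "HttpRequester",
            "url_base": "https://api.example.com",
            "path": "{{ download_target }}",  # assumed interpolation context
            "http_method": "GET",
        },
        "status_extractor": {"type": "DpathExtractor", "field_path": ["status"]},
        "download_target_extractor": {"type": "DpathExtractor", "field_path": ["result_url"]},
        "status_mapping": {
            "running": ["pending"],
            "completed": ["done"],
            "failed": ["error"],
            "timeout": ["timed_out"],
        },
        "record_selector": {
            "type": "RecordSelector",
            "extractor": {"type": "DpathExtractor", "field_path": []},
        },
    }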
@staticmethod
def create_spec( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Spec, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.spec.Spec:
3468    @staticmethod
3469    def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
3470        return Spec(
3471            connection_specification=model.connection_specification,
3472            documentation_url=model.documentation_url,
3473            advanced_auth=model.advanced_auth,
3474            parameters={},
3475        )
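A minimal sketch of a Spec definition; the documentation URL and schema content are illustrative.

    spec_definition = {
        "type": "Spec",
        "documentation_url": "https://docs.airbyte.com/integrations/sources/example",
        "connection_specification": {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "required": ["api_key"],
            "properties": {"api_key": {"type": "string", "airbyte_secret": True}},
        },
    }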
def create_substream_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SubstreamPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.SubstreamPartitionRouter:
3477    def create_substream_partition_router(
3478        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
3479    ) -> SubstreamPartitionRouter:
3480        parent_stream_configs = []
3481        if model.parent_stream_configs:
3482            parent_stream_configs.extend(
3483                [
3484                    self._create_message_repository_substream_wrapper(
3485                        model=parent_stream_config, config=config, **kwargs
3486                    )
3487                    for parent_stream_config in model.parent_stream_configs
3488                ]
3489            )
3490
3491        return SubstreamPartitionRouter(
3492            parent_stream_configs=parent_stream_configs,
3493            parameters=model.parameters or {},
3494            config=config,
3495        )
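Each parent stream config is wrapped so that parent-stream messages are routed through this factory's message repository. A hypothetical sketch of a definition this method accepts, producing one partition per parent record keyed by its id field:

    substream_router_definition = {
        "type": "SubstreamPartitionRouter",
        "parent_stream_configs": [
            {
                "type": "ParentStreamConfig",
                # `stream` is a complete DeclarativeStream definition (abbreviated here)
                "stream": {"type": "DeclarativeStream", "name": "projects"},
                "parent_key": "id",
                "partition_field": "project_id",
            }
        ],
    }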
3523    @staticmethod
3524    def create_wait_time_from_header(
3525        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
3526    ) -> WaitTimeFromHeaderBackoffStrategy:
3527        return WaitTimeFromHeaderBackoffStrategy(
3528            header=model.header,
3529            parameters=model.parameters or {},
3530            config=config,
3531            regex=model.regex,
3532            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
3535        )
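A short sketch of a matching definition; the header name is illustrative. The optional regex extracts the wait time from the header value, and max_waiting_time_in_seconds caps how long the strategy will wait.

    wait_time_definition = {
        "type": "WaitTimeFromHeader",
        "header": "Retry-After",
        "max_waiting_time_in_seconds": 600,
    }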
3537    @staticmethod
3538    def create_wait_until_time_from_header(
3539        model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
3540    ) -> WaitUntilTimeFromHeaderBackoffStrategy:
3541        return WaitUntilTimeFromHeaderBackoffStrategy(
3542            header=model.header,
3543            parameters=model.parameters or {},
3544            config=config,
3545            min_wait=model.min_wait,
3546            regex=model.regex,
3547        )
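A companion sketch for the wait-until variant, which sleeps until the timestamp carried in the (illustrative) header below, waiting at least min_wait seconds:

    wait_until_definition = {
        "type": "WaitUntilTimeFromHeader",
        "header": "X-RateLimit-Reset",
        "min_wait": 5,
    }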
def get_message_repository(self) -> airbyte_cdk.MessageRepository:
3549    def get_message_repository(self) -> MessageRepository:
3550        return self._message_repository
@staticmethod
def create_components_mapping_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ComponentMappingDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.ComponentMappingDefinition:
3555    @staticmethod
3556    def create_components_mapping_definition(
3557        model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
3558    ) -> ComponentMappingDefinition:
3559        interpolated_value = InterpolatedString.create(
3560            model.value, parameters=model.parameters or {}
3561        )
3562        field_path = [
3563            InterpolatedString.create(path, parameters=model.parameters or {})
3564            for path in model.field_path
3565        ]
3566        return ComponentMappingDefinition(
3567            field_path=field_path,  # type: ignore[arg-type] # field_path entries can be str or InterpolatedString
3568            value=interpolated_value,
3569            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
3570            parameters=model.parameters or {},
3571        )
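A hypothetical mapping that overwrites each generated stream's name with a value interpolated from the resolver output; the components_values variable is the interpolation context dynamic stream resolvers are expected to provide:

    mapping_definition = {
        "type": "ComponentMappingDefinition",
        "field_path": ["name"],
        "value": "{{ components_values['name'] }}",
        "value_type": "string",
    }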
def create_http_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpComponentsResolver, config: Mapping[str, Any]) -> Any:
3573    def create_http_components_resolver(
3574        self, model: HttpComponentsResolverModel, config: Config
3575    ) -> Any:
3576        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3577        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3578
3579        retriever = self._create_component_from_model(
3580            model=model.retriever,
3581            config=config,
3582            name="",
3583            primary_key=None,
3584            stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3585            transformations=[],
3586        )
3587
3588        components_mapping = [
3589            self._create_component_from_model(
3590                model=components_mapping_definition_model,
3591                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3592                    components_mapping_definition_model.value_type
3593                ),
3594                config=config,
3595            )
3596            for components_mapping_definition_model in model.components_mapping
3597        ]
3598
3599        return HttpComponentsResolver(
3600            retriever=retriever,
3601            config=config,
3602            components_mapping=components_mapping,
3603            parameters=model.parameters or {},
3604        )
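Putting the pieces together, a hypothetical HttpComponentsResolver fetches a list of entities over HTTP and stamps each one into a stream template through components_mapping; all endpoints and field names below are illustrative:

    http_resolver_definition = {
        "type": "HttpComponentsResolver",
        "retriever": {
            "type": "SimpleRetriever",
            "requester": {
                "type": "HttpRequester",
                "url_base": "https://api.example.com",
                "path": "/tables",
            },
            "record_selector": {
                "type": "RecordSelector",
                "extractor": {"type": "DpathExtractor", "field_path": ["tables"]},
            },
        },
        "components_mapping": [
            {
                "type": "ComponentMappingDefinition",
                "field_path": ["name"],
                "value": "{{ components_values['table_name'] }}",
            }
        ],
    }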
@staticmethod
def create_stream_config( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StreamConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.StreamConfig:
3606    @staticmethod
3607    def create_stream_config(
3608        model: StreamConfigModel, config: Config, **kwargs: Any
3609    ) -> StreamConfig:
3610        model_configs_pointer: List[Union[InterpolatedString, str]] = (
3611            list(model.configs_pointer) if model.configs_pointer else []
3612        )
3613
3614        return StreamConfig(
3615            configs_pointer=model_configs_pointer,
3616            parameters=model.parameters or {},
3617        )
def create_config_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConfigComponentsResolver, config: Mapping[str, Any]) -> Any:
3619    def create_config_components_resolver(
3620        self, model: ConfigComponentsResolverModel, config: Config
3621    ) -> Any:
3622        stream_config = self._create_component_from_model(
3623            model.stream_config, config=config, parameters=model.parameters or {}
3624        )
3625
3626        components_mapping = [
3627            self._create_component_from_model(
3628                model=components_mapping_definition_model,
3629                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3630                    components_mapping_definition_model.value_type
3631                ),
3632                config=config,
3633            )
3634            for components_mapping_definition_model in model.components_mapping
3635        ]
3636
3637        return ConfigComponentsResolver(
3638            stream_config=stream_config,
3639            config=config,
3640            components_mapping=components_mapping,
3641            parameters=model.parameters or {},
3642        )
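The config-driven variant reads stream definitions straight from the connector config. A hypothetical sketch, assuming the config stores them under a custom_streams key:

    config_resolver_definition = {
        "type": "ConfigComponentsResolver",
        "stream_config": {"type": "StreamConfig", "configs_pointer": ["custom_streams"]},
        "components_mapping": [
            {
                "type": "ComponentMappingDefinition",
                "field_path": ["name"],
                "value": "{{ components_values['name'] }}",
            }
        ],
    }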
def create_http_api_budget( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HTTPAPIBudget, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.HttpAPIBudget:
3666    def create_http_api_budget(
3667        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
3668    ) -> HttpAPIBudget:
3669        policies = [
3670            self._create_component_from_model(model=policy, config=config)
3671            for policy in model.policies
3672        ]
3673
3674        return HttpAPIBudget(
3675            policies=policies,
3676            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
3677            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
3678            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
3679        )
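A sketch of an HTTPAPIBudget definition that relies on the header defaults above; the single policy is illustrative:

    api_budget_definition = {
        "type": "HTTPAPIBudget",
        "policies": [
            {
                "type": "MovingWindowCallRatePolicy",
                "rates": [{"type": "Rate", "limit": 100, "interval": "PT1M"}],
                "matchers": [],
            }
        ],
    }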
def create_fixed_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FixedWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.FixedWindowCallRatePolicy:
3681    def create_fixed_window_call_rate_policy(
3682        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
3683    ) -> FixedWindowCallRatePolicy:
3684        matchers = [
3685            self._create_component_from_model(model=matcher, config=config)
3686            for matcher in model.matchers
3687        ]
3688
3689        # Set the initial reset timestamp to 10 days from now.
3690        # This value will be updated by the first request.
3691        return FixedWindowCallRatePolicy(
3692            next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
3693            period=parse_duration(model.period),
3694            call_limit=model.call_limit,
3695            matchers=matchers,
3696        )
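A sketch of a fixed-window policy allowing at most 200 calls per hour for requests matching an illustrative path pattern; period is an ISO 8601 duration handled by isodate.parse_duration:

    fixed_window_policy = {
        "type": "FixedWindowCallRatePolicy",
        "period": "PT1H",
        "call_limit": 200,
        "matchers": [{"type": "HttpRequestRegexMatcher", "url_path_pattern": "^/export"}],
    }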
def create_file_uploader( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FileUploader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.retrievers.file_uploader.FileUploader:
3698    def create_file_uploader(
3699        self, model: FileUploaderModel, config: Config, **kwargs: Any
3700    ) -> FileUploader:
3701        name = "File Uploader"
3702        requester = self._create_component_from_model(
3703            model=model.requester,
3704            config=config,
3705            name=name,
3706            **kwargs,
3707        )
3708        download_target_extractor = self._create_component_from_model(
3709            model=model.download_target_extractor,
3710            config=config,
3711            name=name,
3712            **kwargs,
3713        )
3714        emit_connector_builder_messages = self._emit_connector_builder_messages
3715        file_uploader = DefaultFileUploader(
3716            requester=requester,
3717            download_target_extractor=download_target_extractor,
3718            config=config,
3719            file_writer=NoopFileWriter()
3720            if emit_connector_builder_messages
3721            else LocalFileSystemFileWriter(),
3722            parameters=model.parameters or {},
3723            filename_extractor=model.filename_extractor if model.filename_extractor else None,
3724        )
3725
3726        return (
3727            ConnectorBuilderFileUploader(file_uploader)
3728            if emit_connector_builder_messages
3729            else file_uploader
3730        )
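A rough sketch of a FileUploader definition; the endpoint and record field are hypothetical. The download_target_extractor points at the record field holding each file's URL:

    file_uploader_definition = {
        "type": "FileUploader",
        "requester": {
            "type": "HttpRequester",
            "url_base": "https://files.example.com",
            "path": "/download",
        },
        "download_target_extractor": {"type": "DpathExtractor", "field_path": ["file_url"]},
    }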
def create_moving_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.MovingWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.MovingWindowCallRatePolicy:
3732    def create_moving_window_call_rate_policy(
3733        self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3734    ) -> MovingWindowCallRatePolicy:
3735        rates = [
3736            self._create_component_from_model(model=rate, config=config) for rate in model.rates
3737        ]
3738        matchers = [
3739            self._create_component_from_model(model=matcher, config=config)
3740            for matcher in model.matchers
3741        ]
3742        return MovingWindowCallRatePolicy(
3743            rates=rates,
3744            matchers=matchers,
3745        )
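A sketch of a moving-window policy; note that create_rate (below) evaluates limit as an interpolated string, so it may reference the config. The values here are illustrative:

    moving_window_policy = {
        "type": "MovingWindowCallRatePolicy",
        "rates": [{"type": "Rate", "limit": "{{ config['rate_limit'] }}", "interval": "PT1M"}],
        "matchers": [{"type": "HttpRequestRegexMatcher", "url_path_pattern": "^/search"}],
    }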
def create_unlimited_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.UnlimitedCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.UnlimitedCallRatePolicy:
3747    def create_unlimited_call_rate_policy(
3748        self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
3749    ) -> UnlimitedCallRatePolicy:
3750        matchers = [
3751            self._create_component_from_model(model=matcher, config=config)
3752            for matcher in model.matchers
3753        ]
3754
3755        return UnlimitedCallRatePolicy(
3756            matchers=matchers,
3757        )
def create_rate( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Rate, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Rate:
3759    def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
3760        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
3761        return Rate(
3762            limit=int(interpolated_limit.eval(config=config)),
3763            interval=parse_duration(model.interval),
3764        )
def create_http_request_matcher( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpRequestRegexMatcher, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.HttpRequestRegexMatcher:
3766    def create_http_request_matcher(
3767        self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
3768    ) -> HttpRequestRegexMatcher:
3769        return HttpRequestRegexMatcher(
3770            method=model.method,
3771            url_base=model.url_base,
3772            url_path_pattern=model.url_path_pattern,
3773            params=model.params,
3774            headers=model.headers,
3775        )
def set_api_budget( self, component_definition: Mapping[str, Any], config: Mapping[str, Any]) -> None:
3777    def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
3778        self._api_budget = self.create_component(
3779            model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
3780        )
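A minimal usage sketch, assuming the factory's default constructor arguments; the budget definition itself is illustrative:

    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    # Register an API budget on the factory so subsequently created requesters share it.
    factory = ModelToComponentFactory()
    factory.set_api_budget(
        component_definition={
            "type": "HTTPAPIBudget",
            "policies": [{"type": "UnlimitedCallRatePolicy", "matchers": []}],
        },
        config={},
    )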
def create_grouping_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.GroupingPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.partition_routers.GroupingPartitionRouter:
3782    def create_grouping_partition_router(
3783        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
3784    ) -> GroupingPartitionRouter:
3785        underlying_router = self._create_component_from_model(
3786            model=model.underlying_partition_router, config=config
3787        )
3788        if model.group_size < 1:
3789            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
3790
3791        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
3792        # because they are specific to individual partitions and cannot be aggregated or handled
3793        # when grouping, potentially leading to incorrect API calls. Any request customization
3794        # should be managed at the stream level through the requester's configuration.
3795        if isinstance(underlying_router, SubstreamPartitionRouter):
3796            if any(
3797                parent_config.request_option
3798                for parent_config in underlying_router.parent_stream_configs
3799            ):
3800                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3801
3802        if isinstance(underlying_router, ListPartitionRouter):
3803            if underlying_router.request_option:
3804                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
3805
3806        return GroupingPartitionRouter(
3807            group_size=model.group_size,
3808            underlying_partition_router=underlying_router,
3809            deduplicate=model.deduplicate if model.deduplicate is not None else True,
3810            config=config,
3811        )
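A hypothetical sketch batching partitions from an underlying ListPartitionRouter into groups of 10 with deduplication; per the checks above, the underlying router must not declare request options:

    grouping_router_definition = {
        "type": "GroupingPartitionRouter",
        "group_size": 10,
        "deduplicate": True,
        "underlying_partition_router": {
            "type": "ListPartitionRouter",
            "values": ["US", "CA", "MX"],
            "cursor_field": "country",
        },
    }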