airbyte_cdk.sources.declarative.parsers.model_to_component_factory

#
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
#

from __future__ import annotations

import datetime
import importlib
import inspect
import re
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Mapping,
    MutableMapping,
    Optional,
    Type,
    Union,
    cast,
    get_args,
    get_origin,
    get_type_hints,
)

from isodate import parse_duration
from pydantic.v1 import BaseModel
from requests import Response

from airbyte_cdk.connector_builder.models import (
    LogMessage as ConnectorBuilderLogMessage,
)
from airbyte_cdk.models import FailureType, Level
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative import transformations
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
    DeclarativeAuthenticator,
    NoAuth,
)
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
from airbyte_cdk.sources.declarative.auth.oauth import (
    DeclarativeSingleUseRefreshTokenOauth2Authenticator,
)
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
from airbyte_cdk.sources.declarative.auth.token import (
    ApiKeyAuthenticator,
    BasicHttpAuthenticator,
    BearerAuthenticator,
    LegacySessionTokenAuthenticator,
)
from airbyte_cdk.sources.declarative.auth.token_provider import (
    InterpolatedStringTokenProvider,
    SessionTokenProvider,
    TokenProvider,
)
from airbyte_cdk.sources.declarative.checks import (
    CheckDynamicStream,
    CheckStream,
    DynamicStreamCheckConfig,
)
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
from airbyte_cdk.sources.declarative.decoders import (
    Decoder,
    IterableDecoder,
    JsonDecoder,
    PaginationDecoderDecorator,
    XmlDecoder,
    ZipfileDecoder,
)
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
    CompositeRawDecoder,
    CsvParser,
    GzipParser,
    JsonLineParser,
    JsonParser,
    Parser,
)
from airbyte_cdk.sources.declarative.extractors import (
    DpathExtractor,
    RecordFilter,
    RecordSelector,
    ResponseToFileExtractor,
)
from airbyte_cdk.sources.declarative.extractors.record_filter import (
    ClientSideIncrementalRecordFilterDecorator,
)
from airbyte_cdk.sources.declarative.incremental import (
    ChildPartitionResumableFullRefreshCursor,
    ConcurrentCursorFactory,
    ConcurrentPerPartitionCursor,
    CursorFactory,
    DatetimeBasedCursor,
    DeclarativeCursor,
    GlobalSubstreamCursor,
    PerPartitionCursor,
    PerPartitionWithGlobalCursor,
    ResumableFullRefreshCursor,
)
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
    LegacyToPerPartitionStateMigration,
)
from airbyte_cdk.sources.declarative.models import (
    CustomStateMigration,
)
from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (
    DEPRECATION_LOGS_TAG,
    BaseModelWithDeprecations,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AddedFieldDefinition as AddedFieldDefinitionModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AddFields as AddFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AsyncJobStatusMap as AsyncJobStatusMapModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AsyncRetriever as AsyncRetrieverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    BearerAuthenticator as BearerAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CheckDynamicStream as CheckDynamicStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CheckStream as CheckStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ComplexFieldType as ComplexFieldTypeModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ComponentMappingDefinition as ComponentMappingDefinitionModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CompositeErrorHandler as CompositeErrorHandlerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConcurrencyLevel as ConcurrencyLevelModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConfigAddFields as ConfigAddFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConfigComponentsResolver as ConfigComponentsResolverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConfigMigration as ConfigMigrationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConfigRemapField as ConfigRemapFieldModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConfigRemoveFields as ConfigRemoveFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConstantBackoffStrategy as ConstantBackoffStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CsvDecoder as CsvDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CursorPagination as CursorPaginationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomAuthenticator as CustomAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomBackoffStrategy as CustomBackoffStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomConfigTransformation as CustomConfigTransformationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomDecoder as CustomDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomErrorHandler as CustomErrorHandlerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomIncrementalSync as CustomIncrementalSyncModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomPaginationStrategy as CustomPaginationStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomPartitionRouter as CustomPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRecordExtractor as CustomRecordExtractorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRecordFilter as CustomRecordFilterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRequester as CustomRequesterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRetriever as CustomRetrieverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomSchemaLoader as CustomSchemaLoader,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomSchemaNormalization as CustomSchemaNormalizationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomTransformation as CustomTransformationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomValidationStrategy as CustomValidationStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DatetimeBasedCursor as DatetimeBasedCursorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DeclarativeStream as DeclarativeStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DefaultErrorHandler as DefaultErrorHandlerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DefaultPaginator as DefaultPaginatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DpathExtractor as DpathExtractorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DpathFlattenFields as DpathFlattenFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DpathValidator as DpathValidatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DynamicSchemaLoader as DynamicSchemaLoaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    FileUploader as FileUploaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    FlattenFields as FlattenFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    GroupingPartitionRouter as GroupingPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    GzipDecoder as GzipDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HTTPAPIBudget as HTTPAPIBudgetModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpComponentsResolver as HttpComponentsResolverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpRequester as HttpRequesterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpResponseFilter as HttpResponseFilterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    IncrementingCountCursor as IncrementingCountCursorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    InlineSchemaLoader as InlineSchemaLoaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    IterableDecoder as IterableDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JsonDecoder as JsonDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JsonlDecoder as JsonlDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JwtAuthenticator as JwtAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JwtHeaders as JwtHeadersModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JwtPayload as JwtPayloadModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    KeysReplace as KeysReplaceModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    KeysToLower as KeysToLowerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    KeysToSnakeCase as KeysToSnakeCaseModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ListPartitionRouter as ListPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    MinMaxDatetime as MinMaxDatetimeModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    NoAuth as NoAuthModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    NoPagination as NoPaginationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    OAuthAuthenticator as OAuthAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    OffsetIncrement as OffsetIncrementModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PageIncrement as PageIncrementModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ParametrizedComponentsResolver as ParametrizedComponentsResolverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ParentStreamConfig as ParentStreamConfigModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PredicateValidator as PredicateValidatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PropertiesFromEndpoint as PropertiesFromEndpointModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PropertyChunking as PropertyChunkingModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PropertyLimitType as PropertyLimitTypeModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    QueryProperties as QueryPropertiesModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    Rate as RateModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RecordFilter as RecordFilterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RecordSelector as RecordSelectorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RemoveFields as RemoveFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RequestOption as RequestOptionModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RequestPath as RequestPathModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ResponseToFileExtractor as ResponseToFileExtractorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SchemaNormalization as SchemaNormalizationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SelectiveAuthenticator as SelectiveAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SimpleRetriever as SimpleRetrieverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    StateDelegatingStream as StateDelegatingStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    StreamConfig as StreamConfigModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SubstreamPartitionRouter as SubstreamPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    TypesMap as TypesMapModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ValidateAdheresToSchema as ValidateAdheresToSchemaModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    WaitTimeFromHeader as WaitTimeFromHeaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    XmlDecoder as XmlDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    COMPONENTS_MODULE_NAME,
    SDM_COMPONENTS_MODULE_NAME,
)
from airbyte_cdk.sources.declarative.partition_routers import (
    CartesianProductStreamSlicer,
    GroupingPartitionRouter,
    ListPartitionRouter,
    PartitionRouter,
    SinglePartitionRouter,
    SubstreamPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
    AsyncJobPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
    ParentStreamConfig,
)
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
    CompositeErrorHandler,
    DefaultErrorHandler,
    HttpResponseFilter,
)
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
    ConstantBackoffStrategy,
    ExponentialBackoffStrategy,
    WaitTimeFromHeaderBackoffStrategy,
    WaitUntilTimeFromHeaderBackoffStrategy,
)
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
from airbyte_cdk.sources.declarative.requesters.paginators import (
    DefaultPaginator,
    NoPagination,
    PaginatorTestReadDecorator,
)
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
    CursorPaginationStrategy,
    CursorStopCondition,
    OffsetIncrement,
    PageIncrement,
    StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.query_properties import (
    PropertiesFromEndpoint,
    PropertyChunking,
    QueryProperties,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
    PropertyLimitType,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
    GroupByKey,
)
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
    DatetimeBasedRequestOptionsProvider,
    DefaultRequestOptionsProvider,
    InterpolatedRequestOptionsProvider,
    RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
from airbyte_cdk.sources.declarative.resolvers import (
    ComponentMappingDefinition,
    ConfigComponentsResolver,
    HttpComponentsResolver,
    ParametrizedComponentsResolver,
    StreamConfig,
    StreamParametersDefinition,
)
from airbyte_cdk.sources.declarative.retrievers import (
    AsyncRetriever,
    LazySimpleRetriever,
    SimpleRetriever,
)
from airbyte_cdk.sources.declarative.retrievers.file_uploader import (
    ConnectorBuilderFileUploader,
    DefaultFileUploader,
    FileUploader,
    LocalFileSystemFileWriter,
    NoopFileWriter,
)
from airbyte_cdk.sources.declarative.schema import (
    ComplexFieldType,
    DefaultSchemaLoader,
    DynamicSchemaLoader,
    InlineSchemaLoader,
    JsonFileSchemaLoader,
    SchemaTypeIdentifier,
    TypesMap,
)
from airbyte_cdk.sources.declarative.schema.composite_schema_loader import CompositeSchemaLoader
from airbyte_cdk.sources.declarative.spec import ConfigMigration, Spec
from airbyte_cdk.sources.declarative.stream_slicers import (
    StreamSlicer,
    StreamSlicerTestReadDecorator,
)
from airbyte_cdk.sources.declarative.transformations import (
    AddFields,
    RecordTransformation,
    RemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
from airbyte_cdk.sources.declarative.transformations.config_transformations import (
    ConfigAddFields,
    ConfigRemapField,
    ConfigRemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
    ConfigTransformation,
)
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
    DpathFlattenFields,
    KeyTransformation,
)
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
    FlattenFields,
)
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
    KeysReplaceTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
    KeysToLowerTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)
from airbyte_cdk.sources.declarative.validators import (
    DpathValidator,
    PredicateValidator,
    ValidateAdheresToSchema,
)
from airbyte_cdk.sources.http_logger import format_http_message
from airbyte_cdk.sources.message import (
    InMemoryMessageRepository,
    LogAppenderMessageRepositoryDecorator,
    MessageRepository,
    NoopMessageRepository,
)
from airbyte_cdk.sources.streams.call_rate import (
    APIBudget,
    FixedWindowCallRatePolicy,
    HttpAPIBudget,
    HttpRequestRegexMatcher,
    MovingWindowCallRatePolicy,
    Rate,
    UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.concurrent.clamping import (
    ClampingEndProvider,
    ClampingStrategy,
    DayClampingStrategy,
    MonthClampingStrategy,
    NoClamping,
    WeekClampingStrategy,
    Weekday,
)
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
    CustomFormatConcurrentStreamStateConverter,
    DateTimeStreamStateConverter,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

ComponentDefinition = Mapping[str, Any]

SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}
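
# This mapping is consulted elsewhere in the factory when a component needs to
# normalize records against their schema: the manifest's schema_normalization
# value selects the TransformConfig flag used to build a TypeTransformer.
# A minimal sketch of the lookup (illustrative only, not a call made at this
# point in the module):
#
#     transformer = TypeTransformer(
#         SCHEMA_TRANSFORMER_TYPE_MAPPING[SchemaNormalizationModel.Default]
#     )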


class ModelToComponentFactory:
    EPOCH_DATETIME_FORMAT = "%s"

    def __init__(
        self,
        limit_pages_fetched_per_slice: Optional[int] = None,
        limit_slices_fetched: Optional[int] = None,
        emit_connector_builder_messages: bool = False,
        disable_retries: bool = False,
        disable_cache: bool = False,
        disable_resumable_full_refresh: bool = False,
        message_repository: Optional[MessageRepository] = None,
        connector_state_manager: Optional[ConnectorStateManager] = None,
        max_concurrent_async_job_count: Optional[int] = None,
    ):
        self._init_mappings()
        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
        self._limit_slices_fetched = limit_slices_fetched
        self._emit_connector_builder_messages = emit_connector_builder_messages
        self._disable_retries = disable_retries
        self._disable_cache = disable_cache
        self._disable_resumable_full_refresh = disable_resumable_full_refresh
        self._message_repository = message_repository or InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
        # Placeholder for deprecation warnings collected while building components.
        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []

    def _init_mappings(self) -> None:
        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
            AddedFieldDefinitionModel: self.create_added_field_definition,
            AddFieldsModel: self.create_add_fields,
            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
            BearerAuthenticatorModel: self.create_bearer_authenticator,
            CheckStreamModel: self.create_check_stream,
            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
            CheckDynamicStreamModel: self.create_check_dynamic_stream,
            CompositeErrorHandlerModel: self.create_composite_error_handler,
            ConcurrencyLevelModel: self.create_concurrency_level,
            ConfigMigrationModel: self.create_config_migration,
            ConfigAddFieldsModel: self.create_config_add_fields,
            ConfigRemapFieldModel: self.create_config_remap_field,
            ConfigRemoveFieldsModel: self.create_config_remove_fields,
            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
            CsvDecoderModel: self.create_csv_decoder,
            CursorPaginationModel: self.create_cursor_pagination,
            CustomAuthenticatorModel: self.create_custom_component,
            CustomBackoffStrategyModel: self.create_custom_component,
            CustomDecoderModel: self.create_custom_component,
            CustomErrorHandlerModel: self.create_custom_component,
            CustomIncrementalSyncModel: self.create_custom_component,
            CustomRecordExtractorModel: self.create_custom_component,
            CustomRecordFilterModel: self.create_custom_component,
            CustomRequesterModel: self.create_custom_component,
            CustomRetrieverModel: self.create_custom_component,
            CustomSchemaLoader: self.create_custom_component,
            CustomSchemaNormalizationModel: self.create_custom_component,
            CustomStateMigration: self.create_custom_component,
            CustomPaginationStrategyModel: self.create_custom_component,
            CustomPartitionRouterModel: self.create_custom_component,
            CustomTransformationModel: self.create_custom_component,
            CustomValidationStrategyModel: self.create_custom_component,
            CustomConfigTransformationModel: self.create_custom_component,
            DatetimeBasedCursorModel: self.create_datetime_based_cursor,
            DeclarativeStreamModel: self.create_declarative_stream,
            DefaultErrorHandlerModel: self.create_default_error_handler,
            DefaultPaginatorModel: self.create_default_paginator,
            DpathExtractorModel: self.create_dpath_extractor,
            DpathValidatorModel: self.create_dpath_validator,
            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
            GroupByKeyMergeStrategyModel: self.create_group_by_key,
            HttpRequesterModel: self.create_http_requester,
            HttpResponseFilterModel: self.create_http_response_filter,
            InlineSchemaLoaderModel: self.create_inline_schema_loader,
            JsonDecoderModel: self.create_json_decoder,
            JsonlDecoderModel: self.create_jsonl_decoder,
            GzipDecoderModel: self.create_gzip_decoder,
            KeysToLowerModel: self.create_keys_to_lower_transformation,
            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
            KeysReplaceModel: self.create_keys_replace_transformation,
            FlattenFieldsModel: self.create_flatten_fields,
            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
            IterableDecoderModel: self.create_iterable_decoder,
            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
            XmlDecoderModel: self.create_xml_decoder,
            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
            TypesMapModel: self.create_types_map,
            ComplexFieldTypeModel: self.create_complex_field_type,
            JwtAuthenticatorModel: self.create_jwt_authenticator,
            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
            ListPartitionRouterModel: self.create_list_partition_router,
            MinMaxDatetimeModel: self.create_min_max_datetime,
            NoAuthModel: self.create_no_auth,
            NoPaginationModel: self.create_no_pagination,
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
            ParentStreamConfigModel: self.create_parent_stream_config,
            PredicateValidatorModel: self.create_predicate_validator,
            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
            PropertyChunkingModel: self.create_property_chunking,
            QueryPropertiesModel: self.create_query_properties,
            RecordFilterModel: self.create_record_filter,
            RecordSelectorModel: self.create_record_selector,
            RemoveFieldsModel: self.create_remove_fields,
            RequestPathModel: self.create_request_path,
            RequestOptionModel: self.create_request_option,
            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
            SelectiveAuthenticatorModel: self.create_selective_authenticator,
            SimpleRetrieverModel: self.create_simple_retriever,
            StateDelegatingStreamModel: self.create_state_delegating_stream,
            SpecModel: self.create_spec,
            SubstreamPartitionRouterModel: self.create_substream_partition_router,
            ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema,
            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
            AsyncRetrieverModel: self.create_async_retriever,
            HttpComponentsResolverModel: self.create_http_components_resolver,
            ConfigComponentsResolverModel: self.create_config_components_resolver,
            ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver,
            StreamConfigModel: self.create_stream_config,
            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
            ZipfileDecoderModel: self.create_zipfile_decoder,
            HTTPAPIBudgetModel: self.create_http_api_budget,
            FileUploaderModel: self.create_file_uploader,
            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
            GroupingPartitionRouterModel: self.create_grouping_partition_router,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}

    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and a Mapping representing a component definition, and creates the declarative
        component and subcomponents that will be used at runtime. This is done by first parsing the mapping into a
        Pydantic model and then creating declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_type != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )

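    # Illustrative use of create_component (a minimal sketch; the manifest
    # snippet and empty config below are hypothetical placeholders):
    #
    #     factory = ModelToComponentFactory()
    #     extractor = factory.create_component(
    #         model_type=DpathExtractorModel,
    #         component_definition={"type": "DpathExtractor", "field_path": ["data"]},
    #         config={},
    #     )
    #
    # The "type" key must match the model class name, otherwise the ValueError
    # above is raised before parsing begins.
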
    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")

        # Collect deprecation warnings for supported models.
        if isinstance(model, BaseModelWithDeprecations):
            self._collect_model_deprecations(model)

        return component_constructor(model=model, config=config, **kwargs)

    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
        """
        Returns the deprecation warnings that were collected during the creation of components.
        """
        return self._collected_deprecation_logs

    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
        """
        Collects deprecation logs from the given model and appends any new logs to the internal collection.

        This method checks whether the provided model has deprecation logs (identified by the presence of the
        DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each
        deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not
        already been collected, ensuring that duplicate logs are avoided.

        Args:
            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
        """
        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
            for log in model._deprecation_logs:
                # Avoid duplicate deprecation log entries.
                if log not in self._collected_deprecation_logs:
                    self._collected_deprecation_logs.append(log)

    def create_config_migration(
        self, model: ConfigMigrationModel, config: Config
    ) -> ConfigMigration:
        transformations: List[ConfigTransformation] = [
            self._create_component_from_model(transformation, config)
            for transformation in model.transformations
        ]

        return ConfigMigration(
            description=model.description,
            transformations=transformations,
        )

    def create_config_add_fields(
        self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigAddFields:
        fields = [self._create_component_from_model(field, config) for field in model.fields]
        return ConfigAddFields(
            fields=fields,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remove_fields(
        model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigRemoveFields:
        return ConfigRemoveFields(
            field_pointers=model.field_pointers,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remap_field(
        model: ConfigRemapFieldModel, config: Config, **kwargs: Any
    ) -> ConfigRemapField:
        mapping = cast(Mapping[str, Any], model.map)
        return ConfigRemapField(
            map=mapping,
            field_path=model.field_path,
            config=config,
        )

    def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return DpathValidator(
            field_path=model.field_path,
            strategy=strategy,
        )

    def create_predicate_validator(
        self, model: PredicateValidatorModel, config: Config
    ) -> PredicateValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return PredicateValidator(
            value=model.value,
            strategy=strategy,
        )

    @staticmethod
    def create_validate_adheres_to_schema(
        model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any
    ) -> ValidateAdheresToSchema:
        base_schema = cast(Mapping[str, Any], model.base_schema)
        return ValidateAdheresToSchema(
            schema=base_schema,
        )

    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )

    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]

    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and must be set to an empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )

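    # Manifest sketch of the two mutually exclusive injection styles handled
    # above (field values are hypothetical; "header" is the deprecated form
    # that gets converted into an equivalent header RequestOption):
    #
    #     {"type": "ApiKeyAuthenticator",
    #      "api_token": "{{ config['api_key'] }}",
    #      "inject_into": {"type": "RequestOption",
    #                      "inject_into": "header",
    #                      "field_name": "X-API-Key"}}
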
    def create_legacy_to_per_partition_state_migration(
        self,
        model: LegacyToPerPartitionStateMigrationModel,
        config: Mapping[str, Any],
        declarative_stream: DeclarativeStreamModel,
    ) -> LegacyToPerPartitionStateMigration:
        retriever = declarative_stream.retriever
        if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}"
            )
        partition_router = retriever.partition_router
        if not isinstance(
            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
        ):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
            )
        if not hasattr(partition_router, "parent_stream_configs"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
            )

        if not hasattr(declarative_stream, "incremental_sync"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
            )

        return LegacyToPerPartitionStateMigration(
            partition_router,  # type: ignore # was already checked above
            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
            config,
            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=token_provider,
            )

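    # Note on the dispatch above: a SessionTokenAuthenticator is never built as
    # its own component. The login requester obtains a token, SessionTokenProvider
    # caches it until expiration_duration (an ISO 8601 duration) elapses, and the
    # provider is wrapped in either a Bearer or an ApiKey authenticator according
    # to request_authentication.type. A hypothetical manifest fragment selecting
    # the bearer flavor:
    #
    #     {"type": "SessionTokenAuthenticator",
    #      "login_requester": {...},
    #      "session_token_path": ["token"],
    #      "expiration_duration": "PT1H",
    #      "request_authentication": {"type": "Bearer"}}
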
    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and must be set to an empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )

1164    @staticmethod
1165    def create_dynamic_stream_check_config(
1166        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
1167    ) -> DynamicStreamCheckConfig:
1168        return DynamicStreamCheckConfig(
1169            dynamic_stream_name=model.dynamic_stream_name,
1170            stream_count=model.stream_count or 0,
1171        )
1172
1173    def create_check_stream(
1174        self, model: CheckStreamModel, config: Config, **kwargs: Any
1175    ) -> CheckStream:
1176        if model.dynamic_streams_check_configs is None and model.stream_names is None:
1177            raise ValueError(
1178                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
1179            )
1180
1181        dynamic_streams_check_configs = (
1182            [
1183                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
1184                for dynamic_stream_check_config in model.dynamic_streams_check_configs
1185            ]
1186            if model.dynamic_streams_check_configs
1187            else []
1188        )
1189
1190        return CheckStream(
1191            stream_names=model.stream_names or [],
1192            dynamic_streams_check_configs=dynamic_streams_check_configs,
1193            parameters={},
1194        )
1195
1196    @staticmethod
1197    def create_check_dynamic_stream(
1198        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
1199    ) -> CheckDynamicStream:
1200        assert model.use_check_availability is not None  # for mypy
1201
1202        use_check_availability = model.use_check_availability
1203
1204        return CheckDynamicStream(
1205            stream_count=model.stream_count,
1206            use_check_availability=use_check_availability,
1207            parameters={},
1208        )
1209
1210    def create_composite_error_handler(
1211        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
1212    ) -> CompositeErrorHandler:
1213        error_handlers = [
1214            self._create_component_from_model(model=error_handler_model, config=config)
1215            for error_handler_model in model.error_handlers
1216        ]
1217        return CompositeErrorHandler(
1218            error_handlers=error_handlers, parameters=model.parameters or {}
1219        )
1220
1221    @staticmethod
1222    def create_concurrency_level(
1223        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
1224    ) -> ConcurrencyLevel:
1225        return ConcurrencyLevel(
1226            default_concurrency=model.default_concurrency,
1227            max_concurrency=model.max_concurrency,
1228            config=config,
1229            parameters={},
1230        )
1231
1232    @staticmethod
1233    def apply_stream_state_migrations(
1234        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
1235    ) -> MutableMapping[str, Any]:
1236        if stream_state_migrations:
1237            for state_migration in stream_state_migrations:
1238                if state_migration.should_migrate(stream_state):
1239                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
1240                    stream_state = dict(state_migration.migrate(stream_state))
1241        return stream_state
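
    # A minimal sketch of the migration contract consumed above; the class below is
    # hypothetical and shown for illustration only:
    #
    #     class RenameCursorKeyMigration:
    #         def should_migrate(self, stream_state):
    #             return "updated" in stream_state
    #
    #         def migrate(self, stream_state):
    #             return {"updated_at": stream_state["updated"]}
    #
    #     ModelToComponentFactory.apply_stream_state_migrations(
    #         [RenameCursorKeyMigration()], {"updated": "2024-01-01"}
    #     )  # -> {"updated_at": "2024-01-01"}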
1242
1243    def create_concurrent_cursor_from_datetime_based_cursor(
1244        self,
1245        model_type: Type[BaseModel],
1246        component_definition: ComponentDefinition,
1247        stream_name: str,
1248        stream_namespace: Optional[str],
1249        config: Config,
1250        message_repository: Optional[MessageRepository] = None,
1251        runtime_lookback_window: Optional[datetime.timedelta] = None,
1252        stream_state_migrations: Optional[List[Any]] = None,
1253        **kwargs: Any,
1254    ) -> ConcurrentCursor:
1255        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1256        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1257        # incoming state and connector_state_manager that is initialized when the component factory is created
1258        stream_state = (
1259            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1260            if "stream_state" not in kwargs
1261            else kwargs["stream_state"]
1262        )
1263        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1264
1265        component_type = component_definition.get("type")
1266        if component_type != model_type.__name__:
1267            raise ValueError(
1268                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1269            )
1270
1271        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1272
1273        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1274            raise ValueError(
1275                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1276            )
1277
1278        interpolated_cursor_field = InterpolatedString.create(
1279            datetime_based_cursor_model.cursor_field,
1280            parameters=datetime_based_cursor_model.parameters or {},
1281        )
1282        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1283
1284        interpolated_partition_field_start = InterpolatedString.create(
1285            datetime_based_cursor_model.partition_field_start or "start_time",
1286            parameters=datetime_based_cursor_model.parameters or {},
1287        )
1288        interpolated_partition_field_end = InterpolatedString.create(
1289            datetime_based_cursor_model.partition_field_end or "end_time",
1290            parameters=datetime_based_cursor_model.parameters or {},
1291        )
1292
1293        slice_boundary_fields = (
1294            interpolated_partition_field_start.eval(config=config),
1295            interpolated_partition_field_end.eval(config=config),
1296        )
1297
1298        datetime_format = datetime_based_cursor_model.datetime_format
1299
1300        cursor_granularity = (
1301            parse_duration(datetime_based_cursor_model.cursor_granularity)
1302            if datetime_based_cursor_model.cursor_granularity
1303            else None
1304        )
1305
1306        lookback_window = None
1307        interpolated_lookback_window = (
1308            InterpolatedString.create(
1309                datetime_based_cursor_model.lookback_window,
1310                parameters=datetime_based_cursor_model.parameters or {},
1311            )
1312            if datetime_based_cursor_model.lookback_window
1313            else None
1314        )
1315        if interpolated_lookback_window:
1316            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
1317            if evaluated_lookback_window:
1318                lookback_window = parse_duration(evaluated_lookback_window)
1319
1320        connector_state_converter: DateTimeStreamStateConverter
1321        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1322            datetime_format=datetime_format,
1323            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1324            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1325            cursor_granularity=cursor_granularity,
1326        )
1327
1328        # Adjusts the stream state by applying the runtime lookback window.
1329        # This is used to ensure correct state handling in case of failed partitions.
1330        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1331        if runtime_lookback_window and stream_state_value:
1332            new_stream_state = (
1333                connector_state_converter.parse_timestamp(stream_state_value)
1334                - runtime_lookback_window
1335            )
1336            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1337                new_stream_state
1338            )
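            # Worked example (values illustrative): with datetime_format "%Y-%m-%d", a
            # persisted state of {"updated_at": "2024-01-10"} and a runtime_lookback_window
            # of datetime.timedelta(days=2), the state used for slicing becomes
            # {"updated_at": "2024-01-08"}.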
1339
1340        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
1341        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
1342            start_date_runtime_value = self.create_min_max_datetime(
1343                model=datetime_based_cursor_model.start_datetime, config=config
1344            )
1345        else:
1346            start_date_runtime_value = datetime_based_cursor_model.start_datetime
1347
1348        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
1349        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
1350            end_date_runtime_value = self.create_min_max_datetime(
1351                model=datetime_based_cursor_model.end_datetime, config=config
1352            )
1353        else:
1354            end_date_runtime_value = datetime_based_cursor_model.end_datetime
1355
1356        interpolated_start_date = MinMaxDatetime.create(
1357            interpolated_string_or_min_max_datetime=start_date_runtime_value,
1358            parameters=datetime_based_cursor_model.parameters,
1359        )
1360        interpolated_end_date = (
1361            None
1362            if not end_date_runtime_value
1363            else MinMaxDatetime.create(
1364                end_date_runtime_value, datetime_based_cursor_model.parameters
1365            )
1366        )
1367
1368        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
1369        if not interpolated_start_date.datetime_format:
1370            interpolated_start_date.datetime_format = datetime_format
1371        if interpolated_end_date and not interpolated_end_date.datetime_format:
1372            interpolated_end_date.datetime_format = datetime_format
1373
1374        start_date = interpolated_start_date.get_datetime(config=config)
1375        end_date_provider = (
1376            partial(interpolated_end_date.get_datetime, config)
1377            if interpolated_end_date
1378            else connector_state_converter.get_end_provider()
1379        )
1380
1381        if (
1382            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
1383        ) or (
1384            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
1385        ):
1386            raise ValueError(
1387                f"If step is defined, cursor_granularity should be defined as well, and vice versa. "
1388                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
1389            )
1390
1391        # When step is not defined, default to a step size from the starting date to the present moment
1392        step_length = datetime.timedelta.max
1393        interpolated_step = (
1394            InterpolatedString.create(
1395                datetime_based_cursor_model.step,
1396                parameters=datetime_based_cursor_model.parameters or {},
1397            )
1398            if datetime_based_cursor_model.step
1399            else None
1400        )
1401        if interpolated_step:
1402            evaluated_step = interpolated_step.eval(config)
1403            if evaluated_step:
1404                step_length = parse_duration(evaluated_step)
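        # Worked example (values illustrative): step "P1M" with cursor_granularity "P1D"
        # partitions the range 2024-01-01..2024-03-31 into 2024-01-01..2024-01-31,
        # 2024-02-01..2024-02-29 and 2024-03-01..2024-03-31.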
1405
1406        clamping_strategy: ClampingStrategy = NoClamping()
1407        if datetime_based_cursor_model.clamping:
1408            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1409            # it is still better than shifting the low-code concept of interpolation into the ConcurrentCursor
1410            # runtime object, which we want to keep agnostic of low-code concerns
1411            target = InterpolatedString(
1412                string=datetime_based_cursor_model.clamping.target,
1413                parameters=datetime_based_cursor_model.parameters or {},
1414            )
1415            evaluated_target = target.eval(config=config)
1416            match evaluated_target:
1417                case "DAY":
1418                    clamping_strategy = DayClampingStrategy()
1419                    end_date_provider = ClampingEndProvider(
1420                        DayClampingStrategy(is_ceiling=False),
1421                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1422                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
1423                    )
1424                case "WEEK":
1425                    if (
1426                        not datetime_based_cursor_model.clamping.target_details
1427                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
1428                    ):
1429                        raise ValueError(
1430                            "Given WEEK clamping, weekday needs to be provided as target_details"
1431                        )
1432                    weekday = self._assemble_weekday(
1433                        datetime_based_cursor_model.clamping.target_details["weekday"]
1434                    )
1435                    clamping_strategy = WeekClampingStrategy(weekday)
1436                    end_date_provider = ClampingEndProvider(
1437                        WeekClampingStrategy(weekday, is_ceiling=False),
1438                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1439                        granularity=cursor_granularity or datetime.timedelta(days=1),
1440                    )
1441                case "MONTH":
1442                    clamping_strategy = MonthClampingStrategy()
1443                    end_date_provider = ClampingEndProvider(
1444                        MonthClampingStrategy(is_ceiling=False),
1445                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1446                        granularity=cursor_granularity or datetime.timedelta(days=1),
1447                    )
1448                case _:
1449                    raise ValueError(
1450                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1451                    )
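            # Illustrative manifest snippet exercising the WEEK branch above:
            #
            #     clamping:
            #       target: "WEEK"
            #       target_details:
            #         weekday: "MONDAY"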
1452
1453        return ConcurrentCursor(
1454            stream_name=stream_name,
1455            stream_namespace=stream_namespace,
1456            stream_state=stream_state,
1457            message_repository=message_repository or self._message_repository,
1458            connector_state_manager=self._connector_state_manager,
1459            connector_state_converter=connector_state_converter,
1460            cursor_field=cursor_field,
1461            slice_boundary_fields=slice_boundary_fields,
1462            start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1463            end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1464            lookback_window=lookback_window,
1465            slice_range=step_length,
1466            cursor_granularity=cursor_granularity,
1467            clamping_strategy=clamping_strategy,
1468        )
1469
1470    def create_concurrent_cursor_from_incrementing_count_cursor(
1471        self,
1472        model_type: Type[BaseModel],
1473        component_definition: ComponentDefinition,
1474        stream_name: str,
1475        stream_namespace: Optional[str],
1476        config: Config,
1477        message_repository: Optional[MessageRepository] = None,
1478        **kwargs: Any,
1479    ) -> ConcurrentCursor:
1480        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1481        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1482        # incoming state and connector_state_manager that is initialized when the component factory is created
1483        stream_state = (
1484            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1485            if "stream_state" not in kwargs
1486            else kwargs["stream_state"]
1487        )
1488
1489        component_type = component_definition.get("type")
1490        if component_type != model_type.__name__:
1491            raise ValueError(
1492                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1493            )
1494
1495        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
1496
1497        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
1498            raise ValueError(
1499                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
1500            )
1501
1502        interpolated_start_value = (
1503            InterpolatedString.create(
1504                incrementing_count_cursor_model.start_value,  # type: ignore
1505                parameters=incrementing_count_cursor_model.parameters or {},
1506            )
1507            if incrementing_count_cursor_model.start_value
1508            else 0
1509        )
1510
1511        interpolated_cursor_field = InterpolatedString.create(
1512            incrementing_count_cursor_model.cursor_field,
1513            parameters=incrementing_count_cursor_model.parameters or {},
1514        )
1515        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1516
1517        connector_state_converter = IncrementingCountStreamStateConverter(
1518            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1519        )
1520
1521        return ConcurrentCursor(
1522            stream_name=stream_name,
1523            stream_namespace=stream_namespace,
1524            stream_state=stream_state,
1525            message_repository=message_repository or self._message_repository,
1526            connector_state_manager=self._connector_state_manager,
1527            connector_state_converter=connector_state_converter,
1528            cursor_field=cursor_field,
1529            slice_boundary_fields=None,
1530            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1531            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1532        )
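
    # For reference, a hedged manifest snippet (values illustrative) of the kind of
    # component this method accepts:
    #
    #     incremental_sync:
    #       type: IncrementingCountCursor
    #       cursor_field: "id"
    #       start_value: 0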
1533
1534    def _assemble_weekday(self, weekday: str) -> Weekday:
1535        match weekday:
1536            case "MONDAY":
1537                return Weekday.MONDAY
1538            case "TUESDAY":
1539                return Weekday.TUESDAY
1540            case "WEDNESDAY":
1541                return Weekday.WEDNESDAY
1542            case "THURSDAY":
1543                return Weekday.THURSDAY
1544            case "FRIDAY":
1545                return Weekday.FRIDAY
1546            case "SATURDAY":
1547                return Weekday.SATURDAY
1548            case "SUNDAY":
1549                return Weekday.SUNDAY
1550            case _:
1551                raise ValueError(f"Unknown weekday {weekday}")
1552
1553    def create_concurrent_cursor_from_perpartition_cursor(
1554        self,
1555        state_manager: ConnectorStateManager,
1556        model_type: Type[BaseModel],
1557        component_definition: ComponentDefinition,
1558        stream_name: str,
1559        stream_namespace: Optional[str],
1560        config: Config,
1561        stream_state: MutableMapping[str, Any],
1562        partition_router: PartitionRouter,
1563        stream_state_migrations: Optional[List[Any]] = None,
1564        **kwargs: Any,
1565    ) -> ConcurrentPerPartitionCursor:
1566        component_type = component_definition.get("type")
1567        if component_type != model_type.__name__:
1568            raise ValueError(
1569                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1570            )
1571
1572        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1573
1574        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1575            raise ValueError(
1576                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1577            )
1578
1579        interpolated_cursor_field = InterpolatedString.create(
1580            datetime_based_cursor_model.cursor_field,
1581            parameters=datetime_based_cursor_model.parameters or {},
1582        )
1583        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1584
1585        datetime_format = datetime_based_cursor_model.datetime_format
1586
1587        cursor_granularity = (
1588            parse_duration(datetime_based_cursor_model.cursor_granularity)
1589            if datetime_based_cursor_model.cursor_granularity
1590            else None
1591        )
1592
1593        connector_state_converter: DateTimeStreamStateConverter
1594        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1595            datetime_format=datetime_format,
1596            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1597            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1598            cursor_granularity=cursor_granularity,
1599        )
1600
1601        # Create the cursor factory
1602        cursor_factory = ConcurrentCursorFactory(
1603            partial(
1604                self.create_concurrent_cursor_from_datetime_based_cursor,
1605                state_manager=state_manager,
1606                model_type=model_type,
1607                component_definition=component_definition,
1608                stream_name=stream_name,
1609                stream_namespace=stream_namespace,
1610                config=config,
1611                message_repository=NoopMessageRepository(),
1612                stream_state_migrations=stream_state_migrations,
1613            )
1614        )
1615
1616        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1617        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
1618        use_global_cursor = isinstance(
1619            partition_router, GroupingPartitionRouter
1620        ) or component_definition.get("global_substream_cursor", False)
1621
1622        # Return the concurrent per-partition cursor
1623        return ConcurrentPerPartitionCursor(
1624            cursor_factory=cursor_factory,
1625            partition_router=partition_router,
1626            stream_name=stream_name,
1627            stream_namespace=stream_namespace,
1628            stream_state=stream_state,
1629            message_repository=self._message_repository,  # type: ignore
1630            connector_state_manager=state_manager,
1631            connector_state_converter=connector_state_converter,
1632            cursor_field=cursor_field,
1633            use_global_cursor=use_global_cursor,
1634        )
1635
1636    @staticmethod
1637    def create_constant_backoff_strategy(
1638        model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
1639    ) -> ConstantBackoffStrategy:
1640        return ConstantBackoffStrategy(
1641            backoff_time_in_seconds=model.backoff_time_in_seconds,
1642            config=config,
1643            parameters=model.parameters or {},
1644        )
1645
1646    def create_cursor_pagination(
1647        self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
1648    ) -> CursorPaginationStrategy:
1649        if isinstance(decoder, PaginationDecoderDecorator):
1650            inner_decoder = decoder.decoder
1651        else:
1652            inner_decoder = decoder
1653            decoder = PaginationDecoderDecorator(decoder=decoder)
1654
1655        if self._is_supported_decoder_for_pagination(inner_decoder):
1656            decoder_to_use = decoder
1657        else:
1658            raise ValueError(
1659                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1660            )
1661
1662        return CursorPaginationStrategy(
1663            cursor_value=model.cursor_value,
1664            decoder=decoder_to_use,
1665            page_size=model.page_size,
1666            stop_condition=model.stop_condition,
1667            config=config,
1668            parameters=model.parameters or {},
1669        )
1670
1671    def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
1672        """
1673        Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
1674        instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor.
1675        :param model: The Pydantic model of the custom component being created
1676        :param config: The custom defined connector config
1677        :return: The declarative component built from the Pydantic model to be used at runtime
1678        """
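        # A hedged illustration of the kind of manifest definition handled here; the
        # module path, class name and nested component are hypothetical:
        #
        #     retriever:
        #       type: CustomRetriever
        #       class_name: "source_example.components.MyRetriever"
        #       record_selector:
        #         type: RecordSelector
        #         extractor:
        #           type: DpathExtractor
        #           field_path: ["data"]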
1679        custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1680        component_fields = get_type_hints(custom_component_class)
1681        model_args = model.dict()
1682        model_args["config"] = config
1683
1684        # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions
1685        # we defer to these arguments over the component's definition
1686        for key, arg in kwargs.items():
1687            model_args[key] = arg
1688
1689        # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
1690        # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
1691        # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
1692        for model_field, model_value in model_args.items():
1693            # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
1694            if (
1695                isinstance(model_value, dict)
1696                and "type" not in model_value
1697                and model_field in component_fields
1698            ):
1699                derived_type = self._derive_component_type_from_type_hints(
1700                    component_fields.get(model_field)
1701                )
1702                if derived_type:
1703                    model_value["type"] = derived_type
1704
1705            if self._is_component(model_value):
1706                model_args[model_field] = self._create_nested_component(
1707                    model, model_field, model_value, config
1708                )
1709            elif isinstance(model_value, list):
1710                vals = []
1711                for v in model_value:
1712                    if isinstance(v, dict) and "type" not in v and model_field in component_fields:
1713                        derived_type = self._derive_component_type_from_type_hints(
1714                            component_fields.get(model_field)
1715                        )
1716                        if derived_type:
1717                            v["type"] = derived_type
1718                    if self._is_component(v):
1719                        vals.append(self._create_nested_component(model, model_field, v, config))
1720                    else:
1721                        vals.append(v)
1722                model_args[model_field] = vals
1723
1724        kwargs = {
1725            class_field: model_args[class_field]
1726            for class_field in component_fields.keys()
1727            if class_field in model_args
1728        }
1729        return custom_component_class(**kwargs)
1730
1731    @staticmethod
1732    def _get_class_from_fully_qualified_class_name(
1733        full_qualified_class_name: str,
1734    ) -> Any:
1735        """Get a class from its fully qualified name.
1736
1737        If a custom components module is needed, we assume it is already registered - probably
1738        as `source_declarative_manifest.components` or `components`.
1739
1740        Args:
1741            full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1742
1743        Returns:
1744            Any: The class object.
1745
1746        Raises:
1747            ValueError: If the class cannot be loaded.
1748        """
1749        split = full_qualified_class_name.split(".")
1750        module_name_full = ".".join(split[:-1])
1751        class_name = split[-1]
1752
1753        try:
1754            module_ref = importlib.import_module(module_name_full)
1755        except ModuleNotFoundError as e:
1756            if split[0] == "source_declarative_manifest":
1757                # During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append
1758                try:
1759                    import os
1760
1761                    module_name_with_source_declarative_manifest = ".".join(split[1:-1])
1762                    module_ref = importlib.import_module(
1763                        module_name_with_source_declarative_manifest
1764                    )
1765                except ModuleNotFoundError:
1766                    raise ValueError(f"Could not load module `{module_name_full}`.") from e
1767            else:
1768                raise ValueError(f"Could not load module `{module_name_full}`.") from e
1769
1770        try:
1771            return getattr(module_ref, class_name)
1772        except AttributeError as e:
1773            raise ValueError(
1774                f"Could not load class `{class_name}` from module `{module_name_full}`.",
1775            ) from e
1776
1777    @staticmethod
1778    def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
1779        interface = field_type
1780        while True:
1781            origin = get_origin(interface)
1782            if origin:
1783                # Unnest types until we reach the raw type
1784                # List[T] -> T
1785                # Optional[List[T]] -> T
1786                args = get_args(interface)
1787                interface = args[0]
1788            else:
1789                break
1790        if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
1791            return interface.__name__
1792        return None
1793
1794    @staticmethod
1795    def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1796        if not cls:
1797            return False
1798        return cls.__module__ == "builtins"
1799
1800    @staticmethod
1801    def _extract_missing_parameters(error: TypeError) -> List[str]:
1802        parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
1803        if parameter_search:
1804            return re.findall(r"\'(.+?)\'", parameter_search.group(1))
1805        else:
1806            return []
1807
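    # Example (illustrative): given TypeError("__init__() missing 2 required
    # keyword-only arguments: 'url_base' and 'name'"), the regexes in
    # _extract_missing_parameters return ["url_base", "name"].
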
1808    def _create_nested_component(
1809        self, model: Any, model_field: str, model_value: Any, config: Config
1810    ) -> Any:
1811        type_name = model_value.get("type", None)
1812        if not type_name:
1813            # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
1814            return model_value
1815
1816        model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
1817        if model_type:
1818            parsed_model = model_type.parse_obj(model_value)
1819            try:
1820                # To improve usability of the language, certain fields are shared between components. This can come in the form of
1821                # a parent component passing some of its fields to a child component or the parent extracting fields from other child
1822                # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base
1823                # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created
1824                # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that
1825                # are needed by a component and could not be shared.
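                # For instance (hypothetical values): a nested DefaultPaginator that cannot
                # resolve url_base from a custom parent could receive it via
                #
                #     $parameters:
                #       url_base: "https://api.example.com/v1"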
1826                model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
1827                constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
1828                model_parameters = model_value.get("$parameters", {})
1829                matching_parameters = {
1830                    kwarg: model_parameters[kwarg]
1831                    for kwarg in constructor_kwargs
1832                    if kwarg in model_parameters
1833                }
1834                return self._create_component_from_model(
1835                    model=parsed_model, config=config, **matching_parameters
1836                )
1837            except TypeError as error:
1838                missing_parameters = self._extract_missing_parameters(error)
1839                if missing_parameters:
1840                    raise ValueError(
1841                        f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
1842                        + ", ".join(
1843                            (
1844                                f"{type_name}.$parameters.{parameter}"
1845                                for parameter in missing_parameters
1846                            )
1847                        )
1848                    )
1849                raise TypeError(
1850                    f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
1851                )
1852        else:
1853            raise ValueError(
1854                f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
1855            )
1856
1857    @staticmethod
1858    def _is_component(model_value: Any) -> bool:
1859        return isinstance(model_value, dict) and model_value.get("type") is not None
1860
1861    def create_datetime_based_cursor(
1862        self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
1863    ) -> DatetimeBasedCursor:
1864        start_datetime: Union[str, MinMaxDatetime] = (
1865            model.start_datetime
1866            if isinstance(model.start_datetime, str)
1867            else self.create_min_max_datetime(model.start_datetime, config)
1868        )
1869        end_datetime: Union[str, MinMaxDatetime, None] = None
1870        if model.is_data_feed and model.end_datetime:
1871            raise ValueError("Data feed does not support end_datetime")
1872        if model.is_data_feed and model.is_client_side_incremental:
1873            raise ValueError(
1874                "`Client side incremental` cannot be combined with `data feed`. Choose only one of them."
1875            )
1876        if model.end_datetime:
1877            end_datetime = (
1878                model.end_datetime
1879                if isinstance(model.end_datetime, str)
1880                else self.create_min_max_datetime(model.end_datetime, config)
1881            )
1882
1883        end_time_option = (
1884            self._create_component_from_model(
1885                model.end_time_option, config, parameters=model.parameters or {}
1886            )
1887            if model.end_time_option
1888            else None
1889        )
1890        start_time_option = (
1891            self._create_component_from_model(
1892                model.start_time_option, config, parameters=model.parameters or {}
1893            )
1894            if model.start_time_option
1895            else None
1896        )
1897
1898        return DatetimeBasedCursor(
1899            cursor_field=model.cursor_field,
1900            cursor_datetime_formats=model.cursor_datetime_formats
1901            if model.cursor_datetime_formats
1902            else [],
1903            cursor_granularity=model.cursor_granularity,
1904            datetime_format=model.datetime_format,
1905            end_datetime=end_datetime,
1906            start_datetime=start_datetime,
1907            step=model.step,
1908            end_time_option=end_time_option,
1909            lookback_window=model.lookback_window,
1910            start_time_option=start_time_option,
1911            partition_field_end=model.partition_field_end,
1912            partition_field_start=model.partition_field_start,
1913            message_repository=self._message_repository,
1914            is_compare_strictly=model.is_compare_strictly,
1915            config=config,
1916            parameters=model.parameters or {},
1917        )
1918
1919    def create_declarative_stream(
1920        self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
1921    ) -> DeclarativeStream:
1922        # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
1923        # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
1924        # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
1925        # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
1926        combined_slicers = self._merge_stream_slicers(model=model, config=config)
1927
1928        primary_key = model.primary_key.__root__ if model.primary_key else None
1929        stop_condition_on_cursor = (
1930            model.incremental_sync
1931            and hasattr(model.incremental_sync, "is_data_feed")
1932            and model.incremental_sync.is_data_feed
1933        )
1934        client_side_incremental_sync = None
1935        if (
1936            model.incremental_sync
1937            and hasattr(model.incremental_sync, "is_client_side_incremental")
1938            and model.incremental_sync.is_client_side_incremental
1939        ):
1940            supported_slicers = (
1941                DatetimeBasedCursor,
1942                GlobalSubstreamCursor,
1943                PerPartitionWithGlobalCursor,
1944            )
1945            if combined_slicers and not isinstance(combined_slicers, supported_slicers):
1946                raise ValueError(
1947                    "Unsupported slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1948                )
1949            cursor = (
1950                combined_slicers
1951                if isinstance(
1952                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1953                )
1954                else self._create_component_from_model(model=model.incremental_sync, config=config)
1955            )
1956
1957            client_side_incremental_sync = {"cursor": cursor}
1958
1959        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1960            cursor_model = model.incremental_sync
1961
1962            end_time_option = (
1963                self._create_component_from_model(
1964                    cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1965                )
1966                if cursor_model.end_time_option
1967                else None
1968            )
1969            start_time_option = (
1970                self._create_component_from_model(
1971                    cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1972                )
1973                if cursor_model.start_time_option
1974                else None
1975            )
1976
1977            request_options_provider = DatetimeBasedRequestOptionsProvider(
1978                start_time_option=start_time_option,
1979                end_time_option=end_time_option,
1980                partition_field_start=cursor_model.partition_field_start,
1981                partition_field_end=cursor_model.partition_field_end,
1982                config=config,
1983                parameters=model.parameters or {},
1984            )
1985        elif model.incremental_sync and isinstance(
1986            model.incremental_sync, IncrementingCountCursorModel
1987        ):
1988            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
1989
1990            start_time_option = (
1991                self._create_component_from_model(
1992                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1993                    config,
1994                    parameters=cursor_model.parameters or {},
1995                )
1996                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1997                else None
1998            )
1999
2000            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
2001            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
2002            partition_field_start = "start"
2003
2004            request_options_provider = DatetimeBasedRequestOptionsProvider(
2005                start_time_option=start_time_option,
2006                partition_field_start=partition_field_start,
2007                config=config,
2008                parameters=model.parameters or {},
2009            )
2010        else:
2011            request_options_provider = None
2012
2013        transformations = []
2014        if model.transformations:
2015            for transformation_model in model.transformations:
2016                transformations.append(
2017                    self._create_component_from_model(model=transformation_model, config=config)
2018                )
2019        file_uploader = None
2020        if model.file_uploader:
2021            file_uploader = self._create_component_from_model(
2022                model=model.file_uploader, config=config
2023            )
2024
2025        retriever = self._create_component_from_model(
2026            model=model.retriever,
2027            config=config,
2028            name=model.name,
2029            primary_key=primary_key,
2030            stream_slicer=combined_slicers,
2031            request_options_provider=request_options_provider,
2032            stop_condition_on_cursor=stop_condition_on_cursor,
2033            client_side_incremental_sync=client_side_incremental_sync,
2034            transformations=transformations,
2035            file_uploader=file_uploader,
2036            incremental_sync=model.incremental_sync,
2037        )
2038        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
2039
2040        if model.state_migrations:
2041            state_transformations = [
2042                self._create_component_from_model(state_migration, config, declarative_stream=model)
2043                for state_migration in model.state_migrations
2044            ]
2045        else:
2046            state_transformations = []
2047
2048        schema_loader: Union[
2049            CompositeSchemaLoader,
2050            DefaultSchemaLoader,
2051            DynamicSchemaLoader,
2052            InlineSchemaLoader,
2053            JsonFileSchemaLoader,
2054        ]
2055        if model.schema_loader and isinstance(model.schema_loader, list):
2056            nested_schema_loaders = [
2057                self._create_component_from_model(model=nested_schema_loader, config=config)
2058                for nested_schema_loader in model.schema_loader
2059            ]
2060            schema_loader = CompositeSchemaLoader(
2061                schema_loaders=nested_schema_loaders, parameters={}
2062            )
2063        elif model.schema_loader:
2064            schema_loader = self._create_component_from_model(
2065                model=model.schema_loader,  # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models
2066                config=config,
2067            )
2068        else:
2069            options = model.parameters or {}
2070            if "name" not in options:
2071                options["name"] = model.name
2072            schema_loader = DefaultSchemaLoader(config=config, parameters=options)
2073
2074        return DeclarativeStream(
2075            name=model.name or "",
2076            primary_key=primary_key,
2077            retriever=retriever,
2078            schema_loader=schema_loader,
2079            stream_cursor_field=cursor_field or "",
2080            state_migrations=state_transformations,
2081            config=config,
2082            parameters=model.parameters or {},
2083        )
2084
2085    def _build_stream_slicer_from_partition_router(
2086        self,
2087        model: Union[
2088            AsyncRetrieverModel,
2089            CustomRetrieverModel,
2090            SimpleRetrieverModel,
2091        ],
2092        config: Config,
2093        stream_name: Optional[str] = None,
2094    ) -> Optional[PartitionRouter]:
2095        if (
2096            hasattr(model, "partition_router")
2097            and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
2098            and model.partition_router
2099        ):
2100            stream_slicer_model = model.partition_router
2101            if isinstance(stream_slicer_model, list):
2102                return CartesianProductStreamSlicer(
2103                    [
2104                        self._create_component_from_model(
2105                            model=slicer, config=config, stream_name=stream_name or ""
2106                        )
2107                        for slicer in stream_slicer_model
2108                    ],
2109                    parameters={},
2110                )
2111            else:
2112                return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
2113                    model=stream_slicer_model, config=config, stream_name=stream_name or ""
2114                )
2115        return None
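
    # Illustrative example: when partition_router is a list, the routers are combined
    # into a CartesianProductStreamSlicer, so a (hypothetical) manifest such as
    #
    #     partition_router:
    #       - type: ListPartitionRouter
    #         values: ["a", "b"]
    #         cursor_field: "section"
    #       - type: ListPartitionRouter
    #         values: ["1", "2"]
    #         cursor_field: "page"
    #
    # produces the four partitions (a, 1), (a, 2), (b, 1) and (b, 2).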
2116
2117    def _build_incremental_cursor(
2118        self,
2119        model: DeclarativeStreamModel,
2120        stream_slicer: Optional[PartitionRouter],
2121        config: Config,
2122    ) -> Optional[StreamSlicer]:
2123        if model.incremental_sync and stream_slicer:
2124            if model.retriever.type == "AsyncRetriever":
2125                stream_name = model.name or ""
2126                stream_namespace = None
2127                stream_state = self._connector_state_manager.get_stream_state(
2128                    stream_name, stream_namespace
2129                )
2130                state_transformations = (
2131                    [
2132                        self._create_component_from_model(
2133                            state_migration, config, declarative_stream=model
2134                        )
2135                        for state_migration in model.state_migrations
2136                    ]
2137                    if model.state_migrations
2138                    else []
2139                )
2140
2141                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2142                    state_manager=self._connector_state_manager,
2143                    model_type=DatetimeBasedCursorModel,
2144                    component_definition=model.incremental_sync.__dict__,
2145                    stream_name=stream_name,
2146                    stream_namespace=stream_namespace,
2147                    config=config or {},
2148                    stream_state=stream_state,
2149                    stream_state_migrations=state_transformations,
2150                    partition_router=stream_slicer,
2151                )
2152
2153            incremental_sync_model = model.incremental_sync
2154            cursor_component = self._create_component_from_model(
2155                model=incremental_sync_model, config=config
2156            )
2157            is_global_cursor = (
2158                hasattr(incremental_sync_model, "global_substream_cursor")
2159                and incremental_sync_model.global_substream_cursor
2160            )
2161
2162            if is_global_cursor:
2163                return GlobalSubstreamCursor(
2164                    stream_cursor=cursor_component, partition_router=stream_slicer
2165                )
2166            return PerPartitionWithGlobalCursor(
2167                cursor_factory=CursorFactory(
2168                    lambda: self._create_component_from_model(
2169                        model=incremental_sync_model, config=config
2170                    ),
2171                ),
2172                partition_router=stream_slicer,
2173                stream_cursor=cursor_component,
2174            )
2175        elif model.incremental_sync:
2176            if model.retriever.type == "AsyncRetriever":
2177                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2178                    model_type=DatetimeBasedCursorModel,
2179                    component_definition=model.incremental_sync.__dict__,
2180                    stream_name=model.name or "",
2181                    stream_namespace=None,
2182                    config=config or {},
2183                    stream_state_migrations=model.state_migrations,
2184                )
2185            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return]  # Will be created Cursor as stream_slicer_model is model.incremental_sync
2186        return None
2187
2188    def _build_resumable_cursor(
2189        self,
2190        model: Union[
2191            AsyncRetrieverModel,
2192            CustomRetrieverModel,
2193            SimpleRetrieverModel,
2194        ],
2195        stream_slicer: Optional[PartitionRouter],
2196    ) -> Optional[StreamSlicer]:
2197        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
2198            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
2199            return ResumableFullRefreshCursor(parameters={})
2200        elif stream_slicer:
2201            # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
2202            return PerPartitionCursor(
2203                cursor_factory=CursorFactory(
2204                    create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
2205                ),
2206                partition_router=stream_slicer,
2207            )
2208        return None
2209
2210    def _merge_stream_slicers(
2211        self, model: DeclarativeStreamModel, config: Config
2212    ) -> Optional[StreamSlicer]:
2213        retriever_model = model.retriever
2214
2215        stream_slicer = self._build_stream_slicer_from_partition_router(
2216            retriever_model, config, stream_name=model.name
2217        )
2218
2219        if retriever_model.type == "AsyncRetriever":
2220            is_not_datetime_cursor = (
2221                model.incremental_sync.type != "DatetimeBasedCursor"
2222                if model.incremental_sync
2223                else None
2224            )
2225            is_partition_router = (
2226                bool(retriever_model.partition_router) if model.incremental_sync else None
2227            )
2228
2229            if is_not_datetime_cursor:
2230                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with
2231                # support for unordered slices (for example, when we trigger reports for January and February, the
2232                # February report can be completed first). Once we have support for custom concurrent cursors or a
2233                # new implementation available in the CDK, we can enable more cursors here.
2234                raise ValueError(
2235                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
2236                )
2237
2238            if is_partition_router and not stream_slicer:
2239                # Note that this is being developed in parallel with the per-partition work; once that work is
2240                # merged, we could support this here by calling create_concurrent_cursor_from_perpartition_cursor
2241                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
2242
2243        if model.incremental_sync:
2244            return self._build_incremental_cursor(model, stream_slicer, config)
2245
2246        return (
2247            stream_slicer
2248            if self._disable_resumable_full_refresh
2249            else self._build_resumable_cursor(retriever_model, stream_slicer)
2250        )
2251
2252    def create_default_error_handler(
2253        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
2254    ) -> DefaultErrorHandler:
2255        backoff_strategies = []
2256        if model.backoff_strategies:
2257            for backoff_strategy_model in model.backoff_strategies:
2258                backoff_strategies.append(
2259                    self._create_component_from_model(model=backoff_strategy_model, config=config)
2260                )
2261
2262        response_filters = []
2263        if model.response_filters:
2264            for response_filter_model in model.response_filters:
2265                response_filters.append(
2266                    self._create_component_from_model(model=response_filter_model, config=config)
2267                )
2268        response_filters.append(
2269            HttpResponseFilter(config=config, parameters=model.parameters or {})
2270        )
2271
2272        return DefaultErrorHandler(
2273            backoff_strategies=backoff_strategies,
2274            max_retries=model.max_retries,
2275            response_filters=response_filters,
2276            config=config,
2277            parameters=model.parameters or {},
2278        )
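
    # For reference, a hedged manifest snippet (values illustrative) of the kind this
    # method assembles:
    #
    #     error_handler:
    #       type: DefaultErrorHandler
    #       max_retries: 5
    #       backoff_strategies:
    #         - type: ConstantBackoffStrategy
    #           backoff_time_in_seconds: 10
    #       response_filters:
    #         - type: HttpResponseFilter
    #           action: RETRY
    #           http_codes: [429]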
2279
2280    def create_default_paginator(
2281        self,
2282        model: DefaultPaginatorModel,
2283        config: Config,
2284        *,
2285        url_base: str,
2286        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2287        decoder: Optional[Decoder] = None,
2288        cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
2289    ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
2290        if decoder:
2291            if self._is_supported_decoder_for_pagination(decoder):
2292                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
2293            else:
2294                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
2295        else:
2296            decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
2297        page_size_option = (
2298            self._create_component_from_model(model=model.page_size_option, config=config)
2299            if model.page_size_option
2300            else None
2301        )
2302        page_token_option = (
2303            self._create_component_from_model(model=model.page_token_option, config=config)
2304            if model.page_token_option
2305            else None
2306        )
2307        pagination_strategy = self._create_component_from_model(
2308            model=model.pagination_strategy,
2309            config=config,
2310            decoder=decoder_to_use,
2311            extractor_model=extractor_model,
2312        )
2313        if cursor_used_for_stop_condition:
2314            pagination_strategy = StopConditionPaginationStrategyDecorator(
2315                pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
2316            )
2317        paginator = DefaultPaginator(
2318            decoder=decoder_to_use,
2319            page_size_option=page_size_option,
2320            page_token_option=page_token_option,
2321            pagination_strategy=pagination_strategy,
2322            url_base=url_base,
2323            config=config,
2324            parameters=model.parameters or {},
2325        )
2326        if self._limit_pages_fetched_per_slice:
2327            return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
2328        return paginator
2329
2330    def create_dpath_extractor(
2331        self,
2332        model: DpathExtractorModel,
2333        config: Config,
2334        decoder: Optional[Decoder] = None,
2335        **kwargs: Any,
2336    ) -> DpathExtractor:
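            """
            Build a DpathExtractor that selects records from the decoded response via a
            field path. As a hedged illustration (assumed response shape, not from this
            module): field_path ["data", "records"] would extract [{"id": 1}] from
            {"data": {"records": [{"id": 1}]}}.
            """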
2337        if decoder:
2338            decoder_to_use = decoder
2339        else:
2340            decoder_to_use = JsonDecoder(parameters={})
2341        model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
2342        return DpathExtractor(
2343            decoder=decoder_to_use,
2344            field_path=model_field_path,
2345            config=config,
2346            parameters=model.parameters or {},
2347        )
2348
2349    @staticmethod
2350    def create_response_to_file_extractor(
2351        model: ResponseToFileExtractorModel,
2352        **kwargs: Any,
2353    ) -> ResponseToFileExtractor:
2354        return ResponseToFileExtractor(parameters=model.parameters or {})
2355
2356    @staticmethod
2357    def create_exponential_backoff_strategy(
2358        model: ExponentialBackoffStrategyModel, config: Config
2359    ) -> ExponentialBackoffStrategy:
2360        return ExponentialBackoffStrategy(
2361            factor=model.factor or 5, parameters=model.parameters or {}, config=config
2362        )
2363
2364    @staticmethod
2365    def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey:
2366        return GroupByKey(model.key, config=config, parameters=model.parameters or {})
2367
2368    def create_http_requester(
2369        self,
2370        model: HttpRequesterModel,
2371        config: Config,
2372        decoder: Decoder = JsonDecoder(parameters={}),
2373        query_properties_key: Optional[str] = None,
2374        use_cache: Optional[bool] = None,
2375        *,
2376        name: str,
2377    ) -> HttpRequester:
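            """
            Assemble an HttpRequester with its authenticator, error handler, API budget
            and request options provider. Illustrative manifest snippet (a hedged
            example; assumed schema, with a placeholder URL):

                requester:
                  type: HttpRequester
                  url_base: "https://api.example.com/v1"
                  path: "/items"
                  http_method: GET
                  request_parameters:
                    page_size: "100"
            """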
2378        authenticator = (
2379            self._create_component_from_model(
2380                model=model.authenticator,
2381                config=config,
2382                url_base=model.url or model.url_base,
2383                name=name,
2384                decoder=decoder,
2385            )
2386            if model.authenticator
2387            else None
2388        )
2389        error_handler = (
2390            self._create_component_from_model(model=model.error_handler, config=config)
2391            if model.error_handler
2392            else DefaultErrorHandler(
2393                backoff_strategies=[],
2394                response_filters=[],
2395                config=config,
2396                parameters=model.parameters or {},
2397            )
2398        )
2399
2400        api_budget = self._api_budget
2401
2402        # Remove QueryProperties components from the interpolated mappings: they are designed
2403        # to be used by the SimpleRetriever and are resolved by the provider directly from the
2404        # slice rather than through Jinja interpolation.
2405        request_parameters: Optional[Union[str, Mapping[str, str]]]
2406        if isinstance(model.request_parameters, Mapping):
2407            request_parameters = self._remove_query_properties(model.request_parameters)
2408        else:
2409            request_parameters = model.request_parameters
2410
2411        request_options_provider = InterpolatedRequestOptionsProvider(
2412            request_body=model.request_body,
2413            request_body_data=model.request_body_data,
2414            request_body_json=model.request_body_json,
2415            request_headers=model.request_headers,
2416            request_parameters=request_parameters,
2417            query_properties_key=query_properties_key,
2418            config=config,
2419            parameters=model.parameters or {},
2420        )
2421
2422        assert model.use_cache is not None  # for mypy
2423        assert model.http_method is not None  # for mypy
2424
2425        should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
2426
2427        return HttpRequester(
2428            name=name,
2429            url=model.url,
2430            url_base=model.url_base,
2431            path=model.path,
2432            authenticator=authenticator,
2433            error_handler=error_handler,
2434            api_budget=api_budget,
2435            http_method=HttpMethod[model.http_method.value],
2436            request_options_provider=request_options_provider,
2437            config=config,
2438            disable_retries=self._disable_retries,
2439            parameters=model.parameters or {},
2440            message_repository=self._message_repository,
2441            use_cache=should_use_cache,
2442            decoder=decoder,
2443            stream_response=decoder.is_stream_response() if decoder else False,
2444        )
2445
2446    @staticmethod
2447    def create_http_response_filter(
2448        model: HttpResponseFilterModel, config: Config, **kwargs: Any
2449    ) -> HttpResponseFilter:
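            """
            Build an HttpResponseFilter. Illustrative manifest snippet (a hedged
            example; assumed schema) retrying rate-limited and unavailable responses:

                response_filter:
                  type: HttpResponseFilter
                  action: RETRY
                  http_codes: [429, 503]
            """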
2450        if model.action:
2451            action = ResponseAction(model.action.value)
2452        else:
2453            action = None
2454
2455        failure_type = FailureType(model.failure_type.value) if model.failure_type else None
2456
2457        http_codes = (
2458            set(model.http_codes) if model.http_codes else set()
2459        )  # JSON schema notation has no set data type. The schema enforces an array of unique elements
2460
2461        return HttpResponseFilter(
2462            action=action,
2463            failure_type=failure_type,
2464            error_message=model.error_message or "",
2465            error_message_contains=model.error_message_contains or "",
2466            http_codes=http_codes,
2467            predicate=model.predicate or "",
2468            config=config,
2469            parameters=model.parameters or {},
2470        )
2471
2472    @staticmethod
2473    def create_inline_schema_loader(
2474        model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
2475    ) -> InlineSchemaLoader:
2476        return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
2477
2478    def create_complex_field_type(
2479        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
2480    ) -> ComplexFieldType:
2481        items = (
2482            self._create_component_from_model(model=model.items, config=config)
2483            if isinstance(model.items, ComplexFieldTypeModel)
2484            else model.items
2485        )
2486
2487        return ComplexFieldType(field_type=model.field_type, items=items)
2488
2489    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
2490        target_type = (
2491            self._create_component_from_model(model=model.target_type, config=config)
2492            if isinstance(model.target_type, ComplexFieldTypeModel)
2493            else model.target_type
2494        )
2495
2496        return TypesMap(
2497            target_type=target_type,
2498            current_type=model.current_type,
2499            condition=model.condition if model.condition is not None else "True",
2500        )
2501
2502    def create_schema_type_identifier(
2503        self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
2504    ) -> SchemaTypeIdentifier:
2505        types_mapping = []
2506        if model.types_mapping:
2507            types_mapping.extend(
2508                [
2509                    self._create_component_from_model(types_map, config=config)
2510                    for types_map in model.types_mapping
2511                ]
2512            )
2513        model_schema_pointer: List[Union[InterpolatedString, str]] = (
2514            list(model.schema_pointer) if model.schema_pointer else []
2515        )
2516        model_key_pointer: List[Union[InterpolatedString, str]] = list(model.key_pointer)
2517        model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
2518            list(model.type_pointer) if model.type_pointer else None
2519        )
2520
2521        return SchemaTypeIdentifier(
2522            schema_pointer=model_schema_pointer,
2523            key_pointer=model_key_pointer,
2524            type_pointer=model_type_pointer,
2525            types_mapping=types_mapping,
2526            parameters=model.parameters or {},
2527        )
2528
2529    def create_dynamic_schema_loader(
2530        self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2531    ) -> DynamicSchemaLoader:
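            """
            Build a DynamicSchemaLoader that discovers a stream's schema at runtime: a
            cached retriever issues auxiliary requests, and the declared
            SchemaTypeIdentifier maps the returned fields to JSON schema types, with
            optional schema transformations and filtering applied on top.
            """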
2532        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2533        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2534
2535        schema_transformations = []
2536        if model.schema_transformations:
2537            for transformation_model in model.schema_transformations:
2538                schema_transformations.append(
2539                    self._create_component_from_model(model=transformation_model, config=config)
2540                )
2541        name = "dynamic_properties"
2542        retriever = self._create_component_from_model(
2543            model=model.retriever,
2544            config=config,
2545            name=name,
2546            primary_key=None,
2547            stream_slicer=combined_slicers,
2548            transformations=[],
2549            use_cache=True,
2550            log_formatter=(
2551                lambda response: format_http_message(
2552                    response,
2553                    f"Schema loader '{name}' request",
2554                    "Request performed in order to extract schema.",
2555                    name,
2556                    is_auxiliary=True,
2557                )
2558            ),
2559        )
2560        schema_type_identifier = self._create_component_from_model(
2561            model.schema_type_identifier, config=config, parameters=model.parameters or {}
2562        )
2563        schema_filter = (
2564            self._create_component_from_model(
2565                model.schema_filter, config=config, parameters=model.parameters or {}
2566            )
2567            if model.schema_filter is not None
2568            else None
2569        )
2570
2571        return DynamicSchemaLoader(
2572            retriever=retriever,
2573            config=config,
2574            schema_transformations=schema_transformations,
2575            schema_filter=schema_filter,
2576            schema_type_identifier=schema_type_identifier,
2577            parameters=model.parameters or {},
2578        )
2579
2580    @staticmethod
2581    def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2582        return JsonDecoder(parameters={})
2583
2584    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2585        return CompositeRawDecoder(
2586            parser=ModelToComponentFactory._get_parser(model, config),
2587            stream_response=not self._emit_connector_builder_messages,
2588        )
2589
2590    def create_jsonl_decoder(
2591        self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2592    ) -> Decoder:
2593        return CompositeRawDecoder(
2594            parser=ModelToComponentFactory._get_parser(model, config),
2595            stream_response=not self._emit_connector_builder_messages,
2596        )
2597
2598    def create_gzip_decoder(
2599        self, model: GzipDecoderModel, config: Config, **kwargs: Any
2600    ) -> Decoder:
2601        _compressed_response_types = {
2602            "gzip",
2603            "x-gzip",
2604            "gzip, deflate",
2605            "x-gzip, deflate",
2606            "application/zip",
2607            "application/gzip",
2608            "application/x-gzip",
2609            "application/x-zip-compressed",
2610        }
2611
2612        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser
2613
2614        if self._emit_connector_builder_messages:
2615            # This is surprising, but if the response is not streamed,
2616            # CompositeRawDecoder calls response.content, and the requests library transparently decompresses the data, as opposed to response.raw,
2617            # which uses urllib3 directly and leaves the data compressed.
2618            return CompositeRawDecoder(gzip_parser.inner_parser, False)
2619
2620        return CompositeRawDecoder.by_headers(
2621            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2622            stream_response=True,
2623            fallback_parser=gzip_parser.inner_parser,
2624        )
2625
2626    @staticmethod
2627    def create_incrementing_count_cursor(
2628        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
2629    ) -> DatetimeBasedCursor:
2630        # This should never actually be used at runtime, but it is needed to pass checks since
2631        # we still parse models into components. The issue is that there is no runtime implementation of an
2632        # IncrementingCountCursor.
2633        # A known and expected limitation of this stub is running a check with a declared IncrementingCountCursor, because the check runs without a ConcurrentCursor.
2634        return DatetimeBasedCursor(
2635            cursor_field=model.cursor_field,
2636            datetime_format="%Y-%m-%d",
2637            start_datetime="2024-12-12",
2638            config=config,
2639            parameters={},
2640        )
2641
2642    @staticmethod
2643    def create_iterable_decoder(
2644        model: IterableDecoderModel, config: Config, **kwargs: Any
2645    ) -> IterableDecoder:
2646        return IterableDecoder(parameters={})
2647
2648    @staticmethod
2649    def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2650        return XmlDecoder(parameters={})
2651
2652    def create_zipfile_decoder(
2653        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2654    ) -> ZipfileDecoder:
2655        return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2656
2657    @staticmethod
2658    def _get_parser(model: BaseModel, config: Config) -> Parser:
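            """
            Map a decoder model to its streaming Parser. JSON, JSONL and CSV decoders
            have dedicated parsers, and gzip decoders recursively wrap their inner
            decoder's parser; custom, iterable, XML and zipfile decoders intentionally
            have no parser and raise instead.
            """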
2659        if isinstance(model, JsonDecoderModel):
2660            # Note that the logic is slightly different from the JsonDecoder, as some legacy behavior of returning {} on error cases is maintained
2661            return JsonParser()
2662        elif isinstance(model, JsonlDecoderModel):
2663            return JsonLineParser()
2664        elif isinstance(model, CsvDecoderModel):
2665            return CsvParser(
2666                encoding=model.encoding,
2667                delimiter=model.delimiter,
2668                set_values_to_none=model.set_values_to_none,
2669            )
2670        elif isinstance(model, GzipDecoderModel):
2671            return GzipParser(
2672                inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
2673            )
2674        elif isinstance(
2675            model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
2676        ):
2677            raise ValueError(f"Decoder type {model} does not have a parser associated with it")
2678
2679        raise ValueError(f"Unknown decoder type {model}")
2680
2681    @staticmethod
2682    def create_json_file_schema_loader(
2683        model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
2684    ) -> JsonFileSchemaLoader:
2685        return JsonFileSchemaLoader(
2686            file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2687        )
2688
2689    @staticmethod
2690    def create_jwt_authenticator(
2691        model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2692    ) -> JwtAuthenticator:
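            """
            Build a JwtAuthenticator, defaulting missing jwt_headers to {"typ": "JWT"}
            and missing jwt_payload claims to None. Illustrative manifest snippet
            (a hedged example; assumed schema):

                authenticator:
                  type: JwtAuthenticator
                  secret_key: "{{ config['secret_key'] }}"
                  algorithm: HS256
                  token_duration: 1200
            """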
2693        jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2694        jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2695        return JwtAuthenticator(
2696            config=config,
2697            parameters=model.parameters or {},
2698            algorithm=JwtAlgorithm(model.algorithm.value),
2699            secret_key=model.secret_key,
2700            base64_encode_secret_key=model.base64_encode_secret_key,
2701            token_duration=model.token_duration,
2702            header_prefix=model.header_prefix,
2703            kid=jwt_headers.kid,
2704            typ=jwt_headers.typ,
2705            cty=jwt_headers.cty,
2706            iss=jwt_payload.iss,
2707            sub=jwt_payload.sub,
2708            aud=jwt_payload.aud,
2709            additional_jwt_headers=model.additional_jwt_headers,
2710            additional_jwt_payload=model.additional_jwt_payload,
2711        )
2712
2713    def create_list_partition_router(
2714        self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2715    ) -> ListPartitionRouter:
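            """
            Build a ListPartitionRouter that produces one partition per configured
            value. Illustrative manifest snippet (a hedged example; assumed schema):

                partition_router:
                  type: ListPartitionRouter
                  cursor_field: region
                  values: ["us", "eu", "apac"]
                  request_option:
                    type: RequestOption
                    inject_into: request_parameter
                    field_name: region
            """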
2716        request_option = (
2717            self._create_component_from_model(model.request_option, config)
2718            if model.request_option
2719            else None
2720        )
2721        return ListPartitionRouter(
2722            cursor_field=model.cursor_field,
2723            request_option=request_option,
2724            values=model.values,
2725            config=config,
2726            parameters=model.parameters or {},
2727        )
2728
2729    @staticmethod
2730    def create_min_max_datetime(
2731        model: MinMaxDatetimeModel, config: Config, **kwargs: Any
2732    ) -> MinMaxDatetime:
2733        return MinMaxDatetime(
2734            datetime=model.datetime,
2735            datetime_format=model.datetime_format or "",
2736            max_datetime=model.max_datetime or "",
2737            min_datetime=model.min_datetime or "",
2738            parameters=model.parameters or {},
2739        )
2740
2741    @staticmethod
2742    def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
2743        return NoAuth(parameters=model.parameters or {})
2744
2745    @staticmethod
2746    def create_no_pagination(
2747        model: NoPaginationModel, config: Config, **kwargs: Any
2748    ) -> NoPagination:
2749        return NoPagination(parameters={})
2750
2751    def create_oauth_authenticator(
2752        self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
2753    ) -> DeclarativeOauth2Authenticator:
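            """
            Build an OAuth2 authenticator. When refresh_token_updater is declared, a
            DeclarativeSingleUseRefreshTokenOauth2Authenticator is returned so rotated
            refresh tokens can be written back to the config; otherwise a plain
            DeclarativeOauth2Authenticator is returned. Illustrative manifest snippet
            (a hedged example; assumed schema, with a placeholder endpoint):

                authenticator:
                  type: OAuthAuthenticator
                  token_refresh_endpoint: "https://api.example.com/oauth/token"
                  client_id: "{{ config['client_id'] }}"
                  client_secret: "{{ config['client_secret'] }}"
                  refresh_token: "{{ config['refresh_token'] }}"
            """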
2754        profile_assertion = (
2755            self._create_component_from_model(model.profile_assertion, config=config)
2756            if model.profile_assertion
2757            else None
2758        )
2759
2760        if model.refresh_token_updater:
2761            # ignore type error because fixing it would have a lot of dependencies, revisit later
2762            return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
2763                config,
2764                InterpolatedString.create(
2765                    model.token_refresh_endpoint,  # type: ignore
2766                    parameters=model.parameters or {},
2767                ).eval(config),
2768                access_token_name=InterpolatedString.create(
2769                    model.access_token_name or "access_token", parameters=model.parameters or {}
2770                ).eval(config),
2771                refresh_token_name=model.refresh_token_updater.refresh_token_name,
2772                expires_in_name=InterpolatedString.create(
2773                    model.expires_in_name or "expires_in", parameters=model.parameters or {}
2774                ).eval(config),
2775                client_id_name=InterpolatedString.create(
2776                    model.client_id_name or "client_id", parameters=model.parameters or {}
2777                ).eval(config),
2778                client_id=InterpolatedString.create(
2779                    model.client_id, parameters=model.parameters or {}
2780                ).eval(config)
2781                if model.client_id
2782                else model.client_id,
2783                client_secret_name=InterpolatedString.create(
2784                    model.client_secret_name or "client_secret", parameters=model.parameters or {}
2785                ).eval(config),
2786                client_secret=InterpolatedString.create(
2787                    model.client_secret, parameters=model.parameters or {}
2788                ).eval(config)
2789                if model.client_secret
2790                else model.client_secret,
2791                access_token_config_path=model.refresh_token_updater.access_token_config_path,
2792                refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
2793                token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
2794                grant_type_name=InterpolatedString.create(
2795                    model.grant_type_name or "grant_type", parameters=model.parameters or {}
2796                ).eval(config),
2797                grant_type=InterpolatedString.create(
2798                    model.grant_type or "refresh_token", parameters=model.parameters or {}
2799                ).eval(config),
2800                refresh_request_body=InterpolatedMapping(
2801                    model.refresh_request_body or {}, parameters=model.parameters or {}
2802                ).eval(config),
2803                refresh_request_headers=InterpolatedMapping(
2804                    model.refresh_request_headers or {}, parameters=model.parameters or {}
2805                ).eval(config),
2806                scopes=model.scopes,
2807                token_expiry_date_format=model.token_expiry_date_format,
2808                token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2809                message_repository=self._message_repository,
2810                refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
2811                refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
2812                refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
2813            )
2814        # ignore type error because fixing it would have a lot of dependencies, revisit later
2815        return DeclarativeOauth2Authenticator(  # type: ignore
2816            access_token_name=model.access_token_name or "access_token",
2817            access_token_value=model.access_token_value,
2818            client_id_name=model.client_id_name or "client_id",
2819            client_id=model.client_id,
2820            client_secret_name=model.client_secret_name or "client_secret",
2821            client_secret=model.client_secret,
2822            expires_in_name=model.expires_in_name or "expires_in",
2823            grant_type_name=model.grant_type_name or "grant_type",
2824            grant_type=model.grant_type or "refresh_token",
2825            refresh_request_body=model.refresh_request_body,
2826            refresh_request_headers=model.refresh_request_headers,
2827            refresh_token_name=model.refresh_token_name or "refresh_token",
2828            refresh_token=model.refresh_token,
2829            scopes=model.scopes,
2830            token_expiry_date=model.token_expiry_date,
2831            token_expiry_date_format=model.token_expiry_date_format,
2832            token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2833            token_refresh_endpoint=model.token_refresh_endpoint,
2834            config=config,
2835            parameters=model.parameters or {},
2836            message_repository=self._message_repository,
2837            profile_assertion=profile_assertion,
2838            use_profile_assertion=model.use_profile_assertion,
2839        )
2840
2841    def create_offset_increment(
2842        self,
2843        model: OffsetIncrementModel,
2844        config: Config,
2845        decoder: Decoder,
2846        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2847        **kwargs: Any,
2848    ) -> OffsetIncrement:
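            """
            Build an OffsetIncrement pagination strategy, normalizing the decoder into
            a PaginationDecoderDecorator and validating that it supports pagination.
            As a hedged illustration: with a page_size of 100, the injected offset
            would advance 0 -> 100 -> 200 across consecutive full pages.
            """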
2849        if isinstance(decoder, PaginationDecoderDecorator):
2850            inner_decoder = decoder.decoder
2851        else:
2852            inner_decoder = decoder
2853            decoder = PaginationDecoderDecorator(decoder=decoder)
2854
2855        if self._is_supported_decoder_for_pagination(inner_decoder):
2856            decoder_to_use = decoder
2857        else:
2858            raise ValueError(
2859                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
2860            )
2861
2862        # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
2863        # so that it could be shared by OffsetIncrement and RecordSelector. However, because we instantiate the
2864        # decoder with various decorators here, but not in create_record_selector, it is simpler to retain the existing
2865        # behavior of two separate extractors with identical behavior, since they are built from the same extractor model.
2866        # When we have more time to investigate, we can look into reusing the same component.
2867        extractor = (
2868            self._create_component_from_model(
2869                model=extractor_model, config=config, decoder=decoder_to_use
2870            )
2871            if extractor_model
2872            else None
2873        )
2874
2875        return OffsetIncrement(
2876            page_size=model.page_size,
2877            config=config,
2878            decoder=decoder_to_use,
2879            extractor=extractor,
2880            inject_on_first_request=model.inject_on_first_request or False,
2881            parameters=model.parameters or {},
2882        )
2883
2884    @staticmethod
2885    def create_page_increment(
2886        model: PageIncrementModel, config: Config, **kwargs: Any
2887    ) -> PageIncrement:
2888        return PageIncrement(
2889            page_size=model.page_size,
2890            config=config,
2891            start_from_page=model.start_from_page or 0,
2892            inject_on_first_request=model.inject_on_first_request or False,
2893            parameters=model.parameters or {},
2894        )
2895
2896    def create_parent_stream_config(
2897        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2898    ) -> ParentStreamConfig:
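            """
            Build a ParentStreamConfig linking a substream partition to its parent
            stream. Wildcards are rejected in lazy_read_pointer since only direct
            paths are supported. Illustrative manifest snippet (a hedged example;
            assumed schema, with a hypothetical parent stream reference):

                parent_stream_configs:
                  - type: ParentStreamConfig
                    stream: "#/definitions/projects_stream"
                    parent_key: id
                    partition_field: project_id
            """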
2899        declarative_stream = self._create_component_from_model(
2900            model.stream, config=config, **kwargs
2901        )
2902        request_option = (
2903            self._create_component_from_model(model.request_option, config=config)
2904            if model.request_option
2905            else None
2906        )
2907
2908        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
2909            raise ValueError(
2910                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
2911            )
2912
2913        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
2914            list(model.lazy_read_pointer) if model.lazy_read_pointer else []
2915        )
2916
2917        return ParentStreamConfig(
2918            parent_key=model.parent_key,
2919            request_option=request_option,
2920            stream=declarative_stream,
2921            partition_field=model.partition_field,
2922            config=config,
2923            incremental_dependency=model.incremental_dependency or False,
2924            parameters=model.parameters or {},
2925            extra_fields=model.extra_fields,
2926            lazy_read_pointer=model_lazy_read_pointer,
2927        )
2928
2929    def create_properties_from_endpoint(
2930        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
2931    ) -> PropertiesFromEndpoint:
2932        retriever = self._create_component_from_model(
2933            model=model.retriever,
2934            config=config,
2935            name="dynamic_properties",
2936            primary_key=None,
2937            stream_slicer=None,
2938            transformations=[],
2939            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ
2940        )
2941        return PropertiesFromEndpoint(
2942            property_field_path=model.property_field_path,
2943            retriever=retriever,
2944            config=config,
2945            parameters=model.parameters or {},
2946        )
2947
2948    def create_property_chunking(
2949        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2950    ) -> PropertyChunking:
2951        record_merge_strategy = (
2952            self._create_component_from_model(
2953                model=model.record_merge_strategy, config=config, **kwargs
2954            )
2955            if model.record_merge_strategy
2956            else None
2957        )
2958
2959        property_limit_type: PropertyLimitType
2960        match model.property_limit_type:
2961            case PropertyLimitTypeModel.property_count:
2962                property_limit_type = PropertyLimitType.property_count
2963            case PropertyLimitTypeModel.characters:
2964                property_limit_type = PropertyLimitType.characters
2965            case _:
2966                raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")
2967
2968        return PropertyChunking(
2969            property_limit_type=property_limit_type,
2970            property_limit=model.property_limit,
2971            record_merge_strategy=record_merge_strategy,
2972            config=config,
2973            parameters=model.parameters or {},
2974        )
2975
2976    def create_query_properties(
2977        self, model: QueryPropertiesModel, config: Config, **kwargs: Any
2978    ) -> QueryProperties:
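            """
            Build a QueryProperties component. property_list may be a literal list of
            property names or a PropertiesFromEndpoint model resolved at runtime.
            Illustrative manifest snippet (a hedged example; assumed schema):

                query_properties:
                  type: QueryProperties
                  property_list: ["id", "name", "updated_at"]
                  property_chunking:
                    type: PropertyChunking
                    property_limit_type: property_count
                    property_limit: 15
            """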
2979        if isinstance(model.property_list, list):
2980            property_list = model.property_list
2981        else:
2982            property_list = self._create_component_from_model(
2983                model=model.property_list, config=config, **kwargs
2984            )
2985
2986        property_chunking = (
2987            self._create_component_from_model(
2988                model=model.property_chunking, config=config, **kwargs
2989            )
2990            if model.property_chunking
2991            else None
2992        )
2993
2994        return QueryProperties(
2995            property_list=property_list,
2996            always_include_properties=model.always_include_properties,
2997            property_chunking=property_chunking,
2998            config=config,
2999            parameters=model.parameters or {},
3000        )
3001
3002    @staticmethod
3003    def create_record_filter(
3004        model: RecordFilterModel, config: Config, **kwargs: Any
3005    ) -> RecordFilter:
3006        return RecordFilter(
3007            condition=model.condition or "", config=config, parameters=model.parameters or {}
3008        )
3009
3010    @staticmethod
3011    def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
3012        return RequestPath(parameters={})
3013
3014    @staticmethod
3015    def create_request_option(
3016        model: RequestOptionModel, config: Config, **kwargs: Any
3017    ) -> RequestOption:
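            """
            Build a RequestOption describing where a value is injected into outgoing
            requests; field_path segments are interpolated individually. Illustrative
            manifest snippet (a hedged example; assumed schema):

                page_size_option:
                  type: RequestOption
                  inject_into: request_parameter
                  field_name: per_page
            """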
3018        inject_into = RequestOptionType(model.inject_into.value)
3019        field_path: Optional[List[Union[InterpolatedString, str]]] = (
3020            [
3021                InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
3022                for segment in model.field_path
3023            ]
3024            if model.field_path
3025            else None
3026        )
3027        field_name = (
3028            InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
3029            if model.field_name
3030            else None
3031        )
3032        return RequestOption(
3033            field_name=field_name,
3034            field_path=field_path,
3035            inject_into=inject_into,
3036            parameters=kwargs.get("parameters", {}),
3037        )
3038
3039    def create_record_selector(
3040        self,
3041        model: RecordSelectorModel,
3042        config: Config,
3043        *,
3044        name: str,
3045        transformations: List[RecordTransformation] | None = None,
3046        decoder: Decoder | None = None,
3047        client_side_incremental_sync: Dict[str, Any] | None = None,
3048        file_uploader: Optional[DefaultFileUploader] = None,
3049        **kwargs: Any,
3050    ) -> RecordSelector:
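            """
            Build a RecordSelector combining the extractor, an optional record filter,
            transformations and schema normalization. When client-side incremental
            sync is enabled, the filter is wrapped in
            ClientSideIncrementalRecordFilterDecorator and transformations default to
            running before filtering. Illustrative manifest snippet (a hedged example;
            assumed schema):

                record_selector:
                  type: RecordSelector
                  extractor:
                    type: DpathExtractor
                    field_path: ["results"]
                  record_filter:
                    type: RecordFilter
                    condition: "{{ record['status'] == 'active' }}"
            """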
3051        extractor = self._create_component_from_model(
3052            model=model.extractor, decoder=decoder, config=config
3053        )
3054        record_filter = (
3055            self._create_component_from_model(model.record_filter, config=config)
3056            if model.record_filter
3057            else None
3058        )
3059
3060        transform_before_filtering = (
3061            False if model.transform_before_filtering is None else model.transform_before_filtering
3062        )
3063        if client_side_incremental_sync:
3064            record_filter = ClientSideIncrementalRecordFilterDecorator(
3065                config=config,
3066                parameters=model.parameters,
3067                condition=model.record_filter.condition
3068                if (model.record_filter and hasattr(model.record_filter, "condition"))
3069                else None,
3070                **client_side_incremental_sync,
3071            )
3072            transform_before_filtering = (
3073                True
3074                if model.transform_before_filtering is None
3075                else model.transform_before_filtering
3076            )
3077
3078        if model.schema_normalization is None:
3079            # default to no schema normalization if not set
3080            model.schema_normalization = SchemaNormalizationModel.None_
3081
3082        schema_normalization = (
3083            TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
3084            if isinstance(model.schema_normalization, SchemaNormalizationModel)
3085            else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
3086        )
3087
3088        return RecordSelector(
3089            extractor=extractor,
3090            name=name,
3091            config=config,
3092            record_filter=record_filter,
3093            transformations=transformations or [],
3094            file_uploader=file_uploader,
3095            schema_normalization=schema_normalization,
3096            parameters=model.parameters or {},
3097            transform_before_filtering=transform_before_filtering,
3098        )
3099
3100    @staticmethod
3101    def create_remove_fields(
3102        model: RemoveFieldsModel, config: Config, **kwargs: Any
3103    ) -> RemoveFields:
3104        return RemoveFields(
3105            field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
3106        )
3107
3108    def create_selective_authenticator(
3109        self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
3110    ) -> DeclarativeAuthenticator:
3111        authenticators = {
3112            name: self._create_component_from_model(model=auth, config=config)
3113            for name, auth in model.authenticators.items()
3114        }
3115        # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
3116        return SelectiveAuthenticator(  # type: ignore[abstract]
3117            config=config,
3118            authenticators=authenticators,
3119            authenticator_selection_path=model.authenticator_selection_path,
3120            **kwargs,
3121        )
3122
3123    @staticmethod
3124    def create_legacy_session_token_authenticator(
3125        model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
3126    ) -> LegacySessionTokenAuthenticator:
3127        return LegacySessionTokenAuthenticator(
3128            api_url=url_base,
3129            header=model.header,
3130            login_url=model.login_url,
3131            password=model.password or "",
3132            session_token=model.session_token or "",
3133            session_token_response_key=model.session_token_response_key or "",
3134            username=model.username or "",
3135            validate_session_url=model.validate_session_url,
3136            config=config,
3137            parameters=model.parameters or {},
3138        )
3139
3140    def create_simple_retriever(
3141        self,
3142        model: SimpleRetrieverModel,
3143        config: Config,
3144        *,
3145        name: str,
3146        primary_key: Optional[Union[str, List[str], List[List[str]]]],
3147        stream_slicer: Optional[StreamSlicer],
3148        request_options_provider: Optional[RequestOptionsProvider] = None,
3149        stop_condition_on_cursor: bool = False,
3150        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3151        transformations: List[RecordTransformation],
3152        file_uploader: Optional[DefaultFileUploader] = None,
3153        incremental_sync: Optional[
3154            Union[
3155                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
3156            ]
3157        ] = None,
3158        use_cache: Optional[bool] = None,
3159        log_formatter: Optional[Callable[[Response], Any]] = None,
3160        **kwargs: Any,
3161    ) -> SimpleRetriever:
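            """
            Assemble a SimpleRetriever, or a LazySimpleRetriever when a substream
            partition router declares lazy_read_pointer and no stream state exists yet.
            Query properties may come from request_parameters, the legacy
            fetch_properties_from_endpoint field, or the requester's query_properties,
            but only one source may be used per stream.
            """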
3162        def _get_url() -> str:
3163            """
3164            Closure to get the URL from the requester. A closure is needed for the lazy retriever,
3165            where the URL is not known until the requester has been created.
3166            """
3167
3168            _url: str = (
3169                model.requester.url
3170                if hasattr(model.requester, "url") and model.requester.url is not None
3171                else requester.get_url()
3172            )
3173            _url_base: str = (
3174                model.requester.url_base
3175                if hasattr(model.requester, "url_base") and model.requester.url_base is not None
3176                else requester.get_url_base()
3177            )
3178
3179            return _url or _url_base
3180
3181        decoder = (
3182            self._create_component_from_model(model=model.decoder, config=config)
3183            if model.decoder
3184            else JsonDecoder(parameters={})
3185        )
3186        record_selector = self._create_component_from_model(
3187            model=model.record_selector,
3188            name=name,
3189            config=config,
3190            decoder=decoder,
3191            transformations=transformations,
3192            client_side_incremental_sync=client_side_incremental_sync,
3193            file_uploader=file_uploader,
3194        )
3195
3196        query_properties: Optional[QueryProperties] = None
3197        query_properties_key: Optional[str] = None
3198        if self._query_properties_in_request_parameters(model.requester):
3199            # It is better to raise an explicit error if PropertiesFromEndpoint is defined in multiple
3200            # places than to silently default to request_parameters, which isn't clearly documented
3201            if (
3202                hasattr(model.requester, "fetch_properties_from_endpoint")
3203                and model.requester.fetch_properties_from_endpoint
3204            ):
3205                raise ValueError(
3206                    f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters"
3207                )
3208
3209            query_properties_definitions = []
3210            for key, request_parameter in model.requester.request_parameters.items():  # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters()
3211                if isinstance(request_parameter, QueryPropertiesModel):
3212                    query_properties_key = key
3213                    query_properties_definitions.append(request_parameter)
3214
3215            if len(query_properties_definitions) > 1:
3216                raise ValueError(
3217                    f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
3218                )
3219
3220            if len(query_properties_definitions) == 1:
3221                query_properties = self._create_component_from_model(
3222                    model=query_properties_definitions[0], config=config
3223                )
3224        elif (
3225            hasattr(model.requester, "fetch_properties_from_endpoint")
3226            and model.requester.fetch_properties_from_endpoint
3227        ):
3228            # todo: Deprecate this condition once dependent connectors migrate to query_properties
3229            query_properties_definition = QueryPropertiesModel(
3230                type="QueryProperties",
3231                property_list=model.requester.fetch_properties_from_endpoint,
3232                always_include_properties=None,
3233                property_chunking=None,
3234            )  # type: ignore # $parameters has a default value
3235
3236            query_properties = self.create_query_properties(
3237                model=query_properties_definition,
3238                config=config,
3239            )
3240        elif hasattr(model.requester, "query_properties") and model.requester.query_properties:
3241            query_properties = self.create_query_properties(
3242                model=model.requester.query_properties,
3243                config=config,
3244            )
3245
3246        requester = self._create_component_from_model(
3247            model=model.requester,
3248            decoder=decoder,
3249            name=name,
3250            query_properties_key=query_properties_key,
3251            use_cache=use_cache,
3252            config=config,
3253        )
3254
3255        # Define the cursor only if per-partition or common incremental support is needed
3256        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
3257
3258        if (
3259            not isinstance(stream_slicer, DatetimeBasedCursor)
3260            or type(stream_slicer) is not DatetimeBasedCursor
3261        ):
3262            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
3263            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
3264            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor can still act as the SimpleRetriever's
3265            # request_options_provider.
3266            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
3267        elif not request_options_provider:
3268            request_options_provider = DefaultRequestOptionsProvider(parameters={})
3269
3270        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3271        if self._should_limit_slices_fetched():
3272            stream_slicer = cast(
3273                StreamSlicer,
3274                StreamSlicerTestReadDecorator(
3275                    wrapped_slicer=stream_slicer,
3276                    maximum_number_of_slices=self._limit_slices_fetched or 5,
3277                ),
3278            )
3279
3280        cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
3281        paginator = (
3282            self._create_component_from_model(
3283                model=model.paginator,
3284                config=config,
3285                url_base=_get_url(),
3286                extractor_model=model.record_selector.extractor,
3287                decoder=decoder,
3288                cursor_used_for_stop_condition=cursor_used_for_stop_condition,
3289            )
3290            if model.paginator
3291            else NoPagination(parameters={})
3292        )
3293
3294        ignore_stream_slicer_parameters_on_paginated_requests = (
3295            model.ignore_stream_slicer_parameters_on_paginated_requests or False
3296        )
3297
3298        if (
3299            model.partition_router
3300            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
3301            and not bool(self._connector_state_manager.get_stream_state(name, None))
3302            and any(
3303                parent_stream_config.lazy_read_pointer
3304                for parent_stream_config in model.partition_router.parent_stream_configs
3305            )
3306        ):
3307            if incremental_sync:
3308                if incremental_sync.type != "DatetimeBasedCursor":
3309                    raise ValueError(
3310                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
3311                    )
3312
3313                elif incremental_sync.step or incremental_sync.cursor_granularity:
3314                    raise ValueError(
3315                        f"Found more than one slice per parent. LazySimpleRetriever only supports a single slice read for stream - {name}."
3316                    )
3317
3318            if model.decoder and model.decoder.type != "JsonDecoder":
3319                raise ValueError(
3320                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
3321                )
3322
3323            return LazySimpleRetriever(
3324                name=name,
3325                paginator=paginator,
3326                primary_key=primary_key,
3327                requester=requester,
3328                record_selector=record_selector,
3329                stream_slicer=stream_slicer,
3330                request_option_provider=request_options_provider,
3331                cursor=cursor,
3332                config=config,
3333                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3334                parameters=model.parameters or {},
3335            )
3336
3337        return SimpleRetriever(
3338            name=name,
3339            paginator=paginator,
3340            primary_key=primary_key,
3341            requester=requester,
3342            record_selector=record_selector,
3343            stream_slicer=stream_slicer,
3344            request_option_provider=request_options_provider,
3345            cursor=cursor,
3346            config=config,
3347            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3348            additional_query_properties=query_properties,
3349            log_formatter=self._get_log_formatter(log_formatter, name),
3350            parameters=model.parameters or {},
3351        )
3352
3353    def _get_log_formatter(
3354        self, log_formatter: Callable[[Response], Any] | None, name: str
3355    ) -> Callable[[Response], Any] | None:
3356        if self._should_limit_slices_fetched():
3357            return (
3358                (
3359                    lambda response: format_http_message(
3360                        response,
3361                        f"Stream '{name}' request",
3362                        f"Request performed in order to extract records for stream '{name}'",
3363                        name,
3364                    )
3365                )
3366                if not log_formatter
3367                else log_formatter
3368            )
3369        return None
3370
3371    def _should_limit_slices_fetched(self) -> bool:
3372        """
3373        Returns True if the number of slices fetched should be limited, False otherwise.
3374        This is used to limit the number of slices fetched during tests.
3375        """
3376        return bool(self._limit_slices_fetched or self._emit_connector_builder_messages)
3377
3378    @staticmethod
3379    def _query_properties_in_request_parameters(
3380        requester: Union[HttpRequesterModel, CustomRequesterModel],
3381    ) -> bool:
3382        if not hasattr(requester, "request_parameters"):
3383            return False
3384        request_parameters = requester.request_parameters
3385        if request_parameters and isinstance(request_parameters, Mapping):
3386            for request_parameter in request_parameters.values():
3387                if isinstance(request_parameter, QueryPropertiesModel):
3388                    return True
3389        return False
3390
3391    @staticmethod
3392    def _remove_query_properties(
3393        request_parameters: Mapping[str, Union[str, QueryPropertiesModel]],
3394    ) -> Mapping[str, str]:
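            """
            Drop QueryPropertiesModel values from request_parameters, keeping plain
            string parameters. As a hedged illustration (hypothetical shapes):
            {"limit": "100", "fields": <QueryPropertiesModel>} becomes {"limit": "100"}.
            """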
3395        return {
3396            parameter_field: request_parameter
3397            for parameter_field, request_parameter in request_parameters.items()
3398            if not isinstance(request_parameter, QueryPropertiesModel)
3399        }
3400
3401    def create_state_delegating_stream(
3402        self,
3403        model: StateDelegatingStreamModel,
3404        config: Config,
3405        has_parent_state: Optional[bool] = None,
3406        **kwargs: Any,
3407    ) -> DeclarativeStream:
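            """
            Choose between the incremental and full-refresh variants of a stream based
            on whether stream state (or parent state) already exists, then build the
            chosen stream model.
            """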
3408        if (
3409            model.full_refresh_stream.name != model.name
3410            or model.name != model.incremental_stream.name
3411        ):
3412            raise ValueError(
3413                f"The state_delegating_stream, its full_refresh_stream and its incremental_stream must all have the same name. Instead found {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
3414            )
3415
3416        stream_model = (
3417            model.incremental_stream
3418            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
3419            else model.full_refresh_stream
3420        )
3421
3422        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # Will be created DeclarativeStream as stream_model is stream description
3423
3424    def _create_async_job_status_mapping(
3425        self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
3426    ) -> Mapping[str, AsyncJobStatus]:
3427        api_status_to_cdk_status = {}
3428        for cdk_status, api_statuses in model.dict().items():
3429            if cdk_status == "type":
3430                # This key is an element of the dict because of the CDK's typing, but it is not a CDK status
3431                continue
3432
3433            for status in api_statuses:
3434                if status in api_status_to_cdk_status:
3435                    raise ValueError(
3436                        f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once"
3437                    )
3438                api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status)
3439        return api_status_to_cdk_status
3440
3441    def _get_async_job_status(self, status: str) -> AsyncJobStatus:
3442        match status:
3443            case "running":
3444                return AsyncJobStatus.RUNNING
3445            case "completed":
3446                return AsyncJobStatus.COMPLETED
3447            case "failed":
3448                return AsyncJobStatus.FAILED
3449            case "timeout":
3450                return AsyncJobStatus.TIMED_OUT
3451            case _:
3452                raise ValueError(f"Unsupported CDK status {status}")
3453
3454    def create_async_retriever(
3455        self,
3456        model: AsyncRetrieverModel,
3457        config: Config,
3458        *,
3459        name: str,
3460        primary_key: Optional[
3461            Union[str, List[str], List[List[str]]]
3462        ],  # this seems to be needed to match create_simple_retriever
3463        stream_slicer: Optional[StreamSlicer],
3464        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3465        transformations: List[RecordTransformation],
3466        **kwargs: Any,
3467    ) -> AsyncRetriever:
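            """
            Assemble an AsyncRetriever for async/bulk job APIs. Separate requesters are
            built for job creation, polling and download (plus optional
            download-target, abort and delete requesters), and an AsyncHttpJobRepository
            coordinates them using the declared status mapping and a job timeout that
            is shortened during connector builder test reads.
            """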
3468        def _get_download_retriever() -> SimpleRetriever:
3469            # We create a record selector for the download retriever
3470            # with no schema normalization, no transformations and no record filter,
3471            # as all of this occurs in the record_selector of the AsyncRetriever
3472            record_selector = RecordSelector(
3473                extractor=download_extractor,
3474                name=name,
3475                record_filter=None,
3476                transformations=[],
3477                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
3478                config=config,
3479                parameters={},
3480            )
3481            paginator = (
3482                self._create_component_from_model(
3483                    model=model.download_paginator,
3484                    decoder=decoder,
3485                    config=config,
3486                    url_base="",
3487                )
3488                if model.download_paginator
3489                else NoPagination(parameters={})
3490            )
3491
3492            return SimpleRetriever(
3493                requester=download_requester,
3494                record_selector=record_selector,
3495                primary_key=None,
3496                name=job_download_components_name,
3497                paginator=paginator,
3498                config=config,
3499                parameters={},
3500            )
3501
3502        def _get_job_timeout() -> datetime.timedelta:
3503            user_defined_timeout: Optional[int] = (
3504                int(
3505                    InterpolatedString.create(
3506                        str(model.polling_job_timeout),
3507                        parameters={},
3508                    ).eval(config)
3509                )
3510                if model.polling_job_timeout
3511                else None
3512            )
3513
3514            # During a test read, use the user-defined timeout or default to 15 minutes
3515            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
3516            # Outside the connector builder, the default is 60 minutes.
3517            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
3518
3519            return (
3520                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
3521            )
3522
3523        decoder = (
3524            self._create_component_from_model(model=model.decoder, config=config)
3525            if model.decoder
3526            else JsonDecoder(parameters={})
3527        )
3528        record_selector = self._create_component_from_model(
3529            model=model.record_selector,
3530            config=config,
3531            decoder=decoder,
3532            name=name,
3533            transformations=transformations,
3534            client_side_incremental_sync=client_side_incremental_sync,
3535        )
3536
3537        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3538        if self._should_limit_slices_fetched():
3539            stream_slicer = cast(
3540                StreamSlicer,
3541                StreamSlicerTestReadDecorator(
3542                    wrapped_slicer=stream_slicer,
3543                    maximum_number_of_slices=self._limit_slices_fetched or 5,
3544                ),
3545            )
3546
3547        creation_requester = self._create_component_from_model(
3548            model=model.creation_requester,
3549            decoder=decoder,
3550            config=config,
3551            name=f"job creation - {name}",
3552        )
3553        polling_requester = self._create_component_from_model(
3554            model=model.polling_requester,
3555            decoder=decoder,
3556            config=config,
3557            name=f"job polling - {name}",
3558        )
3559        job_download_components_name = f"job download - {name}"
3560        download_decoder = (
3561            self._create_component_from_model(model=model.download_decoder, config=config)
3562            if model.download_decoder
3563            else JsonDecoder(parameters={})
3564        )
3565        download_extractor = (
3566            self._create_component_from_model(
3567                model=model.download_extractor,
3568                config=config,
3569                decoder=download_decoder,
3570                parameters=model.parameters,
3571            )
3572            if model.download_extractor
3573            else DpathExtractor(
3574                [],
3575                config=config,
3576                decoder=download_decoder,
3577                parameters=model.parameters or {},
3578            )
3579        )
3580        download_requester = self._create_component_from_model(
3581            model=model.download_requester,
3582            decoder=download_decoder,
3583            config=config,
3584            name=job_download_components_name,
3585        )
3586        download_retriever = _get_download_retriever()
3587        abort_requester = (
3588            self._create_component_from_model(
3589                model=model.abort_requester,
3590                decoder=decoder,
3591                config=config,
3592                name=f"job abort - {name}",
3593            )
3594            if model.abort_requester
3595            else None
3596        )
3597        delete_requester = (
3598            self._create_component_from_model(
3599                model=model.delete_requester,
3600                decoder=decoder,
3601                config=config,
3602                name=f"job delete - {name}",
3603            )
3604            if model.delete_requester
3605            else None
3606        )
3607        download_target_requester = (
3608            self._create_component_from_model(
3609                model=model.download_target_requester,
3610                decoder=decoder,
3611                config=config,
3612                name=f"job extract_url - {name}",
3613            )
3614            if model.download_target_requester
3615            else None
3616        )
3617        status_extractor = self._create_component_from_model(
3618            model=model.status_extractor, decoder=decoder, config=config, name=name
3619        )
3620        download_target_extractor = self._create_component_from_model(
3621            model=model.download_target_extractor,
3622            decoder=decoder,
3623            config=config,
3624            name=name,
3625        )
3626
3627        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
3628            creation_requester=creation_requester,
3629            polling_requester=polling_requester,
3630            download_retriever=download_retriever,
3631            download_target_requester=download_target_requester,
3632            abort_requester=abort_requester,
3633            delete_requester=delete_requester,
3634            status_extractor=status_extractor,
3635            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3636            download_target_extractor=download_target_extractor,
3637            job_timeout=_get_job_timeout(),
3638        )
3639
3640        async_job_partition_router = AsyncJobPartitionRouter(
3641            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3642                job_repository,
3643                stream_slices,
3644                self._job_tracker,
3645                self._message_repository,
3646                # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk
3647                has_bulk_parent=False,
3648                # Set `job_max_retry` to 1 for the Connector Builder use case.
3649                # With `None`, the default of 3 retry attempts is applied under the hood.
3650                job_max_retry=1 if self._emit_connector_builder_messages else None,
3651            ),
3652            stream_slicer=stream_slicer,
3653            config=config,
3654            parameters=model.parameters or {},
3655        )
3656
3657        return AsyncRetriever(
3658            record_selector=record_selector,
3659            stream_slicer=async_job_partition_router,
3660            config=config,
3661            parameters=model.parameters or {},
3662        )
3663
3664    def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec:
3665        config_migrations = [
3666            self._create_component_from_model(migration, config)
3667            for migration in (
3668                model.config_normalization_rules.config_migrations
3669                if (
3670                    model.config_normalization_rules
3671                    and model.config_normalization_rules.config_migrations
3672                )
3673                else []
3674            )
3675        ]
3676        config_transformations = [
3677            self._create_component_from_model(transformation, config)
3678            for transformation in (
3679                model.config_normalization_rules.transformations
3680                if (
3681                    model.config_normalization_rules
3682                    and model.config_normalization_rules.transformations
3683                )
3684                else []
3685            )
3686        ]
3687        config_validations = [
3688            self._create_component_from_model(validation, config)
3689            for validation in (
3690                model.config_normalization_rules.validations
3691                if (
3692                    model.config_normalization_rules
3693                    and model.config_normalization_rules.validations
3694                )
3695                else []
3696            )
3697        ]
3698
3699        return Spec(
3700            connection_specification=model.connection_specification,
3701            documentation_url=model.documentation_url,
3702            advanced_auth=model.advanced_auth,
3703            parameters={},
3704            config_migrations=config_migrations,
3705            config_transformations=config_transformations,
3706            config_validations=config_validations,
3707        )
3708
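    # A rough sketch of the manifest shape consumed by create_spec above. Field names
    # follow the Pydantic models used here; the authoritative shape lives in the
    # declarative component schema, so treat this as illustrative only:
    #
    #   spec:
    #     type: Spec
    #     connection_specification: { ... }
    #     config_normalization_rules:
    #       config_migrations: [ ... ]   # each entry is built into a ConfigMigration
    #       transformations: [ ... ]     # each entry is built into a config transformation
    #       validations: [ ... ]         # each entry is built into a validator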
3709    def create_substream_partition_router(
3710        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
3711    ) -> SubstreamPartitionRouter:
3712        parent_stream_configs = []
3713        if model.parent_stream_configs:
3714            parent_stream_configs.extend(
3715                [
3716                    self._create_message_repository_substream_wrapper(
3717                        model=parent_stream_config, config=config, **kwargs
3718                    )
3719                    for parent_stream_config in model.parent_stream_configs
3720                ]
3721            )
3722
3723        return SubstreamPartitionRouter(
3724            parent_stream_configs=parent_stream_configs,
3725            parameters=model.parameters or {},
3726            config=config,
3727        )
3728
3729    def _create_message_repository_substream_wrapper(
3730        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
3731    ) -> Any:
3732        substream_factory = ModelToComponentFactory(
3733            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
3734            limit_slices_fetched=self._limit_slices_fetched,
3735            emit_connector_builder_messages=self._emit_connector_builder_messages,
3736            disable_retries=self._disable_retries,
3737            disable_cache=self._disable_cache,
3738            message_repository=LogAppenderMessageRepositoryDecorator(
3739                {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}},
3740                self._message_repository,
3741                self._evaluate_log_level(self._emit_connector_builder_messages),
3742            ),
3743        )
3744
3745        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
3746        has_parent_state = bool(
3747            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
3748            if model.incremental_dependency
3749            else False
3750        )
3751        return substream_factory._create_component_from_model(
3752            model=model, config=config, has_parent_state=has_parent_state, **kwargs
3753        )
3754
3755    @staticmethod
3756    def create_wait_time_from_header(
3757        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
3758    ) -> WaitTimeFromHeaderBackoffStrategy:
3759        return WaitTimeFromHeaderBackoffStrategy(
3760            header=model.header,
3761            parameters=model.parameters or {},
3762            config=config,
3763            regex=model.regex,
3764            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
3767        )
3768
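    # Illustrative example (header name and value assumed, not taken from this file):
    # with `header: "Retry-After"` and a response header `Retry-After: 30`, this backoff
    # strategy waits 30 seconds before retrying; an optional `regex` can extract the
    # number from a composite header value, and `max_waiting_time_in_seconds` bounds how
    # long the connector is willing to wait.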
3769    @staticmethod
3770    def create_wait_until_time_from_header(
3771        model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
3772    ) -> WaitUntilTimeFromHeaderBackoffStrategy:
3773        return WaitUntilTimeFromHeaderBackoffStrategy(
3774            header=model.header,
3775            parameters=model.parameters or {},
3776            config=config,
3777            min_wait=model.min_wait,
3778            regex=model.regex,
3779        )
3780
3781    def get_message_repository(self) -> MessageRepository:
3782        return self._message_repository
3783
3784    def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
3785        return Level.DEBUG if emit_connector_builder_messages else Level.INFO
3786
3787    @staticmethod
3788    def create_components_mapping_definition(
3789        model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
3790    ) -> ComponentMappingDefinition:
3791        interpolated_value = InterpolatedString.create(
3792            model.value, parameters=model.parameters or {}
3793        )
3794        field_path = [
3795            InterpolatedString.create(path, parameters=model.parameters or {})
3796            for path in model.field_path
3797        ]
3798        return ComponentMappingDefinition(
3799            field_path=field_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
3800            value=interpolated_value,
3801            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
3802            create_or_update=model.create_or_update,
3803            parameters=model.parameters or {},
3804        )
3805
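    # Hypothetical example (the `components_values` interpolation context is an
    # assumption for illustration, not defined in this file): a mapping such as
    #   field_path: ["name"]
    #   value: "{{ components_values['stream_name'] }}"
    # targets the `name` field of a stream template and fills it from resolver output,
    # with `value_type` coercing the interpolated result (e.g. "integer" -> int).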
3806    def create_http_components_resolver(
3807        self, model: HttpComponentsResolverModel, config: Config
3808    ) -> Any:
3809        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3810        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3811
3812        retriever = self._create_component_from_model(
3813            model=model.retriever,
3814            config=config,
3815            name="",
3816            primary_key=None,
3817            stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3818            transformations=[],
3819        )
3820
3821        components_mapping = [
3822            self._create_component_from_model(
3823                model=components_mapping_definition_model,
3824                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3825                    components_mapping_definition_model.value_type
3826                ),
3827                config=config,
3828            )
3829            for components_mapping_definition_model in model.components_mapping
3830        ]
3831
3832        return HttpComponentsResolver(
3833            retriever=retriever,
3834            config=config,
3835            components_mapping=components_mapping,
3836            parameters=model.parameters or {},
3837        )
3838
3839    @staticmethod
3840    def create_stream_config(
3841        model: StreamConfigModel, config: Config, **kwargs: Any
3842    ) -> StreamConfig:
3843        model_configs_pointer: List[Union[InterpolatedString, str]] = (
3844            list(model.configs_pointer) if model.configs_pointer else []
3845        )
3846
3847        return StreamConfig(
3848            configs_pointer=model_configs_pointer,
3849            default_values=model.default_values,
3850            parameters=model.parameters or {},
3851        )
3852
3853    def create_config_components_resolver(
3854        self, model: ConfigComponentsResolverModel, config: Config
3855    ) -> Any:
3856        model_stream_configs = (
3857            model.stream_config if isinstance(model.stream_config, list) else [model.stream_config]
3858        )
3859
3860        stream_configs = [
3861            self._create_component_from_model(
3862                stream_config, config=config, parameters=model.parameters or {}
3863            )
3864            for stream_config in model_stream_configs
3865        ]
3866
3867        components_mapping = [
3868            self._create_component_from_model(
3869                model=components_mapping_definition_model,
3870                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3871                    components_mapping_definition_model.value_type
3872                ),
3873                config=config,
3874            )
3875            for components_mapping_definition_model in model.components_mapping
3876        ]
3877
3878        return ConfigComponentsResolver(
3879            stream_configs=stream_configs,
3880            config=config,
3881            components_mapping=components_mapping,
3882            parameters=model.parameters or {},
3883        )
3884
3885    def create_parametrized_components_resolver(
3886        self, model: ParametrizedComponentsResolverModel, config: Config
3887    ) -> ParametrizedComponentsResolver:
3888        stream_parameters = StreamParametersDefinition(
3889            list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
3890        )
3891        components_mapping = [
3892            self._create_component_from_model(
3893                model=components_mapping_definition_model,
3894                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3895                    components_mapping_definition_model.value_type
3896                ),
3897                config=config,
3898            )
3899            for components_mapping_definition_model in model.components_mapping
3900        ]
3901        return ParametrizedComponentsResolver(
3902            stream_parameters=stream_parameters,
3903            config=config,
3904            components_mapping=components_mapping,
3905            parameters=model.parameters or {},
3906        )
3907
3908    _UNSUPPORTED_DECODER_ERROR = (
3909        "Specified decoder of {decoder_type} is not supported for pagination. "
3910        "Please use `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. "
3911        "If using `GzipParser`, please ensure that the lowest-level inner_parser is a `JsonParser`."
3912    )
3913
3914    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
3915        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
3916            return True
3917        elif isinstance(decoder, CompositeRawDecoder):
3918            return self._is_supported_parser_for_pagination(decoder.parser)
3919        else:
3920            return False
3921
3922    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
3923        if isinstance(parser, JsonParser):
3924            return True
3925        elif isinstance(parser, GzipParser):
3926            return isinstance(parser.inner_parser, JsonParser)
3927        else:
3928            return False
3929
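    # For illustration, the combinations accepted by the two checks above:
    #   JsonDecoder / XmlDecoder                                              -> supported
    #   CompositeRawDecoder(parser=JsonParser(...))                           -> supported
    #   CompositeRawDecoder(parser=GzipParser(inner_parser=JsonParser(...)))  -> supported
    #   CompositeRawDecoder(parser=CsvParser(...))                            -> not supported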
3930    def create_http_api_budget(
3931        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
3932    ) -> HttpAPIBudget:
3933        policies = [
3934            self._create_component_from_model(model=policy, config=config)
3935            for policy in model.policies
3936        ]
3937
3938        return HttpAPIBudget(
3939            policies=policies,
3940            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
3941            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
3942            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
3943        )
3944
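    # A rough manifest sketch for the budget above (illustrative; the commented values
    # mirror the fallbacks applied when the fields are omitted):
    #   api_budget:
    #     type: HTTPAPIBudget
    #     policies: [ ... ]                                  # rate policy components
    #     ratelimit_reset_header: "ratelimit-reset"          # default
    #     ratelimit_remaining_header: "ratelimit-remaining"  # default
    #     status_codes_for_ratelimit_hit: [429]              # default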
3945    def create_fixed_window_call_rate_policy(
3946        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
3947    ) -> FixedWindowCallRatePolicy:
3948        matchers = [
3949            self._create_component_from_model(model=matcher, config=config)
3950            for matcher in model.matchers
3951        ]
3952
3953        # Set the initial reset timestamp to 10 days from now.
3954        # This value will be updated by the first request.
3955        return FixedWindowCallRatePolicy(
3956            next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
3957            period=parse_duration(model.period),
3958            call_limit=model.call_limit,
3959            matchers=matchers,
3960        )
3961
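    # Worked example: with `period: "PT1M"` and `call_limit: 60`, the policy above
    # allows at most 60 calls per one-minute window. `parse_duration` expects an
    # ISO 8601 duration string, so "PT1M" parses to a one-minute timedelta.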
3962    def create_file_uploader(
3963        self, model: FileUploaderModel, config: Config, **kwargs: Any
3964    ) -> FileUploader:
3965        name = "File Uploader"
3966        requester = self._create_component_from_model(
3967            model=model.requester,
3968            config=config,
3969            name=name,
3970            **kwargs,
3971        )
3972        download_target_extractor = self._create_component_from_model(
3973            model=model.download_target_extractor,
3974            config=config,
3975            name=name,
3976            **kwargs,
3977        )
3978        emit_connector_builder_messages = self._emit_connector_builder_messages
3979        file_uploader = DefaultFileUploader(
3980            requester=requester,
3981            download_target_extractor=download_target_extractor,
3982            config=config,
3983            file_writer=NoopFileWriter()
3984            if emit_connector_builder_messages
3985            else LocalFileSystemFileWriter(),
3986            parameters=model.parameters or {},
3987            filename_extractor=model.filename_extractor if model.filename_extractor else None,
3988        )
3989
3990        return (
3991            ConnectorBuilderFileUploader(file_uploader)
3992            if emit_connector_builder_messages
3993            else file_uploader
3994        )
3995
3996    def create_moving_window_call_rate_policy(
3997        self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3998    ) -> MovingWindowCallRatePolicy:
3999        rates = [
4000            self._create_component_from_model(model=rate, config=config) for rate in model.rates
4001        ]
4002        matchers = [
4003            self._create_component_from_model(model=matcher, config=config)
4004            for matcher in model.matchers
4005        ]
4006        return MovingWindowCallRatePolicy(
4007            rates=rates,
4008            matchers=matchers,
4009        )
4010
4011    def create_unlimited_call_rate_policy(
4012        self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
4013    ) -> UnlimitedCallRatePolicy:
4014        matchers = [
4015            self._create_component_from_model(model=matcher, config=config)
4016            for matcher in model.matchers
4017        ]
4018
4019        return UnlimitedCallRatePolicy(
4020            matchers=matchers,
4021        )
4022
4023    def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
4024        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
4025        return Rate(
4026            limit=int(interpolated_limit.eval(config=config)),
4027            interval=parse_duration(model.interval),
4028        )
4029
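    # Worked example for create_rate above, with an assumed config key: a manifest rate of
    #   {"type": "Rate", "limit": "{{ config['rate_limit'] }}", "interval": "PT10S"}
    # evaluated against config {"rate_limit": 100} yields
    # Rate(limit=100, interval=datetime.timedelta(seconds=10)).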
4030    def create_http_request_matcher(
4031        self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
4032    ) -> HttpRequestRegexMatcher:
4033        return HttpRequestRegexMatcher(
4034            method=model.method,
4035            url_base=model.url_base,
4036            url_path_pattern=model.url_path_pattern,
4037            params=model.params,
4038            headers=model.headers,
4039        )
4040
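    # Illustrative example (values assumed): a matcher with `method: "GET"`,
    # `url_base: "https://api.example.com"` and `url_path_pattern: "/v1/users"`
    # scopes a rate-limit policy to GET requests under that base URL whose path
    # matches the given regular expression.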
4041    def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
4042        self._api_budget = self.create_component(
4043            model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
4044        )
4045
4046    def create_grouping_partition_router(
4047        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
4048    ) -> GroupingPartitionRouter:
4049        underlying_router = self._create_component_from_model(
4050            model=model.underlying_partition_router, config=config
4051        )
4052        if model.group_size < 1:
4053            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
4054
4055        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
4056        # because they are specific to individual partitions and cannot be aggregated or handled
4057        # when grouping, potentially leading to incorrect API calls. Any request customization
4058        # should be managed at the stream level through the requester's configuration.
4059        if isinstance(underlying_router, SubstreamPartitionRouter):
4060            if any(
4061                parent_config.request_option
4062                for parent_config in underlying_router.parent_stream_configs
4063            ):
4064                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
4065
4066        if isinstance(underlying_router, ListPartitionRouter):
4067            if underlying_router.request_option:
4068                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
4069
4070        return GroupingPartitionRouter(
4071            group_size=model.group_size,
4072            underlying_partition_router=underlying_router,
4073            deduplicate=model.deduplicate if model.deduplicate is not None else True,
4074            config=config,
4075        )
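    # Worked example: with `group_size: 3` and an underlying router yielding partitions
    # [p1, p2, p3, p4, p5], the grouping router emits [p1, p2, p3] and [p4, p5] as two
    # slices; `deduplicate` defaults to True here, dropping duplicate partitions.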
class ModelToComponentFactory:
 628class ModelToComponentFactory:
 629    EPOCH_DATETIME_FORMAT = "%s"
 630
 631    def __init__(
 632        self,
 633        limit_pages_fetched_per_slice: Optional[int] = None,
 634        limit_slices_fetched: Optional[int] = None,
 635        emit_connector_builder_messages: bool = False,
 636        disable_retries: bool = False,
 637        disable_cache: bool = False,
 638        disable_resumable_full_refresh: bool = False,
 639        message_repository: Optional[MessageRepository] = None,
 640        connector_state_manager: Optional[ConnectorStateManager] = None,
 641        max_concurrent_async_job_count: Optional[int] = None,
 642    ):
 643        self._init_mappings()
 644        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
 645        self._limit_slices_fetched = limit_slices_fetched
 646        self._emit_connector_builder_messages = emit_connector_builder_messages
 647        self._disable_retries = disable_retries
 648        self._disable_cache = disable_cache
 649        self._disable_resumable_full_refresh = disable_resumable_full_refresh
 650        self._message_repository = message_repository or InMemoryMessageRepository(
 651            self._evaluate_log_level(emit_connector_builder_messages)
 652        )
 653        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
 654        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
 655        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
 656        # placeholder for deprecation warnings
 657        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
 658
 659    def _init_mappings(self) -> None:
 660        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
 661            AddedFieldDefinitionModel: self.create_added_field_definition,
 662            AddFieldsModel: self.create_add_fields,
 663            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
 664            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
 665            BearerAuthenticatorModel: self.create_bearer_authenticator,
 666            CheckStreamModel: self.create_check_stream,
 667            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
 668            CheckDynamicStreamModel: self.create_check_dynamic_stream,
 669            CompositeErrorHandlerModel: self.create_composite_error_handler,
 670            ConcurrencyLevelModel: self.create_concurrency_level,
 671            ConfigMigrationModel: self.create_config_migration,
 672            ConfigAddFieldsModel: self.create_config_add_fields,
 673            ConfigRemapFieldModel: self.create_config_remap_field,
 674            ConfigRemoveFieldsModel: self.create_config_remove_fields,
 675            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
 676            CsvDecoderModel: self.create_csv_decoder,
 677            CursorPaginationModel: self.create_cursor_pagination,
 678            CustomAuthenticatorModel: self.create_custom_component,
 679            CustomBackoffStrategyModel: self.create_custom_component,
 680            CustomDecoderModel: self.create_custom_component,
 681            CustomErrorHandlerModel: self.create_custom_component,
 682            CustomIncrementalSyncModel: self.create_custom_component,
 683            CustomRecordExtractorModel: self.create_custom_component,
 684            CustomRecordFilterModel: self.create_custom_component,
 685            CustomRequesterModel: self.create_custom_component,
 686            CustomRetrieverModel: self.create_custom_component,
 687            CustomSchemaLoader: self.create_custom_component,
 688            CustomSchemaNormalizationModel: self.create_custom_component,
 689            CustomStateMigration: self.create_custom_component,
 690            CustomPaginationStrategyModel: self.create_custom_component,
 691            CustomPartitionRouterModel: self.create_custom_component,
 692            CustomTransformationModel: self.create_custom_component,
 693            CustomValidationStrategyModel: self.create_custom_component,
 694            CustomConfigTransformationModel: self.create_custom_component,
 695            DatetimeBasedCursorModel: self.create_datetime_based_cursor,
 696            DeclarativeStreamModel: self.create_declarative_stream,
 697            DefaultErrorHandlerModel: self.create_default_error_handler,
 698            DefaultPaginatorModel: self.create_default_paginator,
 699            DpathExtractorModel: self.create_dpath_extractor,
 700            DpathValidatorModel: self.create_dpath_validator,
 701            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
 702            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
 703            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
 704            GroupByKeyMergeStrategyModel: self.create_group_by_key,
 705            HttpRequesterModel: self.create_http_requester,
 706            HttpResponseFilterModel: self.create_http_response_filter,
 707            InlineSchemaLoaderModel: self.create_inline_schema_loader,
 708            JsonDecoderModel: self.create_json_decoder,
 709            JsonlDecoderModel: self.create_jsonl_decoder,
 710            GzipDecoderModel: self.create_gzip_decoder,
 711            KeysToLowerModel: self.create_keys_to_lower_transformation,
 712            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
 713            KeysReplaceModel: self.create_keys_replace_transformation,
 714            FlattenFieldsModel: self.create_flatten_fields,
 715            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
 716            IterableDecoderModel: self.create_iterable_decoder,
 717            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
 718            XmlDecoderModel: self.create_xml_decoder,
 719            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
 720            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
 721            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
 722            TypesMapModel: self.create_types_map,
 723            ComplexFieldTypeModel: self.create_complex_field_type,
 724            JwtAuthenticatorModel: self.create_jwt_authenticator,
 725            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
 726            ListPartitionRouterModel: self.create_list_partition_router,
 727            MinMaxDatetimeModel: self.create_min_max_datetime,
 728            NoAuthModel: self.create_no_auth,
 729            NoPaginationModel: self.create_no_pagination,
 730            OAuthAuthenticatorModel: self.create_oauth_authenticator,
 731            OffsetIncrementModel: self.create_offset_increment,
 732            PageIncrementModel: self.create_page_increment,
 733            ParentStreamConfigModel: self.create_parent_stream_config,
 734            PredicateValidatorModel: self.create_predicate_validator,
 735            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
 736            PropertyChunkingModel: self.create_property_chunking,
 737            QueryPropertiesModel: self.create_query_properties,
 738            RecordFilterModel: self.create_record_filter,
 739            RecordSelectorModel: self.create_record_selector,
 740            RemoveFieldsModel: self.create_remove_fields,
 741            RequestPathModel: self.create_request_path,
 742            RequestOptionModel: self.create_request_option,
 743            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
 744            SelectiveAuthenticatorModel: self.create_selective_authenticator,
 745            SimpleRetrieverModel: self.create_simple_retriever,
 746            StateDelegatingStreamModel: self.create_state_delegating_stream,
 747            SpecModel: self.create_spec,
 748            SubstreamPartitionRouterModel: self.create_substream_partition_router,
 749            ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema,
 750            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
 751            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
 752            AsyncRetrieverModel: self.create_async_retriever,
 753            HttpComponentsResolverModel: self.create_http_components_resolver,
 754            ConfigComponentsResolverModel: self.create_config_components_resolver,
 755            ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver,
 756            StreamConfigModel: self.create_stream_config,
 757            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
 758            ZipfileDecoderModel: self.create_zipfile_decoder,
 759            HTTPAPIBudgetModel: self.create_http_api_budget,
 760            FileUploaderModel: self.create_file_uploader,
 761            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
 762            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
 763            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
 764            RateModel: self.create_rate,
 765            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
 766            GroupingPartitionRouterModel: self.create_grouping_partition_router,
 767        }
 768
 769        # Needed for the case where we need to perform a second parse on the fields of a custom component
 770        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}
 771
 772    def create_component(
 773        self,
 774        model_type: Type[BaseModel],
 775        component_definition: ComponentDefinition,
 776        config: Config,
 777        **kwargs: Any,
 778    ) -> Any:
 779        """
 780        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
 781        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating
 782        creating declarative components from that model.
 783
 784        :param model_type: The type of declarative component that is being initialized
 785        :param component_definition: The mapping that represents a declarative component
 786        :param config: The connector config that is provided by the customer
 787        :return: The declarative component to be used at runtime
 788        """
 789
 790        component_type = component_definition.get("type")
 791        if component_definition.get("type") != model_type.__name__:
 792            raise ValueError(
 793                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
 794            )
 795
 796        declarative_component_model = model_type.parse_obj(component_definition)
 797
 798        if not isinstance(declarative_component_model, model_type):
 799            raise ValueError(
 800                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
 801            )
 802
 803        return self._create_component_from_model(
 804            model=declarative_component_model, config=config, **kwargs
 805        )
 806
 807    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
 808        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
 809            raise ValueError(
 810                f"{model.__class__} with attributes {model} is not a valid component type"
 811            )
 812        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
 813        if not component_constructor:
 814            raise ValueError(f"Could not find constructor for {model.__class__}")
 815
 816        # collect deprecation warnings for supported models.
 817        if isinstance(model, BaseModelWithDeprecations):
 818            self._collect_model_deprecations(model)
 819
 820        return component_constructor(model=model, config=config, **kwargs)
 821
 822    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
 823        """
 824        Returns the deprecation warnings that were collected during the creation of components.
 825        """
 826        return self._collected_deprecation_logs
 827
 828    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
 829        """
 830        Collects deprecation logs from the given model and appends any new logs to the internal collection.
 831
 832        This method checks if the provided model has deprecation logs (identified by the presence of the DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not already been collected, ensuring that duplicate logs are avoided.
 833
 834        Args:
 835            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
 836        """
 837        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
 838            for log in model._deprecation_logs:
 839                # avoid duplicates for deprecation logs observed.
 840                if log not in self._collected_deprecation_logs:
 841                    self._collected_deprecation_logs.append(log)
 842
 843    def create_config_migration(
 844        self, model: ConfigMigrationModel, config: Config
 845    ) -> ConfigMigration:
 846        transformations: List[ConfigTransformation] = [
 847            self._create_component_from_model(transformation, config)
 848            for transformation in model.transformations
 849        ]
 850
 851        return ConfigMigration(
 852            description=model.description,
 853            transformations=transformations,
 854        )
 855
 856    def create_config_add_fields(
 857        self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any
 858    ) -> ConfigAddFields:
 859        fields = [self._create_component_from_model(field, config) for field in model.fields]
 860        return ConfigAddFields(
 861            fields=fields,
 862            condition=model.condition or "",
 863        )
 864
 865    @staticmethod
 866    def create_config_remove_fields(
 867        model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any
 868    ) -> ConfigRemoveFields:
 869        return ConfigRemoveFields(
 870            field_pointers=model.field_pointers,
 871            condition=model.condition or "",
 872        )
 873
 874    @staticmethod
 875    def create_config_remap_field(
 876        model: ConfigRemapFieldModel, config: Config, **kwargs: Any
 877    ) -> ConfigRemapField:
 878        mapping = cast(Mapping[str, Any], model.map)
 879        return ConfigRemapField(
 880            map=mapping,
 881            field_path=model.field_path,
 882            config=config,
 883        )
 884
 885    def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator:
 886        strategy = self._create_component_from_model(model.validation_strategy, config)
 887
 888        return DpathValidator(
 889            field_path=model.field_path,
 890            strategy=strategy,
 891        )
 892
 893    def create_predicate_validator(
 894        self, model: PredicateValidatorModel, config: Config
 895    ) -> PredicateValidator:
 896        strategy = self._create_component_from_model(model.validation_strategy, config)
 897
 898        return PredicateValidator(
 899            value=model.value,
 900            strategy=strategy,
 901        )
 902
 903    @staticmethod
 904    def create_validate_adheres_to_schema(
 905        model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any
 906    ) -> ValidateAdheresToSchema:
 907        base_schema = cast(Mapping[str, Any], model.base_schema)
 908        return ValidateAdheresToSchema(
 909            schema=base_schema,
 910        )
 911
 912    @staticmethod
 913    def create_added_field_definition(
 914        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
 915    ) -> AddedFieldDefinition:
 916        interpolated_value = InterpolatedString.create(
 917            model.value, parameters=model.parameters or {}
 918        )
 919        return AddedFieldDefinition(
 920            path=model.path,
 921            value=interpolated_value,
 922            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
 923            parameters=model.parameters or {},
 924        )
 925
 926    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
 927        added_field_definitions = [
 928            self._create_component_from_model(
 929                model=added_field_definition_model,
 930                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
 931                    added_field_definition_model.value_type
 932                ),
 933                config=config,
 934            )
 935            for added_field_definition_model in model.fields
 936        ]
 937        return AddFields(
 938            fields=added_field_definitions,
 939            condition=model.condition or "",
 940            parameters=model.parameters or {},
 941        )
 942
 943    def create_keys_to_lower_transformation(
 944        self, model: KeysToLowerModel, config: Config, **kwargs: Any
 945    ) -> KeysToLowerTransformation:
 946        return KeysToLowerTransformation()
 947
 948    def create_keys_to_snake_transformation(
 949        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
 950    ) -> KeysToSnakeCaseTransformation:
 951        return KeysToSnakeCaseTransformation()
 952
 953    def create_keys_replace_transformation(
 954        self, model: KeysReplaceModel, config: Config, **kwargs: Any
 955    ) -> KeysReplaceTransformation:
 956        return KeysReplaceTransformation(
 957            old=model.old, new=model.new, parameters=model.parameters or {}
 958        )
 959
 960    def create_flatten_fields(
 961        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
 962    ) -> FlattenFields:
 963        return FlattenFields(
 964            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
 965        )
 966
 967    def create_dpath_flatten_fields(
 968        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
 969    ) -> DpathFlattenFields:
 970        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
 971        key_transformation = (
 972            KeyTransformation(
 973                config=config,
 974                prefix=model.key_transformation.prefix,
 975                suffix=model.key_transformation.suffix,
 976                parameters=model.parameters or {},
 977            )
 978            if model.key_transformation is not None
 979            else None
 980        )
 981        return DpathFlattenFields(
 982            config=config,
 983            field_path=model_field_path,
 984            delete_origin_value=model.delete_origin_value
 985            if model.delete_origin_value is not None
 986            else False,
 987            replace_record=model.replace_record if model.replace_record is not None else False,
 988            key_transformation=key_transformation,
 989            parameters=model.parameters or {},
 990        )
 991
 992    @staticmethod
 993    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
 994        if not value_type:
 995            return None
 996        names_to_types = {
 997            ValueType.string: str,
 998            ValueType.number: float,
 999            ValueType.integer: int,
1000            ValueType.boolean: bool,
1001        }
1002        return names_to_types[value_type]
1003
1004    def create_api_key_authenticator(
1005        self,
1006        model: ApiKeyAuthenticatorModel,
1007        config: Config,
1008        token_provider: Optional[TokenProvider] = None,
1009        **kwargs: Any,
1010    ) -> ApiKeyAuthenticator:
1011        if model.inject_into is None and model.header is None:
1012            raise ValueError(
1013                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
1014            )
1015
1016        if model.inject_into is not None and model.header is not None:
1017            raise ValueError(
1018                "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option"
1019            )
1020
1021        if token_provider is not None and model.api_token != "":
1022            raise ValueError(
1023                "If token_provider is set, api_token is ignored and has to be set to empty string."
1024            )
1025
1026        request_option = (
1027            self._create_component_from_model(
1028                model.inject_into, config, parameters=model.parameters or {}
1029            )
1030            if model.inject_into
1031            else RequestOption(
1032                inject_into=RequestOptionType.header,
1033                field_name=model.header or "",
1034                parameters=model.parameters or {},
1035            )
1036        )
1037
1038        return ApiKeyAuthenticator(
1039            token_provider=(
1040                token_provider
1041                if token_provider is not None
1042                else InterpolatedStringTokenProvider(
1043                    api_token=model.api_token or "",
1044                    config=config,
1045                    parameters=model.parameters or {},
1046                )
1047            ),
1048            request_option=request_option,
1049            config=config,
1050            parameters=model.parameters or {},
1051        )
1052
1053    def create_legacy_to_per_partition_state_migration(
1054        self,
1055        model: LegacyToPerPartitionStateMigrationModel,
1056        config: Mapping[str, Any],
1057        declarative_stream: DeclarativeStreamModel,
1058    ) -> LegacyToPerPartitionStateMigration:
1059        retriever = declarative_stream.retriever
1060        if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)):
1061            raise ValueError(
1062                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}"
1063            )
1064        partition_router = retriever.partition_router
1065        if not isinstance(
1066            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
1067        ):
1068            raise ValueError(
1069                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
1070            )
1071        if not hasattr(partition_router, "parent_stream_configs"):
1072            raise ValueError(
1073                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
1074            )
1075
1076        if not hasattr(declarative_stream, "incremental_sync"):
1077            raise ValueError(
1078                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
1079            )
1080
1081        return LegacyToPerPartitionStateMigration(
1082            partition_router,  # type: ignore # was already checked above
1083            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
1084            config,
1085            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
1086        )
1087
1088    def create_session_token_authenticator(
1089        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
1090    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
1091        decoder = (
1092            self._create_component_from_model(model=model.decoder, config=config)
1093            if model.decoder
1094            else JsonDecoder(parameters={})
1095        )
1096        login_requester = self._create_component_from_model(
1097            model=model.login_requester,
1098            config=config,
1099            name=f"{name}_login_requester",
1100            decoder=decoder,
1101        )
1102        token_provider = SessionTokenProvider(
1103            login_requester=login_requester,
1104            session_token_path=model.session_token_path,
1105            expiration_duration=parse_duration(model.expiration_duration)
1106            if model.expiration_duration
1107            else None,
1108            parameters=model.parameters or {},
1109            message_repository=self._message_repository,
1110            decoder=decoder,
1111        )
1112        if model.request_authentication.type == "Bearer":
1113            return ModelToComponentFactory.create_bearer_authenticator(
1114                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
1115                config,
1116                token_provider=token_provider,
1117            )
1118        else:
1119            return self.create_api_key_authenticator(
1120                ApiKeyAuthenticatorModel(
1121                    type="ApiKeyAuthenticator",
1122                    api_token="",
1123                    inject_into=model.request_authentication.inject_into,
1124                ),  # type: ignore # $parameters and headers default to None
1125                config=config,
1126                token_provider=token_provider,
1127            )
1128
1129    @staticmethod
1130    def create_basic_http_authenticator(
1131        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
1132    ) -> BasicHttpAuthenticator:
1133        return BasicHttpAuthenticator(
1134            password=model.password or "",
1135            username=model.username,
1136            config=config,
1137            parameters=model.parameters or {},
1138        )
1139
1140    @staticmethod
1141    def create_bearer_authenticator(
1142        model: BearerAuthenticatorModel,
1143        config: Config,
1144        token_provider: Optional[TokenProvider] = None,
1145        **kwargs: Any,
1146    ) -> BearerAuthenticator:
1147        if token_provider is not None and model.api_token != "":
1148            raise ValueError(
1149                "If token_provider is set, api_token is ignored and has to be set to empty string."
1150            )
1151        return BearerAuthenticator(
1152            token_provider=(
1153                token_provider
1154                if token_provider is not None
1155                else InterpolatedStringTokenProvider(
1156                    api_token=model.api_token or "",
1157                    config=config,
1158                    parameters=model.parameters or {},
1159                )
1160            ),
1161            config=config,
1162            parameters=model.parameters or {},
1163        )
1164
1165    @staticmethod
1166    def create_dynamic_stream_check_config(
1167        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
1168    ) -> DynamicStreamCheckConfig:
1169        return DynamicStreamCheckConfig(
1170            dynamic_stream_name=model.dynamic_stream_name,
1171            stream_count=model.stream_count or 0,
1172        )
1173
1174    def create_check_stream(
1175        self, model: CheckStreamModel, config: Config, **kwargs: Any
1176    ) -> CheckStream:
1177        if model.dynamic_streams_check_configs is None and model.stream_names is None:
1178            raise ValueError(
1179                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
1180            )
1181
1182        dynamic_streams_check_configs = (
1183            [
1184                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
1185                for dynamic_stream_check_config in model.dynamic_streams_check_configs
1186            ]
1187            if model.dynamic_streams_check_configs
1188            else []
1189        )
1190
1191        return CheckStream(
1192            stream_names=model.stream_names or [],
1193            dynamic_streams_check_configs=dynamic_streams_check_configs,
1194            parameters={},
1195        )
1196
1197    @staticmethod
1198    def create_check_dynamic_stream(
1199        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
1200    ) -> CheckDynamicStream:
1201        assert model.use_check_availability is not None  # for mypy
1202
1203        use_check_availability = model.use_check_availability
1204
1205        return CheckDynamicStream(
1206            stream_count=model.stream_count,
1207            use_check_availability=use_check_availability,
1208            parameters={},
1209        )
1210
1211    def create_composite_error_handler(
1212        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
1213    ) -> CompositeErrorHandler:
1214        error_handlers = [
1215            self._create_component_from_model(model=error_handler_model, config=config)
1216            for error_handler_model in model.error_handlers
1217        ]
1218        return CompositeErrorHandler(
1219            error_handlers=error_handlers, parameters=model.parameters or {}
1220        )
1221
1222    @staticmethod
1223    def create_concurrency_level(
1224        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
1225    ) -> ConcurrencyLevel:
1226        return ConcurrencyLevel(
1227            default_concurrency=model.default_concurrency,
1228            max_concurrency=model.max_concurrency,
1229            config=config,
1230            parameters={},
1231        )
1232
1233    @staticmethod
1234    def apply_stream_state_migrations(
1235        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
1236    ) -> MutableMapping[str, Any]:
1237        if stream_state_migrations:
1238            for state_migration in stream_state_migrations:
1239                if state_migration.should_migrate(stream_state):
1240                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
1241                    stream_state = dict(state_migration.migrate(stream_state))
1242        return stream_state
1243
1244    def create_concurrent_cursor_from_datetime_based_cursor(
1245        self,
1246        model_type: Type[BaseModel],
1247        component_definition: ComponentDefinition,
1248        stream_name: str,
1249        stream_namespace: Optional[str],
1250        config: Config,
1251        message_repository: Optional[MessageRepository] = None,
1252        runtime_lookback_window: Optional[datetime.timedelta] = None,
1253        stream_state_migrations: Optional[List[Any]] = None,
1254        **kwargs: Any,
1255    ) -> ConcurrentCursor:
1256        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1257        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1258        # incoming state and connector_state_manager that is initialized when the component factory is created
1259        stream_state = (
1260            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1261            if "stream_state" not in kwargs
1262            else kwargs["stream_state"]
1263        )
1264        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1265
1266        component_type = component_definition.get("type")
1267        if component_definition.get("type") != model_type.__name__:
1268            raise ValueError(
1269                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1270            )
1271
1272        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1273
1274        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1275            raise ValueError(
1276                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1277            )
1278
1279        interpolated_cursor_field = InterpolatedString.create(
1280            datetime_based_cursor_model.cursor_field,
1281            parameters=datetime_based_cursor_model.parameters or {},
1282        )
1283        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1284
1285        interpolated_partition_field_start = InterpolatedString.create(
1286            datetime_based_cursor_model.partition_field_start or "start_time",
1287            parameters=datetime_based_cursor_model.parameters or {},
1288        )
1289        interpolated_partition_field_end = InterpolatedString.create(
1290            datetime_based_cursor_model.partition_field_end or "end_time",
1291            parameters=datetime_based_cursor_model.parameters or {},
1292        )
1293
1294        slice_boundary_fields = (
1295            interpolated_partition_field_start.eval(config=config),
1296            interpolated_partition_field_end.eval(config=config),
1297        )
1298
1299        datetime_format = datetime_based_cursor_model.datetime_format
1300
1301        cursor_granularity = (
1302            parse_duration(datetime_based_cursor_model.cursor_granularity)
1303            if datetime_based_cursor_model.cursor_granularity
1304            else None
1305        )
1306
1307        lookback_window = None
1308        interpolated_lookback_window = (
1309            InterpolatedString.create(
1310                datetime_based_cursor_model.lookback_window,
1311                parameters=datetime_based_cursor_model.parameters or {},
1312            )
1313            if datetime_based_cursor_model.lookback_window
1314            else None
1315        )
1316        if interpolated_lookback_window:
1317            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
1318            if evaluated_lookback_window:
1319                lookback_window = parse_duration(evaluated_lookback_window)
1320
1321        connector_state_converter: DateTimeStreamStateConverter
1322        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1323            datetime_format=datetime_format,
1324            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1325            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1326            cursor_granularity=cursor_granularity,
1327        )
1328
1329        # Adjusts the stream state by applying the runtime lookback window.
1330        # This is used to ensure correct state handling in case of failed partitions.
1331        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1332        if runtime_lookback_window and stream_state_value:
1333            new_stream_state = (
1334                connector_state_converter.parse_timestamp(stream_state_value)
1335                - runtime_lookback_window
1336            )
1337            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1338                new_stream_state
1339            )
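        # Worked example (hypothetical values): with stream_state {"updated_at": "2024-01-10T00:00:00Z"} and
        # runtime_lookback_window=timedelta(days=2), the stored cursor is rewound to "2024-01-08T00:00:00Z"
        # before the ConcurrentCursor is built.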
1340
1341        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
1342        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
1343            start_date_runtime_value = self.create_min_max_datetime(
1344                model=datetime_based_cursor_model.start_datetime, config=config
1345            )
1346        else:
1347            start_date_runtime_value = datetime_based_cursor_model.start_datetime
1348
1349        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
1350        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
1351            end_date_runtime_value = self.create_min_max_datetime(
1352                model=datetime_based_cursor_model.end_datetime, config=config
1353            )
1354        else:
1355            end_date_runtime_value = datetime_based_cursor_model.end_datetime
1356
1357        interpolated_start_date = MinMaxDatetime.create(
1358            interpolated_string_or_min_max_datetime=start_date_runtime_value,
1359            parameters=datetime_based_cursor_model.parameters,
1360        )
1361        interpolated_end_date = (
1362            None
1363            if not end_date_runtime_value
1364            else MinMaxDatetime.create(
1365                end_date_runtime_value, datetime_based_cursor_model.parameters
1366            )
1367        )
1368
1369        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
1370        if not interpolated_start_date.datetime_format:
1371            interpolated_start_date.datetime_format = datetime_format
1372        if interpolated_end_date and not interpolated_end_date.datetime_format:
1373            interpolated_end_date.datetime_format = datetime_format
1374
1375        start_date = interpolated_start_date.get_datetime(config=config)
1376        end_date_provider = (
1377            partial(interpolated_end_date.get_datetime, config)
1378            if interpolated_end_date
1379            else connector_state_converter.get_end_provider()
1380        )
1381
1382        if (
1383            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
1384        ) or (
1385            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
1386        ):
1387            raise ValueError(
1388                f"If step is defined, cursor_granularity should be as well and vice-versa. "
1389                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
1390            )
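        # For example, a manifest may pair step "P1M" with cursor_granularity "P1D": each slice spans one
        # month, and consecutive slice boundaries are separated by one day so slices do not overlap.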
1391
1392        # When step is not defined, default to a step size from the starting date to the present moment
1393        step_length = datetime.timedelta.max
1394        interpolated_step = (
1395            InterpolatedString.create(
1396                datetime_based_cursor_model.step,
1397                parameters=datetime_based_cursor_model.parameters or {},
1398            )
1399            if datetime_based_cursor_model.step
1400            else None
1401        )
1402        if interpolated_step:
1403            evaluated_step = interpolated_step.eval(config)
1404            if evaluated_step:
1405                step_length = parse_duration(evaluated_step)
1406
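        # Illustrative (hypothetical) manifest snippet that exercises the WEEK clamping branch below:
        #   clamping:
        #     target: "WEEK"
        #     target_details:
        #       weekday: "MONDAY"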
1407        clamping_strategy: ClampingStrategy = NoClamping()
1408        if datetime_based_cursor_model.clamping:
1409            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1410            # it is still better than leaking the low-code interpolation concept into the ConcurrentCursor
1411            # runtime object, which we want to keep agnostic of low-code
1412            target = InterpolatedString(
1413                string=datetime_based_cursor_model.clamping.target,
1414                parameters=datetime_based_cursor_model.parameters or {},
1415            )
1416            evaluated_target = target.eval(config=config)
1417            match evaluated_target:
1418                case "DAY":
1419                    clamping_strategy = DayClampingStrategy()
1420                    end_date_provider = ClampingEndProvider(
1421                        DayClampingStrategy(is_ceiling=False),
1422                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1423                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
1424                    )
1425                case "WEEK":
1426                    if (
1427                        not datetime_based_cursor_model.clamping.target_details
1428                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
1429                    ):
1430                        raise ValueError(
1431                            "Given WEEK clamping, a weekday needs to be provided in target_details"
1432                        )
1433                    weekday = self._assemble_weekday(
1434                        datetime_based_cursor_model.clamping.target_details["weekday"]
1435                    )
1436                    clamping_strategy = WeekClampingStrategy(weekday)
1437                    end_date_provider = ClampingEndProvider(
1438                        WeekClampingStrategy(weekday, is_ceiling=False),
1439                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1440                        granularity=cursor_granularity or datetime.timedelta(days=1),
1441                    )
1442                case "MONTH":
1443                    clamping_strategy = MonthClampingStrategy()
1444                    end_date_provider = ClampingEndProvider(
1445                        MonthClampingStrategy(is_ceiling=False),
1446                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1447                        granularity=cursor_granularity or datetime.timedelta(days=1),
1448                    )
1449                case _:
1450                    raise ValueError(
1451                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1452                    )
1453
1454        return ConcurrentCursor(
1455            stream_name=stream_name,
1456            stream_namespace=stream_namespace,
1457            stream_state=stream_state,
1458            message_repository=message_repository or self._message_repository,
1459            connector_state_manager=self._connector_state_manager,
1460            connector_state_converter=connector_state_converter,
1461            cursor_field=cursor_field,
1462            slice_boundary_fields=slice_boundary_fields,
1463            start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1464            end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1465            lookback_window=lookback_window,
1466            slice_range=step_length,
1467            cursor_granularity=cursor_granularity,
1468            clamping_strategy=clamping_strategy,
1469        )
1470
1471    def create_concurrent_cursor_from_incrementing_count_cursor(
1472        self,
1473        model_type: Type[BaseModel],
1474        component_definition: ComponentDefinition,
1475        stream_name: str,
1476        stream_namespace: Optional[str],
1477        config: Config,
1478        message_repository: Optional[MessageRepository] = None,
1479        **kwargs: Any,
1480    ) -> ConcurrentCursor:
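        # Illustrative (hypothetical) manifest component handled by this method:
        #   {"type": "IncrementingCountCursor", "cursor_field": "id", "start_value": 0}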
1481        # Per-partition incremental streams can dynamically create child cursors, which pass their current
1482        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1483        # incoming state and the connector_state_manager that was initialized when the component factory was created.
1484        stream_state = (
1485            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1486            if "stream_state" not in kwargs
1487            else kwargs["stream_state"]
1488        )
1489
1490        component_type = component_definition.get("type")
1491        if component_type != model_type.__name__:
1492            raise ValueError(
1493                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1494            )
1495
1496        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
1497
1498        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
1499            raise ValueError(
1500                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
1501            )
1502
1503        interpolated_start_value = (
1504            InterpolatedString.create(
1505                incrementing_count_cursor_model.start_value,  # type: ignore
1506                parameters=incrementing_count_cursor_model.parameters or {},
1507            )
1508            if incrementing_count_cursor_model.start_value
1509            else 0
1510        )
1511
1512        interpolated_cursor_field = InterpolatedString.create(
1513            incrementing_count_cursor_model.cursor_field,
1514            parameters=incrementing_count_cursor_model.parameters or {},
1515        )
1516        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1517
1518        connector_state_converter = IncrementingCountStreamStateConverter(
1519            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1520        )
1521
1522        return ConcurrentCursor(
1523            stream_name=stream_name,
1524            stream_namespace=stream_namespace,
1525            stream_state=stream_state,
1526            message_repository=message_repository or self._message_repository,
1527            connector_state_manager=self._connector_state_manager,
1528            connector_state_converter=connector_state_converter,
1529            cursor_field=cursor_field,
1530            slice_boundary_fields=None,
1531            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1532            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1533        )
1534
1535    def _assemble_weekday(self, weekday: str) -> Weekday:
1536        match weekday:
1537            case "MONDAY":
1538                return Weekday.MONDAY
1539            case "TUESDAY":
1540                return Weekday.TUESDAY
1541            case "WEDNESDAY":
1542                return Weekday.WEDNESDAY
1543            case "THURSDAY":
1544                return Weekday.THURSDAY
1545            case "FRIDAY":
1546                return Weekday.FRIDAY
1547            case "SATURDAY":
1548                return Weekday.SATURDAY
1549            case "SUNDAY":
1550                return Weekday.SUNDAY
1551            case _:
1552                raise ValueError(f"Unknown weekday {weekday}")
1553
1554    def create_concurrent_cursor_from_perpartition_cursor(
1555        self,
1556        state_manager: ConnectorStateManager,
1557        model_type: Type[BaseModel],
1558        component_definition: ComponentDefinition,
1559        stream_name: str,
1560        stream_namespace: Optional[str],
1561        config: Config,
1562        stream_state: MutableMapping[str, Any],
1563        partition_router: PartitionRouter,
1564        stream_state_migrations: Optional[List[Any]] = None,
1565        **kwargs: Any,
1566    ) -> ConcurrentPerPartitionCursor:
1567        component_type = component_definition.get("type")
1568        if component_type != model_type.__name__:
1569            raise ValueError(
1570                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1571            )
1572
1573        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1574
1575        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1576            raise ValueError(
1577                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1578            )
1579
1580        interpolated_cursor_field = InterpolatedString.create(
1581            datetime_based_cursor_model.cursor_field,
1582            parameters=datetime_based_cursor_model.parameters or {},
1583        )
1584        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1585
1586        datetime_format = datetime_based_cursor_model.datetime_format
1587
1588        cursor_granularity = (
1589            parse_duration(datetime_based_cursor_model.cursor_granularity)
1590            if datetime_based_cursor_model.cursor_granularity
1591            else None
1592        )
1593
1594        connector_state_converter: DateTimeStreamStateConverter
1595        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1596            datetime_format=datetime_format,
1597            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1598            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1599            cursor_granularity=cursor_granularity,
1600        )
1601
1602        # Create the cursor factory
1603        cursor_factory = ConcurrentCursorFactory(
1604            partial(
1605                self.create_concurrent_cursor_from_datetime_based_cursor,
1606                state_manager=state_manager,
1607                model_type=model_type,
1608                component_definition=component_definition,
1609                stream_name=stream_name,
1610                stream_namespace=stream_namespace,
1611                config=config,
1612                message_repository=NoopMessageRepository(),
1613                stream_state_migrations=stream_state_migrations,
1614            )
1615        )
1616
1617        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1618        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
1619        use_global_cursor = isinstance(
1620            partition_router, GroupingPartitionRouter
1621        ) or component_definition.get("global_substream_cursor", False)
1622
1623        # Return the concurrent per-partition cursor
1624        return ConcurrentPerPartitionCursor(
1625            cursor_factory=cursor_factory,
1626            partition_router=partition_router,
1627            stream_name=stream_name,
1628            stream_namespace=stream_namespace,
1629            stream_state=stream_state,
1630            message_repository=self._message_repository,  # type: ignore
1631            connector_state_manager=state_manager,
1632            connector_state_converter=connector_state_converter,
1633            cursor_field=cursor_field,
1634            use_global_cursor=use_global_cursor,
1635        )
1636
1637    @staticmethod
1638    def create_constant_backoff_strategy(
1639        model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
1640    ) -> ConstantBackoffStrategy:
1641        return ConstantBackoffStrategy(
1642            backoff_time_in_seconds=model.backoff_time_in_seconds,
1643            config=config,
1644            parameters=model.parameters or {},
1645        )
1646
1647    def create_cursor_pagination(
1648        self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
1649    ) -> CursorPaginationStrategy:
1650        if isinstance(decoder, PaginationDecoderDecorator):
1651            inner_decoder = decoder.decoder
1652        else:
1653            inner_decoder = decoder
1654            decoder = PaginationDecoderDecorator(decoder=decoder)
1655
1656        if self._is_supported_decoder_for_pagination(inner_decoder):
1657            decoder_to_use = decoder
1658        else:
1659            raise ValueError(
1660                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1661            )
1662
1663        return CursorPaginationStrategy(
1664            cursor_value=model.cursor_value,
1665            decoder=decoder_to_use,
1666            page_size=model.page_size,
1667            stop_condition=model.stop_condition,
1668            config=config,
1669            parameters=model.parameters or {},
1670        )
1671
1672    def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
1673        """
1674        Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
1675        instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor.
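
        Illustrative (hypothetical) manifest component:
            {"type": "CustomRequester", "class_name": "source_example.components.MyRequester", "max_retries": 3}
        This would resolve source_example.components.MyRequester and instantiate it with only the
        fields that appear in the class's type hints (e.g. config and, if declared, max_retries).
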
1676        :param model: The Pydantic model of the custom component being created
1677        :param config: The custom defined connector config
1678        :return: The declarative component built from the Pydantic model to be used at runtime
1679        """
1680        custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1681        component_fields = get_type_hints(custom_component_class)
1682        model_args = model.dict()
1683        model_args["config"] = config
1684
1685        # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions
1686        # we defer to these arguments over the component's definition
1687        for key, arg in kwargs.items():
1688            model_args[key] = arg
1689
1690        # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
1691        # defined in the schema; they are only defined within the Python class implementation. Pydantic can only parse down to
1692        # the custom component, so this code performs a second parse to convert the sub-fields first into models, then into declarative components
1693        for model_field, model_value in model_args.items():
1694            # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
1695            if (
1696                isinstance(model_value, dict)
1697                and "type" not in model_value
1698                and model_field in component_fields
1699            ):
1700                derived_type = self._derive_component_type_from_type_hints(
1701                    component_fields.get(model_field)
1702                )
1703                if derived_type:
1704                    model_value["type"] = derived_type
1705
1706            if self._is_component(model_value):
1707                model_args[model_field] = self._create_nested_component(
1708                    model, model_field, model_value, config
1709                )
1710            elif isinstance(model_value, list):
1711                vals = []
1712                for v in model_value:
1713                    if isinstance(v, dict) and "type" not in v and model_field in component_fields:
1714                        derived_type = self._derive_component_type_from_type_hints(
1715                            component_fields.get(model_field)
1716                        )
1717                        if derived_type:
1718                            v["type"] = derived_type
1719                    if self._is_component(v):
1720                        vals.append(self._create_nested_component(model, model_field, v, config))
1721                    else:
1722                        vals.append(v)
1723                model_args[model_field] = vals
1724
1725        kwargs = {
1726            class_field: model_args[class_field]
1727            for class_field in component_fields.keys()
1728            if class_field in model_args
1729        }
1730        return custom_component_class(**kwargs)
1731
1732    @staticmethod
1733    def _get_class_from_fully_qualified_class_name(
1734        full_qualified_class_name: str,
1735    ) -> Any:
1736        """Get a class from its fully qualified name.
1737
1738        If a custom components module is needed, we assume it is already registered - probably
1739        as `source_declarative_manifest.components` or `components`.
1740
1741        Args:
1742            full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1743
1744        Returns:
1745            Any: The class object.
1746
1747        Raises:
1748            ValueError: If the class cannot be loaded.
1749        """
1750        split = full_qualified_class_name.split(".")
1751        module_name_full = ".".join(split[:-1])
1752        class_name = split[-1]
1753
1754        try:
1755            module_ref = importlib.import_module(module_name_full)
1756        except ModuleNotFoundError as e:
1757            if split[0] == "source_declarative_manifest":
1758                # During testing, the modules containing the custom components are not moved under source_declarative_manifest. To run the tests, add the source folder to your PYTHONPATH or append it at runtime using sys.path.append
1759                try:
1760                    import os
1761
1762                    module_name_with_source_declarative_manifest = ".".join(split[1:-1])
1763                    module_ref = importlib.import_module(
1764                        module_name_with_source_declarative_manifest
1765                    )
1766                except ModuleNotFoundError:
1767                    raise ValueError(f"Could not load module `{module_name_full}`.") from e
1768            else:
1769                raise ValueError(f"Could not load module `{module_name_full}`.") from e
1770
1771        try:
1772            return getattr(module_ref, class_name)
1773        except AttributeError as e:
1774            raise ValueError(
1775                f"Could not load class `{class_name}` from module `{module_name_full}`.",
1776            ) from e
1777
1778    @staticmethod
1779    def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
1780        interface = field_type
1781        while True:
1782            origin = get_origin(interface)
1783            if origin:
1784                # Unnest types until we reach the raw type
1785                # List[T] -> T
1786                # Optional[List[T]] -> T
1787                args = get_args(interface)
1788                interface = args[0]
1789            else:
1790                break
1791        if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
1792            return interface.__name__
1793        return None
1794
1795    @staticmethod
1796    def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1797        if not cls:
1798            return False
1799        return cls.__module__ == "builtins"
1800
1801    @staticmethod
1802    def _extract_missing_parameters(error: TypeError) -> List[str]:
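        # Parses CPython's TypeError text for missing keyword-only arguments, e.g. (illustrative):
        #   "__init__() missing 2 required keyword-only arguments: 'name' and 'primary_key'"
        # from which this helper extracts ["name", "primary_key"].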
1803        parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
1804        if parameter_search:
1805            return re.findall(r"\'(.+?)\'", parameter_search.group(1))
1806        else:
1807            return []
1808
1809    def _create_nested_component(
1810        self, model: Any, model_field: str, model_value: Any, config: Config
1811    ) -> Any:
1812        type_name = model_value.get("type", None)
1813        if not type_name:
1814            # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
1815            return model_value
1816
1817        model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
1818        if model_type:
1819            parsed_model = model_type.parse_obj(model_value)
1820            try:
1821                # To improve usability of the language, certain fields are shared between components. This can come in the form of
1822                # a parent component passing some of its fields to a child component or the parent extracting fields from other child
1823                # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base
1824                # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created
1825                # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that
1826                # are needed by a component and could not be shared.
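                # For example (hypothetical), a subcomponent nested in a custom component could declare
                #   $parameters: {"url_base": "https://api.example.com/v1"}
                # to supply a value that a built-in parent would otherwise have passed down.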
1827                model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
1828                constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
1829                model_parameters = model_value.get("$parameters", {})
1830                matching_parameters = {
1831                    kwarg: model_parameters[kwarg]
1832                    for kwarg in constructor_kwargs
1833                    if kwarg in model_parameters
1834                }
1835                return self._create_component_from_model(
1836                    model=parsed_model, config=config, **matching_parameters
1837                )
1838            except TypeError as error:
1839                missing_parameters = self._extract_missing_parameters(error)
1840                if missing_parameters:
1841                    raise ValueError(
1842                        f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
1843                        + ", ".join(
1844                            (
1845                                f"{type_name}.$parameters.{parameter}"
1846                                for parameter in missing_parameters
1847                            )
1848                        )
1849                    )
1850                raise TypeError(
1851                    f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
1852                )
1853        else:
1854            raise ValueError(
1855                f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
1856            )
1857
1858    @staticmethod
1859    def _is_component(model_value: Any) -> bool:
1860        return isinstance(model_value, dict) and model_value.get("type") is not None
1861
1862    def create_datetime_based_cursor(
1863        self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
1864    ) -> DatetimeBasedCursor:
1865        start_datetime: Union[str, MinMaxDatetime] = (
1866            model.start_datetime
1867            if isinstance(model.start_datetime, str)
1868            else self.create_min_max_datetime(model.start_datetime, config)
1869        )
1870        end_datetime: Union[str, MinMaxDatetime, None] = None
1871        if model.is_data_feed and model.end_datetime:
1872            raise ValueError("Data feed does not support end_datetime")
1873        if model.is_data_feed and model.is_client_side_incremental:
1874            raise ValueError(
1875                "`Client side incremental` cannot be combined with `data feed`. Choose only one of them."
1876            )
1877        if model.end_datetime:
1878            end_datetime = (
1879                model.end_datetime
1880                if isinstance(model.end_datetime, str)
1881                else self.create_min_max_datetime(model.end_datetime, config)
1882            )
1883
1884        end_time_option = (
1885            self._create_component_from_model(
1886                model.end_time_option, config, parameters=model.parameters or {}
1887            )
1888            if model.end_time_option
1889            else None
1890        )
1891        start_time_option = (
1892            self._create_component_from_model(
1893                model.start_time_option, config, parameters=model.parameters or {}
1894            )
1895            if model.start_time_option
1896            else None
1897        )
1898
1899        return DatetimeBasedCursor(
1900            cursor_field=model.cursor_field,
1901            cursor_datetime_formats=model.cursor_datetime_formats
1902            if model.cursor_datetime_formats
1903            else [],
1904            cursor_granularity=model.cursor_granularity,
1905            datetime_format=model.datetime_format,
1906            end_datetime=end_datetime,
1907            start_datetime=start_datetime,
1908            step=model.step,
1909            end_time_option=end_time_option,
1910            lookback_window=model.lookback_window,
1911            start_time_option=start_time_option,
1912            partition_field_end=model.partition_field_end,
1913            partition_field_start=model.partition_field_start,
1914            message_repository=self._message_repository,
1915            is_compare_strictly=model.is_compare_strictly,
1916            config=config,
1917            parameters=model.parameters or {},
1918        )
1919
1920    def create_declarative_stream(
1921        self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
1922    ) -> DeclarativeStream:
1923        # When constructing a declarative stream, we assemble the incremental_sync component and the retriever's partition_router
1924        # components, if they exist, into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing
1925        # the Retriever. This is done in the declarative stream, not the retriever, to support custom retrievers. The custom create methods
1926        # in the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
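        # For illustration: a stream with both a DatetimeBasedCursor and a partition router ends up with a
        # single slicer that emits one slice per (datetime window, partition) combination.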
1927        combined_slicers = self._merge_stream_slicers(model=model, config=config)
1928
1929        primary_key = model.primary_key.__root__ if model.primary_key else None
1930        stop_condition_on_cursor = (
1931            model.incremental_sync
1932            and hasattr(model.incremental_sync, "is_data_feed")
1933            and model.incremental_sync.is_data_feed
1934        )
1935        client_side_incremental_sync = None
1936        if (
1937            model.incremental_sync
1938            and hasattr(model.incremental_sync, "is_client_side_incremental")
1939            and model.incremental_sync.is_client_side_incremental
1940        ):
1941            supported_slicers = (
1942                DatetimeBasedCursor,
1943                GlobalSubstreamCursor,
1944                PerPartitionWithGlobalCursor,
1945            )
1946            if combined_slicers and not isinstance(combined_slicers, supported_slicers):
1947                raise ValueError(
1948                    "An unsupported slicer is used; PerPartitionWithGlobalCursor should be used here instead"
1949                )
1950            cursor = (
1951                combined_slicers
1952                if isinstance(
1953                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1954                )
1955                else self._create_component_from_model(model=model.incremental_sync, config=config)
1956            )
1957
1958            client_side_incremental_sync = {"cursor": cursor}
1959
1960        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1961            cursor_model = model.incremental_sync
1962
1963            end_time_option = (
1964                self._create_component_from_model(
1965                    cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1966                )
1967                if cursor_model.end_time_option
1968                else None
1969            )
1970            start_time_option = (
1971                self._create_component_from_model(
1972                    cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1973                )
1974                if cursor_model.start_time_option
1975                else None
1976            )
1977
1978            request_options_provider = DatetimeBasedRequestOptionsProvider(
1979                start_time_option=start_time_option,
1980                end_time_option=end_time_option,
1981                partition_field_start=cursor_model.partition_field_start,
1982                partition_field_end=cursor_model.partition_field_end,
1983                config=config,
1984                parameters=model.parameters or {},
1985            )
1986        elif model.incremental_sync and isinstance(
1987            model.incremental_sync, IncrementingCountCursorModel
1988        ):
1989            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
1990
1991            start_time_option = (
1992                self._create_component_from_model(
1993                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1994                    config,
1995                    parameters=cursor_model.parameters or {},
1996                )
1997                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1998                else None
1999            )
2000
2001            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
2002            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
2003            partition_field_start = "start"
2004
2005            request_options_provider = DatetimeBasedRequestOptionsProvider(
2006                start_time_option=start_time_option,
2007                partition_field_start=partition_field_start,
2008                config=config,
2009                parameters=model.parameters or {},
2010            )
2011        else:
2012            request_options_provider = None
2013
2014        transformations = []
2015        if model.transformations:
2016            for transformation_model in model.transformations:
2017                transformations.append(
2018                    self._create_component_from_model(model=transformation_model, config=config)
2019                )
2020        file_uploader = None
2021        if model.file_uploader:
2022            file_uploader = self._create_component_from_model(
2023                model=model.file_uploader, config=config
2024            )
2025
2026        retriever = self._create_component_from_model(
2027            model=model.retriever,
2028            config=config,
2029            name=model.name,
2030            primary_key=primary_key,
2031            stream_slicer=combined_slicers,
2032            request_options_provider=request_options_provider,
2033            stop_condition_on_cursor=stop_condition_on_cursor,
2034            client_side_incremental_sync=client_side_incremental_sync,
2035            transformations=transformations,
2036            file_uploader=file_uploader,
2037            incremental_sync=model.incremental_sync,
2038        )
2039        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
2040
2041        if model.state_migrations:
2042            state_transformations = [
2043                self._create_component_from_model(state_migration, config, declarative_stream=model)
2044                for state_migration in model.state_migrations
2045            ]
2046        else:
2047            state_transformations = []
2048
2049        schema_loader: Union[
2050            CompositeSchemaLoader,
2051            DefaultSchemaLoader,
2052            DynamicSchemaLoader,
2053            InlineSchemaLoader,
2054            JsonFileSchemaLoader,
2055        ]
2056        if model.schema_loader and isinstance(model.schema_loader, list):
2057            nested_schema_loaders = [
2058                self._create_component_from_model(model=nested_schema_loader, config=config)
2059                for nested_schema_loader in model.schema_loader
2060            ]
2061            schema_loader = CompositeSchemaLoader(
2062                schema_loaders=nested_schema_loaders, parameters={}
2063            )
2064        elif model.schema_loader:
2065            schema_loader = self._create_component_from_model(
2066                model=model.schema_loader,  # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models
2067                config=config,
2068            )
2069        else:
2070            options = model.parameters or {}
2071            if "name" not in options:
2072                options["name"] = model.name
2073            schema_loader = DefaultSchemaLoader(config=config, parameters=options)
2074
2075        return DeclarativeStream(
2076            name=model.name or "",
2077            primary_key=primary_key,
2078            retriever=retriever,
2079            schema_loader=schema_loader,
2080            stream_cursor_field=cursor_field or "",
2081            state_migrations=state_transformations,
2082            config=config,
2083            parameters=model.parameters or {},
2084        )
2085
2086    def _build_stream_slicer_from_partition_router(
2087        self,
2088        model: Union[
2089            AsyncRetrieverModel,
2090            CustomRetrieverModel,
2091            SimpleRetrieverModel,
2092        ],
2093        config: Config,
2094        stream_name: Optional[str] = None,
2095    ) -> Optional[PartitionRouter]:
2096        if (
2097            hasattr(model, "partition_router")
2098            and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
2099            and model.partition_router
2100        ):
2101            stream_slicer_model = model.partition_router
2102            if isinstance(stream_slicer_model, list):
2103                return CartesianProductStreamSlicer(
2104                    [
2105                        self._create_component_from_model(
2106                            model=slicer, config=config, stream_name=stream_name or ""
2107                        )
2108                        for slicer in stream_slicer_model
2109                    ],
2110                    parameters={},
2111                )
2112            else:
2113                return self._create_component_from_model(  # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
2114                    model=stream_slicer_model, config=config, stream_name=stream_name or ""
2115                )
2116        return None
2117
2118    def _build_incremental_cursor(
2119        self,
2120        model: DeclarativeStreamModel,
2121        stream_slicer: Optional[PartitionRouter],
2122        config: Config,
2123    ) -> Optional[StreamSlicer]:
2124        if model.incremental_sync and stream_slicer:
2125            if model.retriever.type == "AsyncRetriever":
2126                stream_name = model.name or ""
2127                stream_namespace = None
2128                stream_state = self._connector_state_manager.get_stream_state(
2129                    stream_name, stream_namespace
2130                )
2131                state_transformations = (
2132                    [
2133                        self._create_component_from_model(
2134                            state_migration, config, declarative_stream=model
2135                        )
2136                        for state_migration in model.state_migrations
2137                    ]
2138                    if model.state_migrations
2139                    else []
2140                )
2141
2142                return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2143                    state_manager=self._connector_state_manager,
2144                    model_type=DatetimeBasedCursorModel,
2145                    component_definition=model.incremental_sync.__dict__,
2146                    stream_name=stream_name,
2147                    stream_namespace=stream_namespace,
2148                    config=config or {},
2149                    stream_state=stream_state,
2150                    stream_state_migrations=state_transformations,
2151                    partition_router=stream_slicer,
2152                )
2153
2154            incremental_sync_model = model.incremental_sync
2155            cursor_component = self._create_component_from_model(
2156                model=incremental_sync_model, config=config
2157            )
2158            is_global_cursor = (
2159                hasattr(incremental_sync_model, "global_substream_cursor")
2160                and incremental_sync_model.global_substream_cursor
2161            )
2162
2163            if is_global_cursor:
2164                return GlobalSubstreamCursor(
2165                    stream_cursor=cursor_component, partition_router=stream_slicer
2166                )
2167            return PerPartitionWithGlobalCursor(
2168                cursor_factory=CursorFactory(
2169                    lambda: self._create_component_from_model(
2170                        model=incremental_sync_model, config=config
2171                    ),
2172                ),
2173                partition_router=stream_slicer,
2174                stream_cursor=cursor_component,
2175            )
2176        elif model.incremental_sync:
2177            if model.retriever.type == "AsyncRetriever":
2178                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2179                    model_type=DatetimeBasedCursorModel,
2180                    component_definition=model.incremental_sync.__dict__,
2181                    stream_name=model.name or "",
2182                    stream_namespace=None,
2183                    config=config or {},
2184                    stream_state_migrations=model.state_migrations,
2185                )
2186            return self._create_component_from_model(model=model.incremental_sync, config=config)  # type: ignore[no-any-return]  # Will be created Cursor as stream_slicer_model is model.incremental_sync
2187        return None
2188
2189    def _build_resumable_cursor(
2190        self,
2191        model: Union[
2192            AsyncRetrieverModel,
2193            CustomRetrieverModel,
2194            SimpleRetrieverModel,
2195        ],
2196        stream_slicer: Optional[PartitionRouter],
2197    ) -> Optional[StreamSlicer]:
2198        if hasattr(model, "paginator") and model.paginator and not stream_slicer:
2199            # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
2200            return ResumableFullRefreshCursor(parameters={})
2201        elif stream_slicer:
2202            # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
2203            return PerPartitionCursor(
2204                cursor_factory=CursorFactory(
2205                    create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
2206                ),
2207                partition_router=stream_slicer,
2208            )
2209        return None
2210
2211    def _merge_stream_slicers(
2212        self, model: DeclarativeStreamModel, config: Config
2213    ) -> Optional[StreamSlicer]:
2214        retriever_model = model.retriever
2215
2216        stream_slicer = self._build_stream_slicer_from_partition_router(
2217            retriever_model, config, stream_name=model.name
2218        )
2219
2220        if retriever_model.type == "AsyncRetriever":
2221            is_not_datetime_cursor = (
2222                model.incremental_sync.type != "DatetimeBasedCursor"
2223                if model.incremental_sync
2224                else None
2225            )
2226            is_partition_router = (
2227                bool(retriever_model.partition_router) if model.incremental_sync else None
2228            )
2229
2230            if is_not_datetime_cursor:
2231                # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
2232                # support of unordered slices (for example, when we trigger reports for January and February, the report
2233                # for February can be completed first). Once we have support for a custom concurrent cursor or have a new
2234                # implementation available in the CDK, we can enable more cursors here.
2235                raise ValueError(
2236                    "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
2237                )
2238
2239            if is_partition_router and not stream_slicer:
2240                # Note that this development is being done in parallel to the per-partition development; once that work
2241                # is merged, we could support this case by calling create_concurrent_cursor_from_perpartition_cursor
2242                raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
2243
2244        if model.incremental_sync:
2245            return self._build_incremental_cursor(model, stream_slicer, config)
2246
2247        return (
2248            stream_slicer
2249            if self._disable_resumable_full_refresh
2250            else self._build_resumable_cursor(retriever_model, stream_slicer)
2251        )
2252
2253    def create_default_error_handler(
2254        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
2255    ) -> DefaultErrorHandler:
2256        backoff_strategies = []
2257        if model.backoff_strategies:
2258            for backoff_strategy_model in model.backoff_strategies:
2259                backoff_strategies.append(
2260                    self._create_component_from_model(model=backoff_strategy_model, config=config)
2261                )
2262
2263        response_filters = []
2264        if model.response_filters:
2265            for response_filter_model in model.response_filters:
2266                response_filters.append(
2267                    self._create_component_from_model(model=response_filter_model, config=config)
2268                )
2269        response_filters.append(
2270            HttpResponseFilter(config=config, parameters=model.parameters or {})
2271        )
2272
2273        return DefaultErrorHandler(
2274            backoff_strategies=backoff_strategies,
2275            max_retries=model.max_retries,
2276            response_filters=response_filters,
2277            config=config,
2278            parameters=model.parameters or {},
2279        )
2280
2281    def create_default_paginator(
2282        self,
2283        model: DefaultPaginatorModel,
2284        config: Config,
2285        *,
2286        url_base: str,
2287        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2288        decoder: Optional[Decoder] = None,
2289        cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
2290    ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
2291        if decoder:
2292            if self._is_supported_decoder_for_pagination(decoder):
2293                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
2294            else:
2295                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
2296        else:
2297            decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
2298        page_size_option = (
2299            self._create_component_from_model(model=model.page_size_option, config=config)
2300            if model.page_size_option
2301            else None
2302        )
2303        page_token_option = (
2304            self._create_component_from_model(model=model.page_token_option, config=config)
2305            if model.page_token_option
2306            else None
2307        )
2308        pagination_strategy = self._create_component_from_model(
2309            model=model.pagination_strategy,
2310            config=config,
2311            decoder=decoder_to_use,
2312            extractor_model=extractor_model,
2313        )
2314        if cursor_used_for_stop_condition:
2315            pagination_strategy = StopConditionPaginationStrategyDecorator(
2316                pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
2317            )
2318        paginator = DefaultPaginator(
2319            decoder=decoder_to_use,
2320            page_size_option=page_size_option,
2321            page_token_option=page_token_option,
2322            pagination_strategy=pagination_strategy,
2323            url_base=url_base,
2324            config=config,
2325            parameters=model.parameters or {},
2326        )
2327        if self._limit_pages_fetched_per_slice:
2328            return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
2329        return paginator
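
    # Illustrative (hypothetical) manifest block routed to create_default_paginator:
    #   paginator:
    #     type: DefaultPaginator
    #     pagination_strategy:
    #       type: CursorPagination
    #       cursor_value: "{{ response.next }}"
    #     page_token_option:
    #       type: RequestOption
    #       inject_into: request_parameter
    #       field_name: "cursor"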
2330
2331    def create_dpath_extractor(
2332        self,
2333        model: DpathExtractorModel,
2334        config: Config,
2335        decoder: Optional[Decoder] = None,
2336        **kwargs: Any,
2337    ) -> DpathExtractor:
2338        if decoder:
2339            decoder_to_use = decoder
2340        else:
2341            decoder_to_use = JsonDecoder(parameters={})
2342        model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
2343        return DpathExtractor(
2344            decoder=decoder_to_use,
2345            field_path=model_field_path,
2346            config=config,
2347            parameters=model.parameters or {},
2348        )
2349
2350    @staticmethod
2351    def create_response_to_file_extractor(
2352        model: ResponseToFileExtractorModel,
2353        **kwargs: Any,
2354    ) -> ResponseToFileExtractor:
2355        return ResponseToFileExtractor(parameters=model.parameters or {})
2356
2357    @staticmethod
2358    def create_exponential_backoff_strategy(
2359        model: ExponentialBackoffStrategyModel, config: Config
2360    ) -> ExponentialBackoffStrategy:
2361        return ExponentialBackoffStrategy(
2362            factor=model.factor or 5, parameters=model.parameters or {}, config=config
2363        )
2364
2365    @staticmethod
2366    def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey:
2367        return GroupByKey(model.key, config=config, parameters=model.parameters or {})
2368
2369    def create_http_requester(
2370        self,
2371        model: HttpRequesterModel,
2372        config: Config,
2373        decoder: Decoder = JsonDecoder(parameters={}),
2374        query_properties_key: Optional[str] = None,
2375        use_cache: Optional[bool] = None,
2376        *,
2377        name: str,
2378    ) -> HttpRequester:
2379        authenticator = (
2380            self._create_component_from_model(
2381                model=model.authenticator,
2382                config=config,
2383                url_base=model.url or model.url_base,
2384                name=name,
2385                decoder=decoder,
2386            )
2387            if model.authenticator
2388            else None
2389        )
2390        error_handler = (
2391            self._create_component_from_model(model=model.error_handler, config=config)
2392            if model.error_handler
2393            else DefaultErrorHandler(
2394                backoff_strategies=[],
2395                response_filters=[],
2396                config=config,
2397                parameters=model.parameters or {},
2398            )
2399        )
2400
2401        api_budget = self._api_budget
2402
2403        # Removes QueryProperties components from the interpolated mappings because they are designed
2404        # to be used by the SimpleRetriever and are resolved from the stream slice directly
2405        # instead of through Jinja interpolation
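        # e.g. (illustrative) a mapping such as request_parameters:
        #   {"fields": {"type": "QueryProperties", "property_list": ["id", "name"]}}
        # has the "fields" entry stripped here; the SimpleRetriever re-injects the resolved properties per request.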
2406        request_parameters: Optional[Union[str, Mapping[str, str]]]
2407        if isinstance(model.request_parameters, Mapping):
2408            request_parameters = self._remove_query_properties(model.request_parameters)
2409        else:
2410            request_parameters = model.request_parameters
2411
2412        request_options_provider = InterpolatedRequestOptionsProvider(
2413            request_body=model.request_body,
2414            request_body_data=model.request_body_data,
2415            request_body_json=model.request_body_json,
2416            request_headers=model.request_headers,
2417            request_parameters=request_parameters,
2418            query_properties_key=query_properties_key,
2419            config=config,
2420            parameters=model.parameters or {},
2421        )
2422
2423        assert model.use_cache is not None  # for mypy
2424        assert model.http_method is not None  # for mypy
2425
2426        should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
2427
2428        return HttpRequester(
2429            name=name,
2430            url=model.url,
2431            url_base=model.url_base,
2432            path=model.path,
2433            authenticator=authenticator,
2434            error_handler=error_handler,
2435            api_budget=api_budget,
2436            http_method=HttpMethod[model.http_method.value],
2437            request_options_provider=request_options_provider,
2438            config=config,
2439            disable_retries=self._disable_retries,
2440            parameters=model.parameters or {},
2441            message_repository=self._message_repository,
2442            use_cache=should_use_cache,
2443            decoder=decoder,
2444            stream_response=decoder.is_stream_response() if decoder else False,
2445        )
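
        # A minimal, hypothetical manifest fragment, shown as the parsed dict from
        # which an HttpRequesterModel is built; omitted fields fall back to the
        # defaults wired above:
        #
        #   {
        #       "type": "HttpRequester",
        #       "url_base": "https://api.example.com/v1",
        #       "path": "/items",
        #       "http_method": "GET",
        #       "request_parameters": {"page_size": "100"},
        #   }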
2446
2447    @staticmethod
2448    def create_http_response_filter(
2449        model: HttpResponseFilterModel, config: Config, **kwargs: Any
2450    ) -> HttpResponseFilter:
2451        if model.action:
2452            action = ResponseAction(model.action.value)
2453        else:
2454            action = None
2455
2456        failure_type = FailureType(model.failure_type.value) if model.failure_type else None
2457
2458        http_codes = (
2459            set(model.http_codes) if model.http_codes else set()
2460        )  # JSON schema notation has no set data type. The schema enforces an array of unique elements
2461
2462        return HttpResponseFilter(
2463            action=action,
2464            failure_type=failure_type,
2465            error_message=model.error_message or "",
2466            error_message_contains=model.error_message_contains or "",
2467            http_codes=http_codes,
2468            predicate=model.predicate or "",
2469            config=config,
2470            parameters=model.parameters or {},
2471        )
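
        # A hypothetical manifest fragment, shown as the parsed dict from which an
        # HttpResponseFilterModel is built: retry on the listed status codes or when
        # the predicate matches; unset fields default as above:
        #
        #   {
        #       "type": "HttpResponseFilter",
        #       "action": "RETRY",
        #       "http_codes": [429, 500],
        #       "predicate": "{{ 'rate limit' in response['error'] }}",
        #   }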
2472
2473    @staticmethod
2474    def create_inline_schema_loader(
2475        model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
2476    ) -> InlineSchemaLoader:
2477        return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
2478
2479    def create_complex_field_type(
2480        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
2481    ) -> ComplexFieldType:
2482        items = (
2483            self._create_component_from_model(model=model.items, config=config)
2484            if isinstance(model.items, ComplexFieldTypeModel)
2485            else model.items
2486        )
2487
2488        return ComplexFieldType(field_type=model.field_type, items=items)
2489
2490    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
2491        target_type = (
2492            self._create_component_from_model(model=model.target_type, config=config)
2493            if isinstance(model.target_type, ComplexFieldTypeModel)
2494            else model.target_type
2495        )
2496
2497        return TypesMap(
2498            target_type=target_type,
2499            current_type=model.current_type,
2500            condition=model.condition if model.condition is not None else "True",
2501        )
2502
2503    def create_schema_type_identifier(
2504        self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
2505    ) -> SchemaTypeIdentifier:
2506        types_mapping = []
2507        if model.types_mapping:
2508            types_mapping.extend(
2509                [
2510                    self._create_component_from_model(types_map, config=config)
2511                    for types_map in model.types_mapping
2512                ]
2513            )
2514        model_schema_pointer: List[Union[InterpolatedString, str]] = (
2515            [x for x in model.schema_pointer] if model.schema_pointer else []
2516        )
2517        model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
2518        model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
2519            [x for x in model.type_pointer] if model.type_pointer else None
2520        )
2521
2522        return SchemaTypeIdentifier(
2523            schema_pointer=model_schema_pointer,
2524            key_pointer=model_key_pointer,
2525            type_pointer=model_type_pointer,
2526            types_mapping=types_mapping,
2527            parameters=model.parameters or {},
2528        )
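
        # A hypothetical manifest fragment, shown as the parsed dict from which a
        # SchemaTypeIdentifierModel is built, describing where field names and types
        # live in a schema endpoint's response:
        #
        #   {
        #       "type": "SchemaTypeIdentifier",
        #       "schema_pointer": ["fields"],
        #       "key_pointer": ["name"],
        #       "type_pointer": ["type"],
        #       "types_mapping": [
        #           {"type": "TypesMap", "target_type": "string", "current_type": "text"}
        #       ],
        #   }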
2529
2530    def create_dynamic_schema_loader(
2531        self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2532    ) -> DynamicSchemaLoader:
2533        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2534        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2535
2536        schema_transformations = []
2537        if model.schema_transformations:
2538            for transformation_model in model.schema_transformations:
2539                schema_transformations.append(
2540                    self._create_component_from_model(model=transformation_model, config=config)
2541                )
2542        name = "dynamic_properties"
2543        retriever = self._create_component_from_model(
2544            model=model.retriever,
2545            config=config,
2546            name=name,
2547            primary_key=None,
2548            stream_slicer=combined_slicers,
2549            transformations=[],
2550            use_cache=True,
2551            log_formatter=(
2552                lambda response: format_http_message(
2553                    response,
2554                    f"Schema loader '{name}' request",
2555                    "Request performed in order to extract schema.",
2556                    name,
2557                    is_auxiliary=True,
2558                )
2559            ),
2560        )
2561        schema_type_identifier = self._create_component_from_model(
2562            model.schema_type_identifier, config=config, parameters=model.parameters or {}
2563        )
2564        schema_filter = (
2565            self._create_component_from_model(
2566                model.schema_filter, config=config, parameters=model.parameters or {}
2567            )
2568            if model.schema_filter is not None
2569            else None
2570        )
2571
2572        return DynamicSchemaLoader(
2573            retriever=retriever,
2574            config=config,
2575            schema_transformations=schema_transformations,
2576            schema_filter=schema_filter,
2577            schema_type_identifier=schema_type_identifier,
2578            parameters=model.parameters or {},
2579        )
2580
2581    @staticmethod
2582    def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2583        return JsonDecoder(parameters={})
2584
2585    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2586        return CompositeRawDecoder(
2587            parser=ModelToComponentFactory._get_parser(model, config),
2588            stream_response=not self._emit_connector_builder_messages,
2589        )
2590
2591    def create_jsonl_decoder(
2592        self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2593    ) -> Decoder:
2594        return CompositeRawDecoder(
2595            parser=ModelToComponentFactory._get_parser(model, config),
2596            stream_response=not self._emit_connector_builder_messages,
2597        )
2598
2599    def create_gzip_decoder(
2600        self, model: GzipDecoderModel, config: Config, **kwargs: Any
2601    ) -> Decoder:
2602        _compressed_response_types = {
2603            "gzip",
2604            "x-gzip",
2605            "gzip, deflate",
2606            "x-gzip, deflate",
2607            "application/zip",
2608            "application/gzip",
2609            "application/x-gzip",
2610            "application/x-zip-compressed",
2611        }
2612
2613        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser
2614
2615        if self._emit_connector_builder_messages:
2616            # This is surprising, but if the response is not streamed,
2617            # CompositeRawDecoder calls response.content and the requests library uncompresses the data,
2618            # as opposed to response.raw, which uses urllib3 directly and does not uncompress it.
2619            return CompositeRawDecoder(gzip_parser.inner_parser, False)
2620
2621        return CompositeRawDecoder.by_headers(
2622            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2623            stream_response=True,
2624            fallback_parser=gzip_parser.inner_parser,
2625        )
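
        # Standalone sketch of the requests-library behavior described above; the URL
        # is a placeholder, and only one of the two reads can actually be performed
        # per response since the underlying stream is consumed on first read:
        #
        #   import requests
        #
        #   response = requests.get("https://example.com/data.gz", stream=True)
        #   response.content     # requests decompresses according to Content-Encoding
        #   response.raw.read()  # urllib3 stream: yields the still-compressed bytes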
2626
2627    @staticmethod
2628    def create_incrementing_count_cursor(
2629        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
2630    ) -> DatetimeBasedCursor:
2631        # This stub should not actually get used anywhere at runtime, but it is needed to pass checks since
2632        # we still parse models into components. The issue is that there is no runtime implementation of an
2633        # IncrementingCountCursor.
2634        # A known and expected limitation of this stub is that running a check with a declared IncrementingCountCursor falls back to it, because the check runs without a ConcurrentCursor.
2635        return DatetimeBasedCursor(
2636            cursor_field=model.cursor_field,
2637            datetime_format="%Y-%m-%d",
2638            start_datetime="2024-12-12",
2639            config=config,
2640            parameters={},
2641        )
2642
2643    @staticmethod
2644    def create_iterable_decoder(
2645        model: IterableDecoderModel, config: Config, **kwargs: Any
2646    ) -> IterableDecoder:
2647        return IterableDecoder(parameters={})
2648
2649    @staticmethod
2650    def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2651        return XmlDecoder(parameters={})
2652
2653    def create_zipfile_decoder(
2654        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2655    ) -> ZipfileDecoder:
2656        return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2657
2658    @staticmethod
2659    def _get_parser(model: BaseModel, config: Config) -> Parser:
2660        if isinstance(model, JsonDecoderModel):
2661            # Note that the logic differs slightly from the JsonDecoder, which maintains legacy behavior of returning {} on error cases
2662            return JsonParser()
2663        elif isinstance(model, JsonlDecoderModel):
2664            return JsonLineParser()
2665        elif isinstance(model, CsvDecoderModel):
2666            return CsvParser(
2667                encoding=model.encoding,
2668                delimiter=model.delimiter,
2669                set_values_to_none=model.set_values_to_none,
2670            )
2671        elif isinstance(model, GzipDecoderModel):
2672            return GzipParser(
2673                inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
2674            )
2675        elif isinstance(
2676            model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
2677        ):
2678            raise ValueError(f"Decoder type {model} does not have a parser associated with it")
2679
2680        raise ValueError(f"Unknown decoder type {model}")
2681
2682    @staticmethod
2683    def create_json_file_schema_loader(
2684        model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
2685    ) -> JsonFileSchemaLoader:
2686        return JsonFileSchemaLoader(
2687            file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2688        )
2689
2690    @staticmethod
2691    def create_jwt_authenticator(
2692        model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2693    ) -> JwtAuthenticator:
2694        jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2695        jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2696        return JwtAuthenticator(
2697            config=config,
2698            parameters=model.parameters or {},
2699            algorithm=JwtAlgorithm(model.algorithm.value),
2700            secret_key=model.secret_key,
2701            base64_encode_secret_key=model.base64_encode_secret_key,
2702            token_duration=model.token_duration,
2703            header_prefix=model.header_prefix,
2704            kid=jwt_headers.kid,
2705            typ=jwt_headers.typ,
2706            cty=jwt_headers.cty,
2707            iss=jwt_payload.iss,
2708            sub=jwt_payload.sub,
2709            aud=jwt_payload.aud,
2710            additional_jwt_headers=model.additional_jwt_headers,
2711            additional_jwt_payload=model.additional_jwt_payload,
2712        )
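
        # A hypothetical manifest fragment, shown as the parsed dict from which a
        # JwtAuthenticatorModel is built; the secret is interpolated from the
        # connector config:
        #
        #   {
        #       "type": "JwtAuthenticator",
        #       "secret_key": "{{ config['secret_key'] }}",
        #       "algorithm": "HS256",
        #       "token_duration": 1200,
        #       "jwt_payload": {"iss": "{{ config['client_id'] }}"},
        #   }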
2713
2714    def create_list_partition_router(
2715        self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2716    ) -> ListPartitionRouter:
2717        request_option = (
2718            self._create_component_from_model(model.request_option, config)
2719            if model.request_option
2720            else None
2721        )
2722        return ListPartitionRouter(
2723            cursor_field=model.cursor_field,
2724            request_option=request_option,
2725            values=model.values,
2726            config=config,
2727            parameters=model.parameters or {},
2728        )
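
        # A hypothetical manifest fragment, shown as the parsed dict from which a
        # ListPartitionRouterModel is built: one partition per value, injected into
        # the request as a query parameter:
        #
        #   {
        #       "type": "ListPartitionRouter",
        #       "cursor_field": "section",
        #       "values": ["news", "sports", "weather"],
        #       "request_option": {
        #           "type": "RequestOption",
        #           "inject_into": "request_parameter",
        #           "field_name": "section",
        #       },
        #   }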
2729
2730    @staticmethod
2731    def create_min_max_datetime(
2732        model: MinMaxDatetimeModel, config: Config, **kwargs: Any
2733    ) -> MinMaxDatetime:
2734        return MinMaxDatetime(
2735            datetime=model.datetime,
2736            datetime_format=model.datetime_format or "",
2737            max_datetime=model.max_datetime or "",
2738            min_datetime=model.min_datetime or "",
2739            parameters=model.parameters or {},
2740        )
2741
2742    @staticmethod
2743    def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
2744        return NoAuth(parameters=model.parameters or {})
2745
2746    @staticmethod
2747    def create_no_pagination(
2748        model: NoPaginationModel, config: Config, **kwargs: Any
2749    ) -> NoPagination:
2750        return NoPagination(parameters={})
2751
2752    def create_oauth_authenticator(
2753        self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
2754    ) -> DeclarativeOauth2Authenticator:
2755        profile_assertion = (
2756            self._create_component_from_model(model.profile_assertion, config=config)
2757            if model.profile_assertion
2758            else None
2759        )
2760
2761        if model.refresh_token_updater:
2762            # ignore type error because fixing it would touch many dependencies; revisit later
2763            return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
2764                config,
2765                InterpolatedString.create(
2766                    model.token_refresh_endpoint,  # type: ignore
2767                    parameters=model.parameters or {},
2768                ).eval(config),
2769                access_token_name=InterpolatedString.create(
2770                    model.access_token_name or "access_token", parameters=model.parameters or {}
2771                ).eval(config),
2772                refresh_token_name=model.refresh_token_updater.refresh_token_name,
2773                expires_in_name=InterpolatedString.create(
2774                    model.expires_in_name or "expires_in", parameters=model.parameters or {}
2775                ).eval(config),
2776                client_id_name=InterpolatedString.create(
2777                    model.client_id_name or "client_id", parameters=model.parameters or {}
2778                ).eval(config),
2779                client_id=InterpolatedString.create(
2780                    model.client_id, parameters=model.parameters or {}
2781                ).eval(config)
2782                if model.client_id
2783                else model.client_id,
2784                client_secret_name=InterpolatedString.create(
2785                    model.client_secret_name or "client_secret", parameters=model.parameters or {}
2786                ).eval(config),
2787                client_secret=InterpolatedString.create(
2788                    model.client_secret, parameters=model.parameters or {}
2789                ).eval(config)
2790                if model.client_secret
2791                else model.client_secret,
2792                access_token_config_path=model.refresh_token_updater.access_token_config_path,
2793                refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
2794                token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
2795                grant_type_name=InterpolatedString.create(
2796                    model.grant_type_name or "grant_type", parameters=model.parameters or {}
2797                ).eval(config),
2798                grant_type=InterpolatedString.create(
2799                    model.grant_type or "refresh_token", parameters=model.parameters or {}
2800                ).eval(config),
2801                refresh_request_body=InterpolatedMapping(
2802                    model.refresh_request_body or {}, parameters=model.parameters or {}
2803                ).eval(config),
2804                refresh_request_headers=InterpolatedMapping(
2805                    model.refresh_request_headers or {}, parameters=model.parameters or {}
2806                ).eval(config),
2807                scopes=model.scopes,
2808                token_expiry_date_format=model.token_expiry_date_format,
2809                token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2810                message_repository=self._message_repository,
2811                refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
2812                refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
2813                refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
2814            )
2815        # ignore type error because fixing it would touch many dependencies; revisit later
2816        return DeclarativeOauth2Authenticator(  # type: ignore
2817            access_token_name=model.access_token_name or "access_token",
2818            access_token_value=model.access_token_value,
2819            client_id_name=model.client_id_name or "client_id",
2820            client_id=model.client_id,
2821            client_secret_name=model.client_secret_name or "client_secret",
2822            client_secret=model.client_secret,
2823            expires_in_name=model.expires_in_name or "expires_in",
2824            grant_type_name=model.grant_type_name or "grant_type",
2825            grant_type=model.grant_type or "refresh_token",
2826            refresh_request_body=model.refresh_request_body,
2827            refresh_request_headers=model.refresh_request_headers,
2828            refresh_token_name=model.refresh_token_name or "refresh_token",
2829            refresh_token=model.refresh_token,
2830            scopes=model.scopes,
2831            token_expiry_date=model.token_expiry_date,
2832            token_expiry_date_format=model.token_expiry_date_format,
2833            token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2834            token_refresh_endpoint=model.token_refresh_endpoint,
2835            config=config,
2836            parameters=model.parameters or {},
2837            message_repository=self._message_repository,
2838            profile_assertion=profile_assertion,
2839            use_profile_assertion=model.use_profile_assertion,
2840        )
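
        # A hypothetical manifest fragment, shown as the parsed dict from which an
        # OAuthAuthenticatorModel is built, for the plain branch above (no
        # refresh_token_updater); defaults such as grant_type="refresh_token" apply
        # when omitted:
        #
        #   {
        #       "type": "OAuthAuthenticator",
        #       "token_refresh_endpoint": "https://api.example.com/oauth/token",
        #       "client_id": "{{ config['client_id'] }}",
        #       "client_secret": "{{ config['client_secret'] }}",
        #       "refresh_token": "{{ config['refresh_token'] }}",
        #   }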
2841
2842    def create_offset_increment(
2843        self,
2844        model: OffsetIncrementModel,
2845        config: Config,
2846        decoder: Decoder,
2847        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2848        **kwargs: Any,
2849    ) -> OffsetIncrement:
2850        if isinstance(decoder, PaginationDecoderDecorator):
2851            inner_decoder = decoder.decoder
2852        else:
2853            inner_decoder = decoder
2854            decoder = PaginationDecoderDecorator(decoder=decoder)
2855
2856        if self._is_supported_decoder_for_pagination(inner_decoder):
2857            decoder_to_use = decoder
2858        else:
2859            raise ValueError(
2860                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
2861            )
2862
2863        # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
2864        # so that it could be shared by OffsetIncrement and RecordSelector. However, because we decorate the
2865        # decoder here but not in create_record_selector, it is simpler to retain the existing behavior of two
2866        # separate extractors with identical behavior, since they are built from the same extractor model.
2867        # When we have more time to investigate, we can look into reusing the same component.
2868        extractor = (
2869            self._create_component_from_model(
2870                model=extractor_model, config=config, decoder=decoder_to_use
2871            )
2872            if extractor_model
2873            else None
2874        )
2875
2876        return OffsetIncrement(
2877            page_size=model.page_size,
2878            config=config,
2879            decoder=decoder_to_use,
2880            extractor=extractor,
2881            inject_on_first_request=model.inject_on_first_request or False,
2882            parameters=model.parameters or {},
2883        )
2884
2885    @staticmethod
2886    def create_page_increment(
2887        model: PageIncrementModel, config: Config, **kwargs: Any
2888    ) -> PageIncrement:
2889        return PageIncrement(
2890            page_size=model.page_size,
2891            config=config,
2892            start_from_page=model.start_from_page or 0,
2893            inject_on_first_request=model.inject_on_first_request or False,
2894            parameters=model.parameters or {},
2895        )
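
        # Hypothetical manifest fragments, shown as the parsed dicts handled by the
        # two pagination-strategy methods above; neither injects a value on the first
        # request by default:
        #
        #   {"type": "OffsetIncrement", "page_size": 100}
        #   {"type": "PageIncrement", "page_size": 50, "start_from_page": 1}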
2896
2897    def create_parent_stream_config(
2898        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2899    ) -> ParentStreamConfig:
2900        declarative_stream = self._create_component_from_model(
2901            model.stream, config=config, **kwargs
2902        )
2903        request_option = (
2904            self._create_component_from_model(model.request_option, config=config)
2905            if model.request_option
2906            else None
2907        )
2908
2909        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
2910            raise ValueError(
2911                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
2912            )
2913
2914        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
2915            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
2916        )
2917
2918        return ParentStreamConfig(
2919            parent_key=model.parent_key,
2920            request_option=request_option,
2921            stream=declarative_stream,
2922            partition_field=model.partition_field,
2923            config=config,
2924            incremental_dependency=model.incremental_dependency or False,
2925            parameters=model.parameters or {},
2926            extra_fields=model.extra_fields,
2927            lazy_read_pointer=model_lazy_read_pointer,
2928        )
2929
2930    def create_properties_from_endpoint(
2931        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
2932    ) -> PropertiesFromEndpoint:
2933        retriever = self._create_component_from_model(
2934            model=model.retriever,
2935            config=config,
2936            name="dynamic_properties",
2937            primary_key=None,
2938            stream_slicer=None,
2939            transformations=[],
2940            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ
2941        )
2942        return PropertiesFromEndpoint(
2943            property_field_path=model.property_field_path,
2944            retriever=retriever,
2945            config=config,
2946            parameters=model.parameters or {},
2947        )
2948
2949    def create_property_chunking(
2950        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2951    ) -> PropertyChunking:
2952        record_merge_strategy = (
2953            self._create_component_from_model(
2954                model=model.record_merge_strategy, config=config, **kwargs
2955            )
2956            if model.record_merge_strategy
2957            else None
2958        )
2959
2960        property_limit_type: PropertyLimitType
2961        match model.property_limit_type:
2962            case PropertyLimitTypeModel.property_count:
2963                property_limit_type = PropertyLimitType.property_count
2964            case PropertyLimitTypeModel.characters:
2965                property_limit_type = PropertyLimitType.characters
2966            case _:
2967                raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")
2968
2969        return PropertyChunking(
2970            property_limit_type=property_limit_type,
2971            property_limit=model.property_limit,
2972            record_merge_strategy=record_merge_strategy,
2973            config=config,
2974            parameters=model.parameters or {},
2975        )
2976
2977    def create_query_properties(
2978        self, model: QueryPropertiesModel, config: Config, **kwargs: Any
2979    ) -> QueryProperties:
2980        if isinstance(model.property_list, list):
2981            property_list = model.property_list
2982        else:
2983            property_list = self._create_component_from_model(
2984                model=model.property_list, config=config, **kwargs
2985            )
2986
2987        property_chunking = (
2988            self._create_component_from_model(
2989                model=model.property_chunking, config=config, **kwargs
2990            )
2991            if model.property_chunking
2992            else None
2993        )
2994
2995        return QueryProperties(
2996            property_list=property_list,
2997            always_include_properties=model.always_include_properties,
2998            property_chunking=property_chunking,
2999            config=config,
3000            parameters=model.parameters or {},
3001        )
3002
3003    @staticmethod
3004    def create_record_filter(
3005        model: RecordFilterModel, config: Config, **kwargs: Any
3006    ) -> RecordFilter:
3007        return RecordFilter(
3008            condition=model.condition or "", config=config, parameters=model.parameters or {}
3009        )
3010
3011    @staticmethod
3012    def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
3013        return RequestPath(parameters={})
3014
3015    @staticmethod
3016    def create_request_option(
3017        model: RequestOptionModel, config: Config, **kwargs: Any
3018    ) -> RequestOption:
3019        inject_into = RequestOptionType(model.inject_into.value)
3020        field_path: Optional[List[Union[InterpolatedString, str]]] = (
3021            [
3022                InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
3023                for segment in model.field_path
3024            ]
3025            if model.field_path
3026            else None
3027        )
3028        field_name = (
3029            InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
3030            if model.field_name
3031            else None
3032        )
3033        return RequestOption(
3034            field_name=field_name,
3035            field_path=field_path,
3036            inject_into=inject_into,
3037            parameters=kwargs.get("parameters", {}),
3038        )
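
        # A hypothetical manifest fragment, shown as the parsed dict from which a
        # RequestOptionModel is built; inject_into takes a RequestOptionType value
        # such as "request_parameter", "header", "body_data" or "body_json":
        #
        #   {"type": "RequestOption", "inject_into": "header", "field_name": "X-Cursor"}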
3039
3040    def create_record_selector(
3041        self,
3042        model: RecordSelectorModel,
3043        config: Config,
3044        *,
3045        name: str,
3046        transformations: List[RecordTransformation] | None = None,
3047        decoder: Decoder | None = None,
3048        client_side_incremental_sync: Dict[str, Any] | None = None,
3049        file_uploader: Optional[DefaultFileUploader] = None,
3050        **kwargs: Any,
3051    ) -> RecordSelector:
3052        extractor = self._create_component_from_model(
3053            model=model.extractor, decoder=decoder, config=config
3054        )
3055        record_filter = (
3056            self._create_component_from_model(model.record_filter, config=config)
3057            if model.record_filter
3058            else None
3059        )
3060
3061        transform_before_filtering = (
3062            False if model.transform_before_filtering is None else model.transform_before_filtering
3063        )
3064        if client_side_incremental_sync:
3065            record_filter = ClientSideIncrementalRecordFilterDecorator(
3066                config=config,
3067                parameters=model.parameters,
3068                condition=model.record_filter.condition
3069                if (model.record_filter and hasattr(model.record_filter, "condition"))
3070                else None,
3071                **client_side_incremental_sync,
3072            )
3073            transform_before_filtering = (
3074                True
3075                if model.transform_before_filtering is None
3076                else model.transform_before_filtering
3077            )
3078
3079        if model.schema_normalization is None:
3080            # default to no schema normalization if not set
3081            model.schema_normalization = SchemaNormalizationModel.None_
3082
3083        schema_normalization = (
3084            TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
3085            if isinstance(model.schema_normalization, SchemaNormalizationModel)
3086            else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
3087        )
3088
3089        return RecordSelector(
3090            extractor=extractor,
3091            name=name,
3092            config=config,
3093            record_filter=record_filter,
3094            transformations=transformations or [],
3095            file_uploader=file_uploader,
3096            schema_normalization=schema_normalization,
3097            parameters=model.parameters or {},
3098            transform_before_filtering=transform_before_filtering,
3099        )
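
        # A hypothetical manifest fragment, shown as the parsed dict from which a
        # RecordSelectorModel is built, combining an extractor with a record filter
        # as wired together above:
        #
        #   {
        #       "type": "RecordSelector",
        #       "extractor": {"type": "DpathExtractor", "field_path": ["results"]},
        #       "record_filter": {
        #           "type": "RecordFilter",
        #           "condition": "{{ record['status'] == 'active' }}",
        #       },
        #   }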
3100
3101    @staticmethod
3102    def create_remove_fields(
3103        model: RemoveFieldsModel, config: Config, **kwargs: Any
3104    ) -> RemoveFields:
3105        return RemoveFields(
3106            field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
3107        )
3108
3109    def create_selective_authenticator(
3110        self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
3111    ) -> DeclarativeAuthenticator:
3112        authenticators = {
3113            name: self._create_component_from_model(model=auth, config=config)
3114            for name, auth in model.authenticators.items()
3115        }
3116        # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
3117        return SelectiveAuthenticator(  # type: ignore[abstract]
3118            config=config,
3119            authenticators=authenticators,
3120            authenticator_selection_path=model.authenticator_selection_path,
3121            **kwargs,
3122        )
3123
3124    @staticmethod
3125    def create_legacy_session_token_authenticator(
3126        model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
3127    ) -> LegacySessionTokenAuthenticator:
3128        return LegacySessionTokenAuthenticator(
3129            api_url=url_base,
3130            header=model.header,
3131            login_url=model.login_url,
3132            password=model.password or "",
3133            session_token=model.session_token or "",
3134            session_token_response_key=model.session_token_response_key or "",
3135            username=model.username or "",
3136            validate_session_url=model.validate_session_url,
3137            config=config,
3138            parameters=model.parameters or {},
3139        )
3140
3141    def create_simple_retriever(
3142        self,
3143        model: SimpleRetrieverModel,
3144        config: Config,
3145        *,
3146        name: str,
3147        primary_key: Optional[Union[str, List[str], List[List[str]]]],
3148        stream_slicer: Optional[StreamSlicer],
3149        request_options_provider: Optional[RequestOptionsProvider] = None,
3150        stop_condition_on_cursor: bool = False,
3151        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3152        transformations: List[RecordTransformation],
3153        file_uploader: Optional[DefaultFileUploader] = None,
3154        incremental_sync: Optional[
3155            Union[
3156                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
3157            ]
3158        ] = None,
3159        use_cache: Optional[bool] = None,
3160        log_formatter: Optional[Callable[[Response], Any]] = None,
3161        **kwargs: Any,
3162    ) -> SimpleRetriever:
3163        def _get_url() -> str:
3164            """
3165            Closure to get the URL from the requester, used in the case of a lazy retriever
3166            because the URL is not set until the requester is created.
3167            """
3168
3169            _url: str = (
3170                model.requester.url
3171                if hasattr(model.requester, "url") and model.requester.url is not None
3172                else requester.get_url()
3173            )
3174            _url_base: str = (
3175                model.requester.url_base
3176                if hasattr(model.requester, "url_base") and model.requester.url_base is not None
3177                else requester.get_url_base()
3178            )
3179
3180            return _url or _url_base
3181
3182        decoder = (
3183            self._create_component_from_model(model=model.decoder, config=config)
3184            if model.decoder
3185            else JsonDecoder(parameters={})
3186        )
3187        record_selector = self._create_component_from_model(
3188            model=model.record_selector,
3189            name=name,
3190            config=config,
3191            decoder=decoder,
3192            transformations=transformations,
3193            client_side_incremental_sync=client_side_incremental_sync,
3194            file_uploader=file_uploader,
3195        )
3196
3197        query_properties: Optional[QueryProperties] = None
3198        query_properties_key: Optional[str] = None
3199        if self._query_properties_in_request_parameters(model.requester):
3200            # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple
3201            # places instead of defaulting to request_parameters, which isn't clearly documented
3202            if (
3203                hasattr(model.requester, "fetch_properties_from_endpoint")
3204                and model.requester.fetch_properties_from_endpoint
3205            ):
3206                raise ValueError(
3207                    f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters"
3208                )
3209
3210            query_properties_definitions = []
3211            for key, request_parameter in model.requester.request_parameters.items():  # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters()
3212                if isinstance(request_parameter, QueryPropertiesModel):
3213                    query_properties_key = key
3214                    query_properties_definitions.append(request_parameter)
3215
3216            if len(query_properties_definitions) > 1:
3217                raise ValueError(
3218                    f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
3219                )
3220
3221            if len(query_properties_definitions) == 1:
3222                query_properties = self._create_component_from_model(
3223                    model=query_properties_definitions[0], config=config
3224                )
3225        elif (
3226            hasattr(model.requester, "fetch_properties_from_endpoint")
3227            and model.requester.fetch_properties_from_endpoint
3228        ):
3229            # todo: Deprecate this condition once dependent connectors migrate to query_properties
3230            query_properties_definition = QueryPropertiesModel(
3231                type="QueryProperties",
3232                property_list=model.requester.fetch_properties_from_endpoint,
3233                always_include_properties=None,
3234                property_chunking=None,
3235            )  # type: ignore # $parameters has a default value
3236
3237            query_properties = self.create_query_properties(
3238                model=query_properties_definition,
3239                config=config,
3240            )
3241        elif hasattr(model.requester, "query_properties") and model.requester.query_properties:
3242            query_properties = self.create_query_properties(
3243                model=model.requester.query_properties,
3244                config=config,
3245            )
3246
3247        requester = self._create_component_from_model(
3248            model=model.requester,
3249            decoder=decoder,
3250            name=name,
3251            query_properties_key=query_properties_key,
3252            use_cache=use_cache,
3253            config=config,
3254        )
3255
3256        # Define the cursor only if per-partition or common incremental support is needed
3257        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
3258
3259        if type(stream_slicer) is not DatetimeBasedCursor:
3263            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
3264            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
3265            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
3266            # request_options_provider
3267            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
3268        elif not request_options_provider:
3269            request_options_provider = DefaultRequestOptionsProvider(parameters={})
3270
3271        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3272        if self._should_limit_slices_fetched():
3273            stream_slicer = cast(
3274                StreamSlicer,
3275                StreamSlicerTestReadDecorator(
3276                    wrapped_slicer=stream_slicer,
3277                    maximum_number_of_slices=self._limit_slices_fetched or 5,
3278                ),
3279            )
3280
3281        cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
3282        paginator = (
3283            self._create_component_from_model(
3284                model=model.paginator,
3285                config=config,
3286                url_base=_get_url(),
3287                extractor_model=model.record_selector.extractor,
3288                decoder=decoder,
3289                cursor_used_for_stop_condition=cursor_used_for_stop_condition,
3290            )
3291            if model.paginator
3292            else NoPagination(parameters={})
3293        )
3294
3295        ignore_stream_slicer_parameters_on_paginated_requests = (
3296            model.ignore_stream_slicer_parameters_on_paginated_requests or False
3297        )
3298
3299        if (
3300            model.partition_router
3301            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
3302            and not bool(self._connector_state_manager.get_stream_state(name, None))
3303            and any(
3304                parent_stream_config.lazy_read_pointer
3305                for parent_stream_config in model.partition_router.parent_stream_configs
3306            )
3307        ):
3308            if incremental_sync:
3309                if incremental_sync.type != "DatetimeBasedCursor":
3310                    raise ValueError(
3311                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
3312                    )
3313
3314                elif incremental_sync.step or incremental_sync.cursor_granularity:
3315                    raise ValueError(
3316                        f"Found more than one slice per parent. LazySimpleRetriever only supports a single-slice read for stream - {name}."
3317                    )
3318
3319            if model.decoder and model.decoder.type != "JsonDecoder":
3320                raise ValueError(
3321                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
3322                )
3323
3324            return LazySimpleRetriever(
3325                name=name,
3326                paginator=paginator,
3327                primary_key=primary_key,
3328                requester=requester,
3329                record_selector=record_selector,
3330                stream_slicer=stream_slicer,
3331                request_option_provider=request_options_provider,
3332                cursor=cursor,
3333                config=config,
3334                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3335                parameters=model.parameters or {},
3336            )
3337
3338        return SimpleRetriever(
3339            name=name,
3340            paginator=paginator,
3341            primary_key=primary_key,
3342            requester=requester,
3343            record_selector=record_selector,
3344            stream_slicer=stream_slicer,
3345            request_option_provider=request_options_provider,
3346            cursor=cursor,
3347            config=config,
3348            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3349            additional_query_properties=query_properties,
3350            log_formatter=self._get_log_formatter(log_formatter, name),
3351            parameters=model.parameters or {},
3352        )
3353
3354    def _get_log_formatter(
3355        self, log_formatter: Callable[[Response], Any] | None, name: str
3356    ) -> Callable[[Response], Any] | None:
3357        if self._should_limit_slices_fetched():
3358            return (
3359                (
3360                    lambda response: format_http_message(
3361                        response,
3362                        f"Stream '{name}' request",
3363                        f"Request performed in order to extract records for stream '{name}'",
3364                        name,
3365                    )
3366                )
3367                if not log_formatter
3368                else log_formatter
3369            )
3370        return None
3371
3372    def _should_limit_slices_fetched(self) -> bool:
3373        """
3374        Returns True if the number of slices fetched should be limited, False otherwise.
3375        This is used to limit the number of slices fetched during tests.
3376        """
3377        return bool(self._limit_slices_fetched or self._emit_connector_builder_messages)
3378
3379    @staticmethod
3380    def _query_properties_in_request_parameters(
3381        requester: Union[HttpRequesterModel, CustomRequesterModel],
3382    ) -> bool:
3383        if not hasattr(requester, "request_parameters"):
3384            return False
3385        request_parameters = requester.request_parameters
3386        if request_parameters and isinstance(request_parameters, Mapping):
3387            for request_parameter in request_parameters.values():
3388                if isinstance(request_parameter, QueryPropertiesModel):
3389                    return True
3390        return False
3391
3392    @staticmethod
3393    def _remove_query_properties(
3394        request_parameters: Mapping[str, Union[str, QueryPropertiesModel]],
3395    ) -> Mapping[str, str]:
3396        return {
3397            parameter_field: request_parameter
3398            for parameter_field, request_parameter in request_parameters.items()
3399            if not isinstance(request_parameter, QueryPropertiesModel)
3400        }
3401
3402    def create_state_delegating_stream(
3403        self,
3404        model: StateDelegatingStreamModel,
3405        config: Config,
3406        has_parent_state: Optional[bool] = None,
3407        **kwargs: Any,
3408    ) -> DeclarativeStream:
3409        if (
3410            model.full_refresh_stream.name != model.name
3411            or model.name != model.incremental_stream.name
3412        ):
3413            raise ValueError(
3414                f"The state_delegating_stream, its full_refresh_stream and its incremental_stream must all have the same name. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
3415            )
3416
3417        stream_model = (
3418            model.incremental_stream
3419            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
3420            else model.full_refresh_stream
3421        )
3422
3423        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # a DeclarativeStream is created since stream_model is a stream description
3424
3425    def _create_async_job_status_mapping(
3426        self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
3427    ) -> Mapping[str, AsyncJobStatus]:
3428        api_status_to_cdk_status = {}
3429        for cdk_status, api_statuses in model.dict().items():
3430            if cdk_status == "type":
3431                # "type" is an element of the dict because of the CDK model typing, but it is not a CDK status
3432                continue
3433
3434            for status in api_statuses:
3435                if status in api_status_to_cdk_status:
3436                    raise ValueError(
3437                        f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once"
3438                    )
3439                api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status)
3440        return api_status_to_cdk_status
3441
3442    def _get_async_job_status(self, status: str) -> AsyncJobStatus:
3443        match status:
3444            case "running":
3445                return AsyncJobStatus.RUNNING
3446            case "completed":
3447                return AsyncJobStatus.COMPLETED
3448            case "failed":
3449                return AsyncJobStatus.FAILED
3450            case "timeout":
3451                return AsyncJobStatus.TIMED_OUT
3452            case _:
3453                raise ValueError(f"Unsupported CDK status {status}")
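
        # A hypothetical status_mapping fragment, shown as the parsed dict consumed by
        # _create_async_job_status_mapping above; each API status may appear only once:
        #
        #   {
        #       "type": "AsyncJobStatusMap",
        #       "running": ["pending", "in_progress"],
        #       "completed": ["done"],
        #       "failed": ["error"],
        #       "timeout": ["expired"],
        #   }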
3454
3455    def create_async_retriever(
3456        self,
3457        model: AsyncRetrieverModel,
3458        config: Config,
3459        *,
3460        name: str,
3461        primary_key: Optional[
3462            Union[str, List[str], List[List[str]]]
3463        ],  # this seems to be needed to match create_simple_retriever
3464        stream_slicer: Optional[StreamSlicer],
3465        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3466        transformations: List[RecordTransformation],
3467        **kwargs: Any,
3468    ) -> AsyncRetriever:
3469        def _get_download_retriever() -> SimpleRetriever:
3470            # We create a record selector for the download retriever with no schema
3471            # normalization, no transformations, and no record filter, as all of this
3472            # occurs in the record_selector of the AsyncRetriever
3473            record_selector = RecordSelector(
3474                extractor=download_extractor,
3475                name=name,
3476                record_filter=None,
3477                transformations=[],
3478                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
3479                config=config,
3480                parameters={},
3481            )
3482            paginator = (
3483                self._create_component_from_model(
3484                    model=model.download_paginator,
3485                    decoder=decoder,
3486                    config=config,
3487                    url_base="",
3488                )
3489                if model.download_paginator
3490                else NoPagination(parameters={})
3491            )
3492
3493            return SimpleRetriever(
3494                requester=download_requester,
3495                record_selector=record_selector,
3496                primary_key=None,
3497                name=job_download_components_name,
3498                paginator=paginator,
3499                config=config,
3500                parameters={},
3501            )
3502
3503        def _get_job_timeout() -> datetime.timedelta:
3504            user_defined_timeout: Optional[int] = (
3505                int(
3506                    InterpolatedString.create(
3507                        str(model.polling_job_timeout),
3508                        parameters={},
3509                    ).eval(config)
3510                )
3511                if model.polling_job_timeout
3512                else None
3513            )
3514
3515            # for test reads, use the user-defined timeout or fall back to 15 minutes
3516            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
3517            # for regular (non-Connector Builder) syncs, fall back to 60 minutes
3518            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
3519
3520            return (
3521                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
3522            )
3523
3524        decoder = (
3525            self._create_component_from_model(model=model.decoder, config=config)
3526            if model.decoder
3527            else JsonDecoder(parameters={})
3528        )
3529        record_selector = self._create_component_from_model(
3530            model=model.record_selector,
3531            config=config,
3532            decoder=decoder,
3533            name=name,
3534            transformations=transformations,
3535            client_side_incremental_sync=client_side_incremental_sync,
3536        )
3537
3538        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3539        if self._should_limit_slices_fetched():
3540            stream_slicer = cast(
3541                StreamSlicer,
3542                StreamSlicerTestReadDecorator(
3543                    wrapped_slicer=stream_slicer,
3544                    maximum_number_of_slices=self._limit_slices_fetched or 5,
3545                ),
3546            )
3547
3548        creation_requester = self._create_component_from_model(
3549            model=model.creation_requester,
3550            decoder=decoder,
3551            config=config,
3552            name=f"job creation - {name}",
3553        )
3554        polling_requester = self._create_component_from_model(
3555            model=model.polling_requester,
3556            decoder=decoder,
3557            config=config,
3558            name=f"job polling - {name}",
3559        )
3560        job_download_components_name = f"job download - {name}"
3561        download_decoder = (
3562            self._create_component_from_model(model=model.download_decoder, config=config)
3563            if model.download_decoder
3564            else JsonDecoder(parameters={})
3565        )
3566        download_extractor = (
3567            self._create_component_from_model(
3568                model=model.download_extractor,
3569                config=config,
3570                decoder=download_decoder,
3571                parameters=model.parameters,
3572            )
3573            if model.download_extractor
3574            else DpathExtractor(
3575                [],
3576                config=config,
3577                decoder=download_decoder,
3578                parameters=model.parameters or {},
3579            )
3580        )
3581        download_requester = self._create_component_from_model(
3582            model=model.download_requester,
3583            decoder=download_decoder,
3584            config=config,
3585            name=job_download_components_name,
3586        )
3587        download_retriever = _get_download_retriever()
3588        abort_requester = (
3589            self._create_component_from_model(
3590                model=model.abort_requester,
3591                decoder=decoder,
3592                config=config,
3593                name=f"job abort - {name}",
3594            )
3595            if model.abort_requester
3596            else None
3597        )
3598        delete_requester = (
3599            self._create_component_from_model(
3600                model=model.delete_requester,
3601                decoder=decoder,
3602                config=config,
3603                name=f"job delete - {name}",
3604            )
3605            if model.delete_requester
3606            else None
3607        )
3608        download_target_requester = (
3609            self._create_component_from_model(
3610                model=model.download_target_requester,
3611                decoder=decoder,
3612                config=config,
3613                name=f"job extract_url - {name}",
3614            )
3615            if model.download_target_requester
3616            else None
3617        )
3618        status_extractor = self._create_component_from_model(
3619            model=model.status_extractor, decoder=decoder, config=config, name=name
3620        )
3621        download_target_extractor = self._create_component_from_model(
3622            model=model.download_target_extractor,
3623            decoder=decoder,
3624            config=config,
3625            name=name,
3626        )
3627
3628        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
3629            creation_requester=creation_requester,
3630            polling_requester=polling_requester,
3631            download_retriever=download_retriever,
3632            download_target_requester=download_target_requester,
3633            abort_requester=abort_requester,
3634            delete_requester=delete_requester,
3635            status_extractor=status_extractor,
3636            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3637            download_target_extractor=download_target_extractor,
3638            job_timeout=_get_job_timeout(),
3639        )
3640
3641        async_job_partition_router = AsyncJobPartitionRouter(
3642            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3643                job_repository,
3644                stream_slices,
3645                self._job_tracker,
3646                self._message_repository,
3647                # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
3648                has_bulk_parent=False,
3649                # set the `job_max_retry` to 1 for the `Connector Builder` use-case.
3650                # `None` means the default of 3 retry attempts is used under the hood.
3651                job_max_retry=1 if self._emit_connector_builder_messages else None,
3652            ),
3653            stream_slicer=stream_slicer,
3654            config=config,
3655            parameters=model.parameters or {},
3656        )
3657
3658        return AsyncRetriever(
3659            record_selector=record_selector,
3660            stream_slicer=async_job_partition_router,
3661            config=config,
3662            parameters=model.parameters or {},
3663        )
3664
3665    def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec:
3666        config_migrations = [
3667            self._create_component_from_model(migration, config)
3668            for migration in (
3669                model.config_normalization_rules.config_migrations
3670                if (
3671                    model.config_normalization_rules
3672                    and model.config_normalization_rules.config_migrations
3673                )
3674                else []
3675            )
3676        ]
3677        config_transformations = [
3678            self._create_component_from_model(transformation, config)
3679            for transformation in (
3680                model.config_normalization_rules.transformations
3681                if (
3682                    model.config_normalization_rules
3683                    and model.config_normalization_rules.transformations
3684                )
3685                else []
3686            )
3687        ]
3688        config_validations = [
3689            self._create_component_from_model(validation, config)
3690            for validation in (
3691                model.config_normalization_rules.validations
3692                if (
3693                    model.config_normalization_rules
3694                    and model.config_normalization_rules.validations
3695                )
3696                else []
3697            )
3698        ]
3699
3700        return Spec(
3701            connection_specification=model.connection_specification,
3702            documentation_url=model.documentation_url,
3703            advanced_auth=model.advanced_auth,
3704            parameters={},
3705            config_migrations=config_migrations,
3706            config_transformations=config_transformations,
3707            config_validations=config_validations,
3708        )
3709
3710    def create_substream_partition_router(
3711        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
3712    ) -> SubstreamPartitionRouter:
3713        parent_stream_configs = []
3714        if model.parent_stream_configs:
3715            parent_stream_configs.extend(
3716                [
3717                    self._create_message_repository_substream_wrapper(
3718                        model=parent_stream_config, config=config, **kwargs
3719                    )
3720                    for parent_stream_config in model.parent_stream_configs
3721                ]
3722            )
3723
3724        return SubstreamPartitionRouter(
3725            parent_stream_configs=parent_stream_configs,
3726            parameters=model.parameters or {},
3727            config=config,
3728        )
3729
3730    def _create_message_repository_substream_wrapper(
3731        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
3732    ) -> Any:
3733        substream_factory = ModelToComponentFactory(
3734            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
3735            limit_slices_fetched=self._limit_slices_fetched,
3736            emit_connector_builder_messages=self._emit_connector_builder_messages,
3737            disable_retries=self._disable_retries,
3738            disable_cache=self._disable_cache,
3739            message_repository=LogAppenderMessageRepositoryDecorator(
3740                {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}},
3741                self._message_repository,
3742                self._evaluate_log_level(self._emit_connector_builder_messages),
3743            ),
3744        )
3745
3746        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
3747        has_parent_state = bool(
3748            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
3749            if model.incremental_dependency
3750            else False
3751        )
3752        return substream_factory._create_component_from_model(
3753            model=model, config=config, has_parent_state=has_parent_state, **kwargs
3754        )
3755
3756    @staticmethod
3757    def create_wait_time_from_header(
3758        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
3759    ) -> WaitTimeFromHeaderBackoffStrategy:
3760        return WaitTimeFromHeaderBackoffStrategy(
3761            header=model.header,
3762            parameters=model.parameters or {},
3763            config=config,
3764            regex=model.regex,
3765            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds
3766            if model.max_waiting_time_in_seconds is not None
3767            else None,
3768        )
3769
3770    @staticmethod
3771    def create_wait_until_time_from_header(
3772        model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
3773    ) -> WaitUntilTimeFromHeaderBackoffStrategy:
3774        return WaitUntilTimeFromHeaderBackoffStrategy(
3775            header=model.header,
3776            parameters=model.parameters or {},
3777            config=config,
3778            min_wait=model.min_wait,
3779            regex=model.regex,
3780        )
3781
3782    def get_message_repository(self) -> MessageRepository:
3783        return self._message_repository
3784
3785    def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level:
3786        return Level.DEBUG if emit_connector_builder_messages else Level.INFO
3787
3788    @staticmethod
3789    def create_components_mapping_definition(
3790        model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
3791    ) -> ComponentMappingDefinition:
3792        interpolated_value = InterpolatedString.create(
3793            model.value, parameters=model.parameters or {}
3794        )
3795        field_path = [
3796            InterpolatedString.create(path, parameters=model.parameters or {})
3797            for path in model.field_path
3798        ]
3799        return ComponentMappingDefinition(
3800            field_path=field_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
3801            value=interpolated_value,
3802            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
3803            create_or_update=model.create_or_update,
3804            parameters=model.parameters or {},
3805        )
3806
3807    def create_http_components_resolver(
3808        self, model: HttpComponentsResolverModel, config: Config
3809    ) -> Any:
3810        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3811        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3812
3813        retriever = self._create_component_from_model(
3814            model=model.retriever,
3815            config=config,
3816            name="",
3817            primary_key=None,
3818            stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3819            transformations=[],
3820        )
3821
3822        components_mapping = [
3823            self._create_component_from_model(
3824                model=components_mapping_definition_model,
3825                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3826                    components_mapping_definition_model.value_type
3827                ),
3828                config=config,
3829            )
3830            for components_mapping_definition_model in model.components_mapping
3831        ]
3832
3833        return HttpComponentsResolver(
3834            retriever=retriever,
3835            config=config,
3836            components_mapping=components_mapping,
3837            parameters=model.parameters or {},
3838        )
3839
3840    @staticmethod
3841    def create_stream_config(
3842        model: StreamConfigModel, config: Config, **kwargs: Any
3843    ) -> StreamConfig:
3844        model_configs_pointer: List[Union[InterpolatedString, str]] = (
3845            [x for x in model.configs_pointer] if model.configs_pointer else []
3846        )
3847
3848        return StreamConfig(
3849            configs_pointer=model_configs_pointer,
3850            default_values=model.default_values,
3851            parameters=model.parameters or {},
3852        )
3853
3854    def create_config_components_resolver(
3855        self, model: ConfigComponentsResolverModel, config: Config
3856    ) -> Any:
3857        model_stream_configs = (
3858            model.stream_config if isinstance(model.stream_config, list) else [model.stream_config]
3859        )
3860
3861        stream_configs = [
3862            self._create_component_from_model(
3863                stream_config, config=config, parameters=model.parameters or {}
3864            )
3865            for stream_config in model_stream_configs
3866        ]
3867
3868        components_mapping = [
3869            self._create_component_from_model(
3870                model=components_mapping_definition_model,
3871                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3872                    components_mapping_definition_model.value_type
3873                ),
3874                config=config,
3875            )
3876            for components_mapping_definition_model in model.components_mapping
3877        ]
3878
3879        return ConfigComponentsResolver(
3880            stream_configs=stream_configs,
3881            config=config,
3882            components_mapping=components_mapping,
3883            parameters=model.parameters or {},
3884        )
3885
3886    def create_parametrized_components_resolver(
3887        self, model: ParametrizedComponentsResolverModel, config: Config
3888    ) -> ParametrizedComponentsResolver:
3889        stream_parameters = StreamParametersDefinition(
3890            list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
3891        )
3892        components_mapping = [
3893            self._create_component_from_model(
3894                model=components_mapping_definition_model,
3895                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3896                    components_mapping_definition_model.value_type
3897                ),
3898                config=config,
3899            )
3900            for components_mapping_definition_model in model.components_mapping
3901        ]
3902        return ParametrizedComponentsResolver(
3903            stream_parameters=stream_parameters,
3904            config=config,
3905            components_mapping=components_mapping,
3906            parameters=model.parameters or {},
3907        )
3908
3909    _UNSUPPORTED_DECODER_ERROR = (
3910        "Specified decoder of {decoder_type} is not supported for pagination. "
3911        "Please use `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. "
3912        "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
3913    )
3914
3915    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
3916        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
3917            return True
3918        elif isinstance(decoder, CompositeRawDecoder):
3919            return self._is_supported_parser_for_pagination(decoder.parser)
3920        else:
3921            return False
3922
3923    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
3924        if isinstance(parser, JsonParser):
3925            return True
3926        elif isinstance(parser, GzipParser):
3927            return isinstance(parser.inner_parser, JsonParser)
3928        else:
3929            return False
3930
3931    def create_http_api_budget(
3932        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
3933    ) -> HttpAPIBudget:
3934        policies = [
3935            self._create_component_from_model(model=policy, config=config)
3936            for policy in model.policies
3937        ]
3938
3939        return HttpAPIBudget(
3940            policies=policies,
3941            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
3942            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
3943            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
3944        )
3945
3946    def create_fixed_window_call_rate_policy(
3947        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
3948    ) -> FixedWindowCallRatePolicy:
3949        matchers = [
3950            self._create_component_from_model(model=matcher, config=config)
3951            for matcher in model.matchers
3952        ]
3953
3954        # Set the initial reset timestamp to 10 days from now.
3955        # This value will be updated by the first request.
3956        return FixedWindowCallRatePolicy(
3957            next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
3958            period=parse_duration(model.period),
3959            call_limit=model.call_limit,
3960            matchers=matchers,
3961        )
3962
3963    def create_file_uploader(
3964        self, model: FileUploaderModel, config: Config, **kwargs: Any
3965    ) -> FileUploader:
3966        name = "File Uploader"
3967        requester = self._create_component_from_model(
3968            model=model.requester,
3969            config=config,
3970            name=name,
3971            **kwargs,
3972        )
3973        download_target_extractor = self._create_component_from_model(
3974            model=model.download_target_extractor,
3975            config=config,
3976            name=name,
3977            **kwargs,
3978        )
3979        emit_connector_builder_messages = self._emit_connector_builder_messages
3980        file_uploader = DefaultFileUploader(
3981            requester=requester,
3982            download_target_extractor=download_target_extractor,
3983            config=config,
3984            file_writer=NoopFileWriter()
3985            if emit_connector_builder_messages
3986            else LocalFileSystemFileWriter(),
3987            parameters=model.parameters or {},
3988            filename_extractor=model.filename_extractor if model.filename_extractor else None,
3989        )
3990
3991        return (
3992            ConnectorBuilderFileUploader(file_uploader)
3993            if emit_connector_builder_messages
3994            else file_uploader
3995        )
3996
3997    def create_moving_window_call_rate_policy(
3998        self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3999    ) -> MovingWindowCallRatePolicy:
4000        rates = [
4001            self._create_component_from_model(model=rate, config=config) for rate in model.rates
4002        ]
4003        matchers = [
4004            self._create_component_from_model(model=matcher, config=config)
4005            for matcher in model.matchers
4006        ]
4007        return MovingWindowCallRatePolicy(
4008            rates=rates,
4009            matchers=matchers,
4010        )
4011
4012    def create_unlimited_call_rate_policy(
4013        self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
4014    ) -> UnlimitedCallRatePolicy:
4015        matchers = [
4016            self._create_component_from_model(model=matcher, config=config)
4017            for matcher in model.matchers
4018        ]
4019
4020        return UnlimitedCallRatePolicy(
4021            matchers=matchers,
4022        )
4023
4024    def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
4025        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
4026        return Rate(
4027            limit=int(interpolated_limit.eval(config=config)),
4028            interval=parse_duration(model.interval),
4029        )
4030
4031    def create_http_request_matcher(
4032        self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
4033    ) -> HttpRequestRegexMatcher:
4034        return HttpRequestRegexMatcher(
4035            method=model.method,
4036            url_base=model.url_base,
4037            url_path_pattern=model.url_path_pattern,
4038            params=model.params,
4039            headers=model.headers,
4040        )
4041
4042    def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
4043        self._api_budget = self.create_component(
4044            model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
4045        )
4046
4047    def create_grouping_partition_router(
4048        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
4049    ) -> GroupingPartitionRouter:
4050        underlying_router = self._create_component_from_model(
4051            model=model.underlying_partition_router, config=config
4052        )
4053        if model.group_size < 1:
4054            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
4055
4056        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
4057        # because they are specific to individual partitions and cannot be aggregated or handled
4058        # when grouping, potentially leading to incorrect API calls. Any request customization
4059        # should be managed at the stream level through the requester's configuration.
4060        if isinstance(underlying_router, SubstreamPartitionRouter):
4061            if any(
4062                parent_config.request_option
4063                for parent_config in underlying_router.parent_stream_configs
4064            ):
4065                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
4066
4067        if isinstance(underlying_router, ListPartitionRouter):
4068            if underlying_router.request_option:
4069                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
4070
4071        return GroupingPartitionRouter(
4072            group_size=model.group_size,
4073            underlying_partition_router=underlying_router,
4074            deduplicate=model.deduplicate if model.deduplicate is not None else True,
4075            config=config,
4076        )
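
To make the rate-limiting builders above more concrete, here is a minimal, hedged sketch of wiring an HTTPAPIBudget through set_api_budget. The policy values and the one-minute Rate are hypothetical; the field names follow the create_http_api_budget, create_moving_window_call_rate_policy, and create_rate sources above.

    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    # Builds the budget via create_component(HTTPAPIBudgetModel, ...) and stores it on the factory.
    factory.set_api_budget(
        component_definition={
            "type": "HTTPAPIBudget",
            "policies": [
                {
                    "type": "MovingWindowCallRatePolicy",
                    "rates": [{"type": "Rate", "limit": 10, "interval": "PT1M"}],
                    "matchers": [],
                }
            ],
        },
        config={},
    )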
ModelToComponentFactory( limit_pages_fetched_per_slice: Optional[int] = None, limit_slices_fetched: Optional[int] = None, emit_connector_builder_messages: bool = False, disable_retries: bool = False, disable_cache: bool = False, disable_resumable_full_refresh: bool = False, message_repository: Optional[airbyte_cdk.MessageRepository] = None, connector_state_manager: Optional[airbyte_cdk.ConnectorStateManager] = None, max_concurrent_async_job_count: Optional[int] = None)
631    def __init__(
632        self,
633        limit_pages_fetched_per_slice: Optional[int] = None,
634        limit_slices_fetched: Optional[int] = None,
635        emit_connector_builder_messages: bool = False,
636        disable_retries: bool = False,
637        disable_cache: bool = False,
638        disable_resumable_full_refresh: bool = False,
639        message_repository: Optional[MessageRepository] = None,
640        connector_state_manager: Optional[ConnectorStateManager] = None,
641        max_concurrent_async_job_count: Optional[int] = None,
642    ):
643        self._init_mappings()
644        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
645        self._limit_slices_fetched = limit_slices_fetched
646        self._emit_connector_builder_messages = emit_connector_builder_messages
647        self._disable_retries = disable_retries
648        self._disable_cache = disable_cache
649        self._disable_resumable_full_refresh = disable_resumable_full_refresh
650        self._message_repository = message_repository or InMemoryMessageRepository(
651            self._evaluate_log_level(emit_connector_builder_messages)
652        )
653        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
654        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
655        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
656        # placeholder for deprecation warnings
657        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
EPOCH_DATETIME_FORMAT = '%s'
def create_component( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], config: Mapping[str, Any], **kwargs: Any) -> Any:
772    def create_component(
773        self,
774        model_type: Type[BaseModel],
775        component_definition: ComponentDefinition,
776        config: Config,
777        **kwargs: Any,
778    ) -> Any:
779        """
780        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
781        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
782        creating declarative components from that model.
783
784        :param model_type: The type of declarative component that is being initialized
785        :param component_definition: The mapping that represents a declarative component
786        :param config: The connector config that is provided by the customer
787        :return: The declarative component to be used at runtime
788        """
789
790        component_type = component_definition.get("type")
791        if component_definition.get("type") != model_type.__name__:
792            raise ValueError(
793                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
794            )
795
796        declarative_component_model = model_type.parse_obj(component_definition)
797
798        if not isinstance(declarative_component_model, model_type):
799            raise ValueError(
800                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
801            )
802
803        return self._create_component_from_model(
804            model=declarative_component_model, config=config, **kwargs
805        )

Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating declarative components from that model.

Parameters
  • model_type: The type of declarative component that is being initialized
  • component_definition: The mapping that represents a declarative component
  • config: The connector config that is provided by the customer
Returns

The declarative component to be used at runtime
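
For illustration, a minimal sketch of calling this method directly, assuming a trivial CheckStream manifest snippet (the stream name is hypothetical):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        CheckStream as CheckStreamModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    # The "type" value must match the model class name, otherwise a ValueError is raised.
    checker = factory.create_component(
        model_type=CheckStreamModel,
        component_definition={"type": "CheckStream", "stream_names": ["users"]},
        config={},
    )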

def get_model_deprecations(self) -> List[airbyte_cdk.connector_builder.models.LogMessage]:
822    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
823        """
824        Returns the deprecation warnings that were collected during the creation of components.
825        """
826        return self._collected_deprecation_logs

Returns the deprecation warnings that were collected during the creation of components.
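
A short usage sketch, assuming the factory instance from the create_component example above:

    # Deprecation warnings accumulate as components are created; surface them afterwards,
    # e.g. for the Connector Builder (illustrative only):
    for deprecation_log in factory.get_model_deprecations():
        print(deprecation_log)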

843    def create_config_migration(
844        self, model: ConfigMigrationModel, config: Config
845    ) -> ConfigMigration:
846        transformations: List[ConfigTransformation] = [
847            self._create_component_from_model(transformation, config)
848            for transformation in model.transformations
849        ]
850
851        return ConfigMigration(
852            description=model.description,
853            transformations=transformations,
854        )
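
As a hedged sketch, such a migration can be built through create_component with a manifest snippet along these lines. The field and mapping values are hypothetical; the ConfigRemapField fields follow the create_config_remap_field source below.

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        ConfigMigration as ConfigMigrationModel,
    )

    migration = factory.create_component(
        model_type=ConfigMigrationModel,
        component_definition={
            "type": "ConfigMigration",
            "description": "Rename a legacy plan value",
            "transformations": [
                {
                    "type": "ConfigRemapField",
                    "field_path": ["plan"],
                    "map": {"old_plan_name": "new_plan_name"},
                }
            ],
        },
        config={},
    )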
856    def create_config_add_fields(
857        self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any
858    ) -> ConfigAddFields:
859        fields = [self._create_component_from_model(field, config) for field in model.fields]
860        return ConfigAddFields(
861            fields=fields,
862            condition=model.condition or "",
863        )
865    @staticmethod
866    def create_config_remove_fields(
867        model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any
868    ) -> ConfigRemoveFields:
869        return ConfigRemoveFields(
870            field_pointers=model.field_pointers,
871            condition=model.condition or "",
872        )
@staticmethod
def create_config_remap_field( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConfigRemapField, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.config_transformations.ConfigRemapField:
874    @staticmethod
875    def create_config_remap_field(
876        model: ConfigRemapFieldModel, config: Config, **kwargs: Any
877    ) -> ConfigRemapField:
878        mapping = cast(Mapping[str, Any], model.map)
879        return ConfigRemapField(
880            map=mapping,
881            field_path=model.field_path,
882            config=config,
883        )
885    def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator:
886        strategy = self._create_component_from_model(model.validation_strategy, config)
887
888        return DpathValidator(
889            field_path=model.field_path,
890            strategy=strategy,
891        )
893    def create_predicate_validator(
894        self, model: PredicateValidatorModel, config: Config
895    ) -> PredicateValidator:
896        strategy = self._create_component_from_model(model.validation_strategy, config)
897
898        return PredicateValidator(
899            value=model.value,
900            strategy=strategy,
901        )
@staticmethod
def create_validate_adheres_to_schema( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ValidateAdheresToSchema, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.validators.ValidateAdheresToSchema:
903    @staticmethod
904    def create_validate_adheres_to_schema(
905        model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any
906    ) -> ValidateAdheresToSchema:
907        base_schema = cast(Mapping[str, Any], model.base_schema)
908        return ValidateAdheresToSchema(
909            schema=base_schema,
910        )
@staticmethod
def create_added_field_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddedFieldDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddedFieldDefinition:
912    @staticmethod
913    def create_added_field_definition(
914        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
915    ) -> AddedFieldDefinition:
916        interpolated_value = InterpolatedString.create(
917            model.value, parameters=model.parameters or {}
918        )
919        return AddedFieldDefinition(
920            path=model.path,
921            value=interpolated_value,
922            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
923            parameters=model.parameters or {},
924        )
def create_add_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddFields:
926    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
927        added_field_definitions = [
928            self._create_component_from_model(
929                model=added_field_definition_model,
930                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
931                    added_field_definition_model.value_type
932                ),
933                config=config,
934            )
935            for added_field_definition_model in model.fields
936        ]
937        return AddFields(
938            fields=added_field_definitions,
939            condition=model.condition or "",
940            parameters=model.parameters or {},
941        )
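
A minimal sketch of a corresponding manifest snippet, assuming the factory instance from the create_component example (the path and the interpolated value are hypothetical; field names follow the source above):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        AddFields as AddFieldsModel,
    )

    add_fields = factory.create_component(
        model_type=AddFieldsModel,
        component_definition={
            "type": "AddFields",
            "fields": [
                {
                    "type": "AddedFieldDefinition",
                    "path": ["copied_id"],
                    "value": "{{ record['id'] }}",
                }
            ],
        },
        config={},
    )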
943    def create_keys_to_lower_transformation(
944        self, model: KeysToLowerModel, config: Config, **kwargs: Any
945    ) -> KeysToLowerTransformation:
946        return KeysToLowerTransformation()
948    def create_keys_to_snake_transformation(
949        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
950    ) -> KeysToSnakeCaseTransformation:
951        return KeysToSnakeCaseTransformation()
953    def create_keys_replace_transformation(
954        self, model: KeysReplaceModel, config: Config, **kwargs: Any
955    ) -> KeysReplaceTransformation:
956        return KeysReplaceTransformation(
957            old=model.old, new=model.new, parameters=model.parameters or {}
958        )
960    def create_flatten_fields(
961        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
962    ) -> FlattenFields:
963        return FlattenFields(
964            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
965        )
967    def create_dpath_flatten_fields(
968        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
969    ) -> DpathFlattenFields:
970        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
971        key_transformation = (
972            KeyTransformation(
973                config=config,
974                prefix=model.key_transformation.prefix,
975                suffix=model.key_transformation.suffix,
976                parameters=model.parameters or {},
977            )
978            if model.key_transformation is not None
979            else None
980        )
981        return DpathFlattenFields(
982            config=config,
983            field_path=model_field_path,
984            delete_origin_value=model.delete_origin_value
985            if model.delete_origin_value is not None
986            else False,
987            replace_record=model.replace_record if model.replace_record is not None else False,
988            key_transformation=key_transformation,
989            parameters=model.parameters or {},
990        )
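
A hedged manifest sketch (the field_path is hypothetical; option names follow the source above):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        DpathFlattenFields as DpathFlattenFieldsModel,
    )

    # Flattens the object under "data" into the record root and drops the original key.
    flatten = factory.create_component(
        model_type=DpathFlattenFieldsModel,
        component_definition={
            "type": "DpathFlattenFields",
            "field_path": ["data"],
            "delete_origin_value": True,
        },
        config={},
    )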
def create_api_key_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ApiKeyAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.ApiKeyAuthenticator:
1004    def create_api_key_authenticator(
1005        self,
1006        model: ApiKeyAuthenticatorModel,
1007        config: Config,
1008        token_provider: Optional[TokenProvider] = None,
1009        **kwargs: Any,
1010    ) -> ApiKeyAuthenticator:
1011        if model.inject_into is None and model.header is None:
1012            raise ValueError(
1013                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
1014            )
1015
1016        if model.inject_into is not None and model.header is not None:
1017            raise ValueError(
1018                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
1019            )
1020
1021        if token_provider is not None and model.api_token != "":
1022            raise ValueError(
1023                "If token_provider is set, api_token is ignored and has to be set to an empty string."
1024            )
1025
1026        request_option = (
1027            self._create_component_from_model(
1028                model.inject_into, config, parameters=model.parameters or {}
1029            )
1030            if model.inject_into
1031            else RequestOption(
1032                inject_into=RequestOptionType.header,
1033                field_name=model.header or "",
1034                parameters=model.parameters or {},
1035            )
1036        )
1037
1038        return ApiKeyAuthenticator(
1039            token_provider=(
1040                token_provider
1041                if token_provider is not None
1042                else InterpolatedStringTokenProvider(
1043                    api_token=model.api_token or "",
1044                    config=config,
1045                    parameters=model.parameters or {},
1046                )
1047            ),
1048            request_option=request_option,
1049            config=config,
1050            parameters=model.parameters or {},
1051        )
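
A minimal sketch using the non-deprecated inject_into option (the header name and config key are hypothetical):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
    )

    authenticator = factory.create_component(
        model_type=ApiKeyAuthenticatorModel,
        component_definition={
            "type": "ApiKeyAuthenticator",
            "api_token": "{{ config['api_key'] }}",
            "inject_into": {
                "type": "RequestOption",
                "inject_into": "header",
                "field_name": "X-API-Key",
            },
        },
        config={"api_key": "secret"},
    )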
1053    def create_legacy_to_per_partition_state_migration(
1054        self,
1055        model: LegacyToPerPartitionStateMigrationModel,
1056        config: Mapping[str, Any],
1057        declarative_stream: DeclarativeStreamModel,
1058    ) -> LegacyToPerPartitionStateMigration:
1059        retriever = declarative_stream.retriever
1060        if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)):
1061            raise ValueError(
1062                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}"
1063            )
1064        partition_router = retriever.partition_router
1065        if not isinstance(
1066            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
1067        ):
1068            raise ValueError(
1069                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
1070            )
1071        if not hasattr(partition_router, "parent_stream_configs"):
1072            raise ValueError(
1073                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
1074            )
1075
1076        if not hasattr(declarative_stream, "incremental_sync"):
1077            raise ValueError(
1078                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
1079            )
1080
1081        return LegacyToPerPartitionStateMigration(
1082            partition_router,  # type: ignore # was already checked above
1083            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
1084            config,
1085            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
1086        )
def create_session_token_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SessionTokenAuthenticator, config: Mapping[str, Any], name: str, **kwargs: Any) -> Union[airbyte_cdk.ApiKeyAuthenticator, airbyte_cdk.BearerAuthenticator]:
1088    def create_session_token_authenticator(
1089        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
1090    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
1091        decoder = (
1092            self._create_component_from_model(model=model.decoder, config=config)
1093            if model.decoder
1094            else JsonDecoder(parameters={})
1095        )
1096        login_requester = self._create_component_from_model(
1097            model=model.login_requester,
1098            config=config,
1099            name=f"{name}_login_requester",
1100            decoder=decoder,
1101        )
1102        token_provider = SessionTokenProvider(
1103            login_requester=login_requester,
1104            session_token_path=model.session_token_path,
1105            expiration_duration=parse_duration(model.expiration_duration)
1106            if model.expiration_duration
1107            else None,
1108            parameters=model.parameters or {},
1109            message_repository=self._message_repository,
1110            decoder=decoder,
1111        )
1112        if model.request_authentication.type == "Bearer":
1113            return ModelToComponentFactory.create_bearer_authenticator(
1114                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
1115                config,
1116                token_provider=token_provider,
1117            )
1118        else:
1119            return self.create_api_key_authenticator(
1120                ApiKeyAuthenticatorModel(
1121                    type="ApiKeyAuthenticator",
1122                    api_token="",
1123                    inject_into=model.request_authentication.inject_into,
1124                ),  # type: ignore # $parameters and headers default to None
1125                config=config,
1126                token_provider=token_provider,
1127            )
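
A hedged sketch of a manifest snippet exercising this path (the endpoint, paths, and duration are hypothetical; note that a name kwarg is forwarded to the login requester):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
    )

    authenticator = factory.create_component(
        model_type=SessionTokenAuthenticatorModel,
        component_definition={
            "type": "SessionTokenAuthenticator",
            "login_requester": {
                "type": "HttpRequester",
                "url_base": "https://api.example.com",
                "path": "/session",
                "http_method": "POST",
            },
            "session_token_path": ["token"],
            "expiration_duration": "PT1H",
            "request_authentication": {"type": "Bearer"},
        },
        config={},
        name="my_stream",
    )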
@staticmethod
def create_basic_http_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BasicHttpAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.BasicHttpAuthenticator:
1129    @staticmethod
1130    def create_basic_http_authenticator(
1131        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
1132    ) -> BasicHttpAuthenticator:
1133        return BasicHttpAuthenticator(
1134            password=model.password or "",
1135            username=model.username,
1136            config=config,
1137            parameters=model.parameters or {},
1138        )
@staticmethod
def create_bearer_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BearerAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.BearerAuthenticator:
1140    @staticmethod
1141    def create_bearer_authenticator(
1142        model: BearerAuthenticatorModel,
1143        config: Config,
1144        token_provider: Optional[TokenProvider] = None,
1145        **kwargs: Any,
1146    ) -> BearerAuthenticator:
1147        if token_provider is not None and model.api_token != "":
1148            raise ValueError(
1149                "If token_provider is set, api_token is ignored and has to be set to an empty string."
1150            )
1151        return BearerAuthenticator(
1152            token_provider=(
1153                token_provider
1154                if token_provider is not None
1155                else InterpolatedStringTokenProvider(
1156                    api_token=model.api_token or "",
1157                    config=config,
1158                    parameters=model.parameters or {},
1159                )
1160            ),
1161            config=config,
1162            parameters=model.parameters or {},
1163        )
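
A minimal sketch (the config key is hypothetical):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        BearerAuthenticator as BearerAuthenticatorModel,
    )

    authenticator = factory.create_component(
        model_type=BearerAuthenticatorModel,
        component_definition={
            "type": "BearerAuthenticator",
            "api_token": "{{ config['api_token'] }}",
        },
        config={"api_token": "secret"},
    )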
@staticmethod
def create_dynamic_stream_check_config( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DynamicStreamCheckConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.DynamicStreamCheckConfig:
1165    @staticmethod
1166    def create_dynamic_stream_check_config(
1167        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
1168    ) -> DynamicStreamCheckConfig:
1169        return DynamicStreamCheckConfig(
1170            dynamic_stream_name=model.dynamic_stream_name,
1171            stream_count=model.stream_count or 0,
1172        )
def create_check_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckStream:
1174    def create_check_stream(
1175        self, model: CheckStreamModel, config: Config, **kwargs: Any
1176    ) -> CheckStream:
1177        if model.dynamic_streams_check_configs is None and model.stream_names is None:
1178            raise ValueError(
1179                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
1180            )
1181
1182        dynamic_streams_check_configs = (
1183            [
1184                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
1185                for dynamic_stream_check_config in model.dynamic_streams_check_configs
1186            ]
1187            if model.dynamic_streams_check_configs
1188            else []
1189        )
1190
1191        return CheckStream(
1192            stream_names=model.stream_names or [],
1193            dynamic_streams_check_configs=dynamic_streams_check_configs,
1194            parameters={},
1195        )
@staticmethod
def create_check_dynamic_stream( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckDynamicStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckDynamicStream:
1197    @staticmethod
1198    def create_check_dynamic_stream(
1199        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
1200    ) -> CheckDynamicStream:
1201        assert model.use_check_availability is not None  # for mypy
1202
1203        use_check_availability = model.use_check_availability
1204
1205        return CheckDynamicStream(
1206            stream_count=model.stream_count,
1207            use_check_availability=use_check_availability,
1208            parameters={},
1209        )
1211    def create_composite_error_handler(
1212        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
1213    ) -> CompositeErrorHandler:
1214        error_handlers = [
1215            self._create_component_from_model(model=error_handler_model, config=config)
1216            for error_handler_model in model.error_handlers
1217        ]
1218        return CompositeErrorHandler(
1219            error_handlers=error_handlers, parameters=model.parameters or {}
1220        )
@staticmethod
def create_concurrency_level( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConcurrencyLevel, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.concurrency_level.ConcurrencyLevel:
1222    @staticmethod
1223    def create_concurrency_level(
1224        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
1225    ) -> ConcurrencyLevel:
1226        return ConcurrencyLevel(
1227            default_concurrency=model.default_concurrency,
1228            max_concurrency=model.max_concurrency,
1229            config=config,
1230            parameters={},
1231        )
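
A hedged sketch; default_concurrency may be an interpolated string evaluated against the config (the values here are hypothetical):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        ConcurrencyLevel as ConcurrencyLevelModel,
    )

    concurrency_level = factory.create_component(
        model_type=ConcurrencyLevelModel,
        component_definition={
            "type": "ConcurrencyLevel",
            "default_concurrency": "{{ config.get('num_workers', 10) }}",
            "max_concurrency": 25,
        },
        config={},
    )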
@staticmethod
def apply_stream_state_migrations( stream_state_migrations: Optional[List[Any]], stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
1233    @staticmethod
1234    def apply_stream_state_migrations(
1235        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
1236    ) -> MutableMapping[str, Any]:
1237        if stream_state_migrations:
1238            for state_migration in stream_state_migrations:
1239                if state_migration.should_migrate(stream_state):
1240                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
1241                    stream_state = dict(state_migration.migrate(stream_state))
1242        return stream_state
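
The migrations are duck-typed: anything exposing should_migrate and migrate works. A minimal illustrative sketch (the cursor key names are hypothetical):

    from typing import Any, Mapping, MutableMapping

    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )


    class RenameCursorKeyMigration:
        """Illustrative migration that renames a legacy cursor key in the stream state."""

        def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
            return "legacy_cursor" in stream_state

        def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
            migrated = dict(stream_state)
            migrated["updated_at"] = migrated.pop("legacy_cursor")
            return migrated


    state: MutableMapping[str, Any] = {"legacy_cursor": "2024-01-01"}
    state = ModelToComponentFactory.apply_stream_state_migrations(
        [RenameCursorKeyMigration()], state
    )
    # state is now {"updated_at": "2024-01-01"}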
def create_concurrent_cursor_from_datetime_based_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, runtime_lookback_window: Optional[datetime.timedelta] = None, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
1244    def create_concurrent_cursor_from_datetime_based_cursor(
1245        self,
1246        model_type: Type[BaseModel],
1247        component_definition: ComponentDefinition,
1248        stream_name: str,
1249        stream_namespace: Optional[str],
1250        config: Config,
1251        message_repository: Optional[MessageRepository] = None,
1252        runtime_lookback_window: Optional[datetime.timedelta] = None,
1253        stream_state_migrations: Optional[List[Any]] = None,
1254        **kwargs: Any,
1255    ) -> ConcurrentCursor:
1256        # Per-partition incremental streams can dynamically create child cursors which will pass their current
1257        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1258        # incoming state and connector_state_manager that is initialized when the component factory is created
1259        stream_state = (
1260            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1261            if "stream_state" not in kwargs
1262            else kwargs["stream_state"]
1263        )
1264        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1265
1266        component_type = component_definition.get("type")
1267        if component_definition.get("type") != model_type.__name__:
1268            raise ValueError(
1269                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1270            )
1271
1272        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1273
1274        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1275            raise ValueError(
1276                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1277            )
1278
1279        interpolated_cursor_field = InterpolatedString.create(
1280            datetime_based_cursor_model.cursor_field,
1281            parameters=datetime_based_cursor_model.parameters or {},
1282        )
1283        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1284
1285        interpolated_partition_field_start = InterpolatedString.create(
1286            datetime_based_cursor_model.partition_field_start or "start_time",
1287            parameters=datetime_based_cursor_model.parameters or {},
1288        )
1289        interpolated_partition_field_end = InterpolatedString.create(
1290            datetime_based_cursor_model.partition_field_end or "end_time",
1291            parameters=datetime_based_cursor_model.parameters or {},
1292        )
1293
1294        slice_boundary_fields = (
1295            interpolated_partition_field_start.eval(config=config),
1296            interpolated_partition_field_end.eval(config=config),
1297        )
1298
1299        datetime_format = datetime_based_cursor_model.datetime_format
1300
1301        cursor_granularity = (
1302            parse_duration(datetime_based_cursor_model.cursor_granularity)
1303            if datetime_based_cursor_model.cursor_granularity
1304            else None
1305        )
1306
1307        lookback_window = None
1308        interpolated_lookback_window = (
1309            InterpolatedString.create(
1310                datetime_based_cursor_model.lookback_window,
1311                parameters=datetime_based_cursor_model.parameters or {},
1312            )
1313            if datetime_based_cursor_model.lookback_window
1314            else None
1315        )
1316        if interpolated_lookback_window:
1317            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
1318            if evaluated_lookback_window:
1319                lookback_window = parse_duration(evaluated_lookback_window)
1320
1321        connector_state_converter: DateTimeStreamStateConverter
1322        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1323            datetime_format=datetime_format,
1324            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1325            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1326            cursor_granularity=cursor_granularity,
1327        )
1328
1329        # Adjusts the stream state by applying the runtime lookback window.
1330        # This is used to ensure correct state handling in case of failed partitions.
1331        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1332        if runtime_lookback_window and stream_state_value:
1333            new_stream_state = (
1334                connector_state_converter.parse_timestamp(stream_state_value)
1335                - runtime_lookback_window
1336            )
1337            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1338                new_stream_state
1339            )
1340
1341        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
1342        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
1343            start_date_runtime_value = self.create_min_max_datetime(
1344                model=datetime_based_cursor_model.start_datetime, config=config
1345            )
1346        else:
1347            start_date_runtime_value = datetime_based_cursor_model.start_datetime
1348
1349        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
1350        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
1351            end_date_runtime_value = self.create_min_max_datetime(
1352                model=datetime_based_cursor_model.end_datetime, config=config
1353            )
1354        else:
1355            end_date_runtime_value = datetime_based_cursor_model.end_datetime
1356
1357        interpolated_start_date = MinMaxDatetime.create(
1358            interpolated_string_or_min_max_datetime=start_date_runtime_value,
1359            parameters=datetime_based_cursor_model.parameters,
1360        )
1361        interpolated_end_date = (
1362            None
1363            if not end_date_runtime_value
1364            else MinMaxDatetime.create(
1365                end_date_runtime_value, datetime_based_cursor_model.parameters
1366            )
1367        )
1368
1369        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
1370        if not interpolated_start_date.datetime_format:
1371            interpolated_start_date.datetime_format = datetime_format
1372        if interpolated_end_date and not interpolated_end_date.datetime_format:
1373            interpolated_end_date.datetime_format = datetime_format
1374
1375        start_date = interpolated_start_date.get_datetime(config=config)
1376        end_date_provider = (
1377            partial(interpolated_end_date.get_datetime, config)
1378            if interpolated_end_date
1379            else connector_state_converter.get_end_provider()
1380        )
1381
1382        if (
1383            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
1384        ) or (
1385            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
1386        ):
1387            raise ValueError(
1388                f"If step is defined, cursor_granularity should be as well and vice-versa. "
1389                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
1390            )
1391
1392        # When step is not defined, default to a step size from the starting date to the present moment
1393        step_length = datetime.timedelta.max
1394        interpolated_step = (
1395            InterpolatedString.create(
1396                datetime_based_cursor_model.step,
1397                parameters=datetime_based_cursor_model.parameters or {},
1398            )
1399            if datetime_based_cursor_model.step
1400            else None
1401        )
1402        if interpolated_step:
1403            evaluated_step = interpolated_step.eval(config)
1404            if evaluated_step:
1405                step_length = parse_duration(evaluated_step)
1406
1407        clamping_strategy: ClampingStrategy = NoClamping()
1408        if datetime_based_cursor_model.clamping:
1409            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1410            # it is still better than shifting the low-code concept of interpolation into the ConcurrentCursor runtime
1411            # object which we want to keep agnostic of being low-code
1412            target = InterpolatedString(
1413                string=datetime_based_cursor_model.clamping.target,
1414                parameters=datetime_based_cursor_model.parameters or {},
1415            )
1416            evaluated_target = target.eval(config=config)
1417            match evaluated_target:
1418                case "DAY":
1419                    clamping_strategy = DayClampingStrategy()
1420                    end_date_provider = ClampingEndProvider(
1421                        DayClampingStrategy(is_ceiling=False),
1422                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1423                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
1424                    )
1425                case "WEEK":
1426                    if (
1427                        not datetime_based_cursor_model.clamping.target_details
1428                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
1429                    ):
1430                        raise ValueError(
1431                            "Given WEEK clamping, weekday needs to be provided as target_details"
1432                        )
1433                    weekday = self._assemble_weekday(
1434                        datetime_based_cursor_model.clamping.target_details["weekday"]
1435                    )
1436                    clamping_strategy = WeekClampingStrategy(weekday)
1437                    end_date_provider = ClampingEndProvider(
1438                        WeekClampingStrategy(weekday, is_ceiling=False),
1439                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1440                        granularity=cursor_granularity or datetime.timedelta(days=1),
1441                    )
1442                case "MONTH":
1443                    clamping_strategy = MonthClampingStrategy()
1444                    end_date_provider = ClampingEndProvider(
1445                        MonthClampingStrategy(is_ceiling=False),
1446                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1447                        granularity=cursor_granularity or datetime.timedelta(days=1),
1448                    )
1449                case _:
1450                    raise ValueError(
1451                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1452                    )
1453
1454        return ConcurrentCursor(
1455            stream_name=stream_name,
1456            stream_namespace=stream_namespace,
1457            stream_state=stream_state,
1458            message_repository=message_repository or self._message_repository,
1459            connector_state_manager=self._connector_state_manager,
1460            connector_state_converter=connector_state_converter,
1461            cursor_field=cursor_field,
1462            slice_boundary_fields=slice_boundary_fields,
1463            start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1464            end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1465            lookback_window=lookback_window,
1466            slice_range=step_length,
1467            cursor_granularity=cursor_granularity,
1468            clamping_strategy=clamping_strategy,
1469        )
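
The clamping branch above is driven by the clamping field of a DatetimeBasedCursor definition. Below is a minimal sketch (a Python dict, not taken from the source) of a fragment that exercises the WEEK branch; the weekday spelling is an assumed value accepted by _assemble_weekday.

# Illustrative fragment only: `target` may be an interpolated string, and the
# WEEK branch requires a "weekday" entry under target_details. "MONDAY" is an
# assumed weekday name, not confirmed by this listing.
clamping_definition = {
    "target": "WEEK",
    "target_details": {"weekday": "MONDAY"},
}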
def create_concurrent_cursor_from_incrementing_count_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
1471    def create_concurrent_cursor_from_incrementing_count_cursor(
1472        self,
1473        model_type: Type[BaseModel],
1474        component_definition: ComponentDefinition,
1475        stream_name: str,
1476        stream_namespace: Optional[str],
1477        config: Config,
1478        message_repository: Optional[MessageRepository] = None,
1479        **kwargs: Any,
1480    ) -> ConcurrentCursor:
1481        # Per-partition incremental streams can dynamically create child cursors, which pass their current
1482        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
1483        # incoming state and the connector_state_manager initialized when the component factory was created.
1484        stream_state = (
1485            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
1486            if "stream_state" not in kwargs
1487            else kwargs["stream_state"]
1488        )
1489
1490        component_type = component_definition.get("type")
1491        if component_type != model_type.__name__:
1492            raise ValueError(
1493                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1494            )
1495
1496        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
1497
1498        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
1499            raise ValueError(
1500                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
1501            )
1502
1503        interpolated_start_value = (
1504            InterpolatedString.create(
1505                incrementing_count_cursor_model.start_value,  # type: ignore
1506                parameters=incrementing_count_cursor_model.parameters or {},
1507            )
1508            if incrementing_count_cursor_model.start_value
1509            else 0
1510        )
1511
1512        interpolated_cursor_field = InterpolatedString.create(
1513            incrementing_count_cursor_model.cursor_field,
1514            parameters=incrementing_count_cursor_model.parameters or {},
1515        )
1516        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1517
1518        connector_state_converter = IncrementingCountStreamStateConverter(
1519            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1520        )
1521
1522        return ConcurrentCursor(
1523            stream_name=stream_name,
1524            stream_namespace=stream_namespace,
1525            stream_state=stream_state,
1526            message_repository=message_repository or self._message_repository,
1527            connector_state_manager=self._connector_state_manager,
1528            connector_state_converter=connector_state_converter,
1529            cursor_field=cursor_field,
1530            slice_boundary_fields=None,
1531            start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1532            end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1533        )
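
For reference, a minimal sketch (assumed field values, not from the source) of the component definition this method accepts: `type` must match the model class name, `cursor_field` is interpolated into a CursorField, and `start_value` is optional, defaulting to 0 as handled above.

# Hypothetical example definition; "id" is an illustrative cursor field.
incrementing_count_cursor_definition = {
    "type": "IncrementingCountCursor",
    "cursor_field": "id",
    "start_value": 0,
}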
def create_concurrent_cursor_from_perpartition_cursor( self, state_manager: airbyte_cdk.ConnectorStateManager, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], stream_state: MutableMapping[str, Any], partition_router: airbyte_cdk.sources.declarative.partition_routers.PartitionRouter, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.sources.declarative.incremental.ConcurrentPerPartitionCursor:
1554    def create_concurrent_cursor_from_perpartition_cursor(
1555        self,
1556        state_manager: ConnectorStateManager,
1557        model_type: Type[BaseModel],
1558        component_definition: ComponentDefinition,
1559        stream_name: str,
1560        stream_namespace: Optional[str],
1561        config: Config,
1562        stream_state: MutableMapping[str, Any],
1563        partition_router: PartitionRouter,
1564        stream_state_migrations: Optional[List[Any]] = None,
1565        **kwargs: Any,
1566    ) -> ConcurrentPerPartitionCursor:
1567        component_type = component_definition.get("type")
1568        if component_type != model_type.__name__:
1569            raise ValueError(
1570                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1571            )
1572
1573        datetime_based_cursor_model = model_type.parse_obj(component_definition)
1574
1575        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1576            raise ValueError(
1577                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1578            )
1579
1580        interpolated_cursor_field = InterpolatedString.create(
1581            datetime_based_cursor_model.cursor_field,
1582            parameters=datetime_based_cursor_model.parameters or {},
1583        )
1584        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1585
1586        datetime_format = datetime_based_cursor_model.datetime_format
1587
1588        cursor_granularity = (
1589            parse_duration(datetime_based_cursor_model.cursor_granularity)
1590            if datetime_based_cursor_model.cursor_granularity
1591            else None
1592        )
1593
1594        connector_state_converter: DateTimeStreamStateConverter
1595        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
1596            datetime_format=datetime_format,
1597            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
1598            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
1599            cursor_granularity=cursor_granularity,
1600        )
1601
1602        # Create the cursor factory
1603        cursor_factory = ConcurrentCursorFactory(
1604            partial(
1605                self.create_concurrent_cursor_from_datetime_based_cursor,
1606                state_manager=state_manager,
1607                model_type=model_type,
1608                component_definition=component_definition,
1609                stream_name=stream_name,
1610                stream_namespace=stream_namespace,
1611                config=config,
1612                message_repository=NoopMessageRepository(),
1613                stream_state_migrations=stream_state_migrations,
1614            )
1615        )
1616
1617        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1618        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
1619        use_global_cursor = isinstance(
1620            partition_router, GroupingPartitionRouter
1621        ) or component_definition.get("global_substream_cursor", False)
1622
1623        # Return the concurrent cursor and state converter
1624        return ConcurrentPerPartitionCursor(
1625            cursor_factory=cursor_factory,
1626            partition_router=partition_router,
1627            stream_name=stream_name,
1628            stream_namespace=stream_namespace,
1629            stream_state=stream_state,
1630            message_repository=self._message_repository,  # type: ignore
1631            connector_state_manager=state_manager,
1632            connector_state_converter=connector_state_converter,
1633            cursor_field=cursor_field,
1634            use_global_cursor=use_global_cursor,
1635        )
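
The cursor factory above relies on functools.partial to pre-bind every argument except the per-partition state, so ConcurrentPerPartitionCursor can mint a cursor per partition later. A generic, self-contained sketch of that pattern with stand-in names (not CDK API):

from functools import partial

# Stand-in for create_concurrent_cursor_from_datetime_based_cursor: everything
# except stream_state is bound up front; each partition supplies its own state.
def build_cursor(stream_name: str, stream_state: dict) -> dict:
    return {"stream": stream_name, "state": stream_state}

cursor_factory = partial(build_cursor, "example_stream")
cursor = cursor_factory(stream_state={"updated_at": "2024-01-01"})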
1637    @staticmethod
1638    def create_constant_backoff_strategy(
1639        model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
1640    ) -> ConstantBackoffStrategy:
1641        return ConstantBackoffStrategy(
1642            backoff_time_in_seconds=model.backoff_time_in_seconds,
1643            config=config,
1644            parameters=model.parameters or {},
1645        )
def create_cursor_pagination( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CursorPagination, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder, **kwargs: Any) -> airbyte_cdk.CursorPaginationStrategy:
1647    def create_cursor_pagination(
1648        self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
1649    ) -> CursorPaginationStrategy:
1650        if isinstance(decoder, PaginationDecoderDecorator):
1651            inner_decoder = decoder.decoder
1652        else:
1653            inner_decoder = decoder
1654            decoder = PaginationDecoderDecorator(decoder=decoder)
1655
1656        if self._is_supported_decoder_for_pagination(inner_decoder):
1657            decoder_to_use = decoder
1658        else:
1659            raise ValueError(
1660                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1661            )
1662
1663        return CursorPaginationStrategy(
1664            cursor_value=model.cursor_value,
1665            decoder=decoder_to_use,
1666            page_size=model.page_size,
1667            stop_condition=model.stop_condition,
1668            config=config,
1669            parameters=model.parameters or {},
1670        )
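
A hedged example of a CursorPagination definition (as a dict) covering the fields consumed above; the interpolation expressions are illustrative placeholders, not taken from the source.

# Illustrative only; cursor_value and stop_condition are interpolated at runtime.
cursor_pagination_definition = {
    "type": "CursorPagination",
    "cursor_value": "{{ response.next_page_token }}",
    "stop_condition": "{{ response.next_page_token is none }}",
    "page_size": 100,
}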
def create_custom_component(self, model: Any, config: Mapping[str, Any], **kwargs: Any) -> Any:
1672    def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
1673        """
1674        Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
1675        instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor
1676        :param model: The Pydantic model of the custom component being created
1677        :param config: The custom defined connector config
1678        :return: The declarative component built from the Pydantic model to be used at runtime
1679        """
1680        custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1681        component_fields = get_type_hints(custom_component_class)
1682        model_args = model.dict()
1683        model_args["config"] = config
1684
1685        # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions,
1686        # we defer to these arguments over the component's definition
1687        for key, arg in kwargs.items():
1688            model_args[key] = arg
1689
1690        # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
1691        # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
1692        # the custom component, and this code performs a second parse to convert the sub-fields first into models, then into declarative components.
1693        for model_field, model_value in model_args.items():
1694            # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
1695            if (
1696                isinstance(model_value, dict)
1697                and "type" not in model_value
1698                and model_field in component_fields
1699            ):
1700                derived_type = self._derive_component_type_from_type_hints(
1701                    component_fields.get(model_field)
1702                )
1703                if derived_type:
1704                    model_value["type"] = derived_type
1705
1706            if self._is_component(model_value):
1707                model_args[model_field] = self._create_nested_component(
1708                    model, model_field, model_value, config
1709                )
1710            elif isinstance(model_value, list):
1711                vals = []
1712                for v in model_value:
1713                    if isinstance(v, dict) and "type" not in v and model_field in component_fields:
1714                        derived_type = self._derive_component_type_from_type_hints(
1715                            component_fields.get(model_field)
1716                        )
1717                        if derived_type:
1718                            v["type"] = derived_type
1719                    if self._is_component(v):
1720                        vals.append(self._create_nested_component(model, model_field, v, config))
1721                    else:
1722                        vals.append(v)
1723                model_args[model_field] = vals
1724
1725        kwargs = {
1726            class_field: model_args[class_field]
1727            for class_field in component_fields.keys()
1728            if class_field in model_args
1729        }
1730        return custom_component_class(**kwargs)

Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor

Parameters
  • model: The Pydantic model of the custom component being created
  • config: The custom defined connector config
Returns

The declarative component built from the Pydantic model to be used at runtime
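
A hedged sketch of the kind of definition this method handles; the module path and extra field below are illustrative, not from the source.

# `class_name` points at connector-supplied Python; additional keys are forwarded
# as constructor kwargs when the custom class declares matching typed fields.
custom_component_definition = {
    "type": "CustomRecordExtractor",
    "class_name": "source_example.components.MyRecordExtractor",
    "field_path": ["data", "items"],
}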

@staticmethod
def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1795    @staticmethod
1796    def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1797        if not cls:
1798            return False
1799        return cls.__module__ == "builtins"
def create_datetime_based_cursor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DatetimeBasedCursor, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DatetimeBasedCursor:
1862    def create_datetime_based_cursor(
1863        self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
1864    ) -> DatetimeBasedCursor:
1865        start_datetime: Union[str, MinMaxDatetime] = (
1866            model.start_datetime
1867            if isinstance(model.start_datetime, str)
1868            else self.create_min_max_datetime(model.start_datetime, config)
1869        )
1870        end_datetime: Union[str, MinMaxDatetime, None] = None
1871        if model.is_data_feed and model.end_datetime:
1872            raise ValueError("Data feed does not support end_datetime")
1873        if model.is_data_feed and model.is_client_side_incremental:
1874            raise ValueError(
1875                "`Client side incremental` cannot be applied with `data feed`. Choose only one of them."
1876            )
1877        if model.end_datetime:
1878            end_datetime = (
1879                model.end_datetime
1880                if isinstance(model.end_datetime, str)
1881                else self.create_min_max_datetime(model.end_datetime, config)
1882            )
1883
1884        end_time_option = (
1885            self._create_component_from_model(
1886                model.end_time_option, config, parameters=model.parameters or {}
1887            )
1888            if model.end_time_option
1889            else None
1890        )
1891        start_time_option = (
1892            self._create_component_from_model(
1893                model.start_time_option, config, parameters=model.parameters or {}
1894            )
1895            if model.start_time_option
1896            else None
1897        )
1898
1899        return DatetimeBasedCursor(
1900            cursor_field=model.cursor_field,
1901            cursor_datetime_formats=model.cursor_datetime_formats
1902            if model.cursor_datetime_formats
1903            else [],
1904            cursor_granularity=model.cursor_granularity,
1905            datetime_format=model.datetime_format,
1906            end_datetime=end_datetime,
1907            start_datetime=start_datetime,
1908            step=model.step,
1909            end_time_option=end_time_option,
1910            lookback_window=model.lookback_window,
1911            start_time_option=start_time_option,
1912            partition_field_end=model.partition_field_end,
1913            partition_field_start=model.partition_field_start,
1914            message_repository=self._message_repository,
1915            is_compare_strictly=model.is_compare_strictly,
1916            config=config,
1917            parameters=model.parameters or {},
1918        )
def create_declarative_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DeclarativeStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
1920    def create_declarative_stream(
1921        self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
1922    ) -> DeclarativeStream:
1923        # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
1924        # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
1925        # Retriever. This is done in the declarative stream, not the retriever, to support custom retrievers. The custom create methods in
1926        # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
1927        combined_slicers = self._merge_stream_slicers(model=model, config=config)
1928
1929        primary_key = model.primary_key.__root__ if model.primary_key else None
1930        stop_condition_on_cursor = (
1931            model.incremental_sync
1932            and hasattr(model.incremental_sync, "is_data_feed")
1933            and model.incremental_sync.is_data_feed
1934        )
1935        client_side_incremental_sync = None
1936        if (
1937            model.incremental_sync
1938            and hasattr(model.incremental_sync, "is_client_side_incremental")
1939            and model.incremental_sync.is_client_side_incremental
1940        ):
1941            supported_slicers = (
1942                DatetimeBasedCursor,
1943                GlobalSubstreamCursor,
1944                PerPartitionWithGlobalCursor,
1945            )
1946            if combined_slicers and not isinstance(combined_slicers, supported_slicers):
1947                raise ValueError(
1948                    "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1949                )
1950            cursor = (
1951                combined_slicers
1952                if isinstance(
1953                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1954                )
1955                else self._create_component_from_model(model=model.incremental_sync, config=config)
1956            )
1957
1958            client_side_incremental_sync = {"cursor": cursor}
1959
1960        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1961            cursor_model = model.incremental_sync
1962
1963            end_time_option = (
1964                self._create_component_from_model(
1965                    cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1966                )
1967                if cursor_model.end_time_option
1968                else None
1969            )
1970            start_time_option = (
1971                self._create_component_from_model(
1972                    cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1973                )
1974                if cursor_model.start_time_option
1975                else None
1976            )
1977
1978            request_options_provider = DatetimeBasedRequestOptionsProvider(
1979                start_time_option=start_time_option,
1980                end_time_option=end_time_option,
1981                partition_field_start=cursor_model.partition_field_start,
1982                partition_field_end=cursor_model.partition_field_end,
1983                config=config,
1984                parameters=model.parameters or {},
1985            )
1986        elif model.incremental_sync and isinstance(
1987            model.incremental_sync, IncrementingCountCursorModel
1988        ):
1989            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
1990
1991            start_time_option = (
1992                self._create_component_from_model(
1993                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1994                    config,
1995                    parameters=cursor_model.parameters or {},
1996                )
1997                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1998                else None
1999            )
2000
2001            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
2002            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
2003            partition_field_start = "start"
2004
2005            request_options_provider = DatetimeBasedRequestOptionsProvider(
2006                start_time_option=start_time_option,
2007                partition_field_start=partition_field_start,
2008                config=config,
2009                parameters=model.parameters or {},
2010            )
2011        else:
2012            request_options_provider = None
2013
2014        transformations = []
2015        if model.transformations:
2016            for transformation_model in model.transformations:
2017                transformations.append(
2018                    self._create_component_from_model(model=transformation_model, config=config)
2019                )
2020        file_uploader = None
2021        if model.file_uploader:
2022            file_uploader = self._create_component_from_model(
2023                model=model.file_uploader, config=config
2024            )
2025
2026        retriever = self._create_component_from_model(
2027            model=model.retriever,
2028            config=config,
2029            name=model.name,
2030            primary_key=primary_key,
2031            stream_slicer=combined_slicers,
2032            request_options_provider=request_options_provider,
2033            stop_condition_on_cursor=stop_condition_on_cursor,
2034            client_side_incremental_sync=client_side_incremental_sync,
2035            transformations=transformations,
2036            file_uploader=file_uploader,
2037            incremental_sync=model.incremental_sync,
2038        )
2039        cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
2040
2041        if model.state_migrations:
2042            state_transformations = [
2043                self._create_component_from_model(state_migration, config, declarative_stream=model)
2044                for state_migration in model.state_migrations
2045            ]
2046        else:
2047            state_transformations = []
2048
2049        schema_loader: Union[
2050            CompositeSchemaLoader,
2051            DefaultSchemaLoader,
2052            DynamicSchemaLoader,
2053            InlineSchemaLoader,
2054            JsonFileSchemaLoader,
2055        ]
2056        if model.schema_loader and isinstance(model.schema_loader, list):
2057            nested_schema_loaders = [
2058                self._create_component_from_model(model=nested_schema_loader, config=config)
2059                for nested_schema_loader in model.schema_loader
2060            ]
2061            schema_loader = CompositeSchemaLoader(
2062                schema_loaders=nested_schema_loaders, parameters={}
2063            )
2064        elif model.schema_loader:
2065            schema_loader = self._create_component_from_model(
2066                model=model.schema_loader,  # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models
2067                config=config,
2068            )
2069        else:
2070            options = model.parameters or {}
2071            if "name" not in options:
2072                options["name"] = model.name
2073            schema_loader = DefaultSchemaLoader(config=config, parameters=options)
2074
2075        return DeclarativeStream(
2076            name=model.name or "",
2077            primary_key=primary_key,
2078            retriever=retriever,
2079            schema_loader=schema_loader,
2080            stream_cursor_field=cursor_field or "",
2081            state_migrations=state_transformations,
2082            config=config,
2083            parameters=model.parameters or {},
2084        )
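
To make the slicer-merging comment at the top of this method concrete, here is a hedged, trimmed stream definition (as a dict) where both an incremental_sync component and a retriever-level partition_router are present; the values are placeholders, not a complete manifest.

# Illustrative fragment: both slicer sources are present, so they get merged.
declarative_stream_definition = {
    "type": "DeclarativeStream",
    "name": "example_stream",
    "incremental_sync": {
        "type": "DatetimeBasedCursor",
        "cursor_field": "updated_at",
        "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
        "start_datetime": "{{ config['start_date'] }}",
    },
    "retriever": {
        "type": "SimpleRetriever",
        "partition_router": {
            "type": "ListPartitionRouter",
            "cursor_field": "region",
            "values": ["us", "eu"],
        },
    },
}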
2253    def create_default_error_handler(
2254        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
2255    ) -> DefaultErrorHandler:
2256        backoff_strategies = []
2257        if model.backoff_strategies:
2258            for backoff_strategy_model in model.backoff_strategies:
2259                backoff_strategies.append(
2260                    self._create_component_from_model(model=backoff_strategy_model, config=config)
2261                )
2262
2263        response_filters = []
2264        if model.response_filters:
2265            for response_filter_model in model.response_filters:
2266                response_filters.append(
2267                    self._create_component_from_model(model=response_filter_model, config=config)
2268                )
2269        response_filters.append(
2270            HttpResponseFilter(config=config, parameters=model.parameters or {})
2271        )
2272
2273        return DefaultErrorHandler(
2274            backoff_strategies=backoff_strategies,
2275            max_retries=model.max_retries,
2276            response_filters=response_filters,
2277            config=config,
2278            parameters=model.parameters or {},
2279        )
2281    def create_default_paginator(
2282        self,
2283        model: DefaultPaginatorModel,
2284        config: Config,
2285        *,
2286        url_base: str,
2287        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2288        decoder: Optional[Decoder] = None,
2289        cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
2290    ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
2291        if decoder:
2292            if self._is_supported_decoder_for_pagination(decoder):
2293                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
2294            else:
2295                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
2296        else:
2297            decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
2298        page_size_option = (
2299            self._create_component_from_model(model=model.page_size_option, config=config)
2300            if model.page_size_option
2301            else None
2302        )
2303        page_token_option = (
2304            self._create_component_from_model(model=model.page_token_option, config=config)
2305            if model.page_token_option
2306            else None
2307        )
2308        pagination_strategy = self._create_component_from_model(
2309            model=model.pagination_strategy,
2310            config=config,
2311            decoder=decoder_to_use,
2312            extractor_model=extractor_model,
2313        )
2314        if cursor_used_for_stop_condition:
2315            pagination_strategy = StopConditionPaginationStrategyDecorator(
2316                pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
2317            )
2318        paginator = DefaultPaginator(
2319            decoder=decoder_to_use,
2320            page_size_option=page_size_option,
2321            page_token_option=page_token_option,
2322            pagination_strategy=pagination_strategy,
2323            url_base=url_base,
2324            config=config,
2325            parameters=model.parameters or {},
2326        )
2327        if self._limit_pages_fetched_per_slice:
2328            return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
2329        return paginator
def create_dpath_extractor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DpathExtractor, config: Mapping[str, Any], decoder: Optional[airbyte_cdk.Decoder] = None, **kwargs: Any) -> airbyte_cdk.DpathExtractor:
2331    def create_dpath_extractor(
2332        self,
2333        model: DpathExtractorModel,
2334        config: Config,
2335        decoder: Optional[Decoder] = None,
2336        **kwargs: Any,
2337    ) -> DpathExtractor:
2338        if decoder:
2339            decoder_to_use = decoder
2340        else:
2341            decoder_to_use = JsonDecoder(parameters={})
2342        model_field_path: List[Union[InterpolatedString, str]] = list(model.field_path)
2343        return DpathExtractor(
2344            decoder=decoder_to_use,
2345            field_path=model_field_path,
2346            config=config,
2347            parameters=model.parameters or {},
2348        )
2350    @staticmethod
2351    def create_response_to_file_extractor(
2352        model: ResponseToFileExtractorModel,
2353        **kwargs: Any,
2354    ) -> ResponseToFileExtractor:
2355        return ResponseToFileExtractor(parameters=model.parameters or {})
2357    @staticmethod
2358    def create_exponential_backoff_strategy(
2359        model: ExponentialBackoffStrategyModel, config: Config
2360    ) -> ExponentialBackoffStrategy:
2361        return ExponentialBackoffStrategy(
2362            factor=model.factor or 5, parameters=model.parameters or {}, config=config
2363        )
2365    @staticmethod
2366    def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey:
2367        return GroupByKey(model.key, config=config, parameters=model.parameters or {})
def create_http_requester( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpRequester, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder = JsonDecoder(), query_properties_key: Optional[str] = None, use_cache: Optional[bool] = None, *, name: str) -> airbyte_cdk.HttpRequester:
2369    def create_http_requester(
2370        self,
2371        model: HttpRequesterModel,
2372        config: Config,
2373        decoder: Decoder = JsonDecoder(parameters={}),
2374        query_properties_key: Optional[str] = None,
2375        use_cache: Optional[bool] = None,
2376        *,
2377        name: str,
2378    ) -> HttpRequester:
2379        authenticator = (
2380            self._create_component_from_model(
2381                model=model.authenticator,
2382                config=config,
2383                url_base=model.url or model.url_base,
2384                name=name,
2385                decoder=decoder,
2386            )
2387            if model.authenticator
2388            else None
2389        )
2390        error_handler = (
2391            self._create_component_from_model(model=model.error_handler, config=config)
2392            if model.error_handler
2393            else DefaultErrorHandler(
2394                backoff_strategies=[],
2395                response_filters=[],
2396                config=config,
2397                parameters=model.parameters or {},
2398            )
2399        )
2400
2401        api_budget = self._api_budget
2402
2403        # Removes QueryProperties components from the interpolated mappings because they are designed
2404        # to be used by the SimpleRetriever and are resolved by the provider directly from the slice
2405        # instead of through Jinja interpolation
2406        request_parameters: Optional[Union[str, Mapping[str, str]]]
2407        if isinstance(model.request_parameters, Mapping):
2408            request_parameters = self._remove_query_properties(model.request_parameters)
2409        else:
2410            request_parameters = model.request_parameters
2411
2412        request_options_provider = InterpolatedRequestOptionsProvider(
2413            request_body=model.request_body,
2414            request_body_data=model.request_body_data,
2415            request_body_json=model.request_body_json,
2416            request_headers=model.request_headers,
2417            request_parameters=request_parameters,
2418            query_properties_key=query_properties_key,
2419            config=config,
2420            parameters=model.parameters or {},
2421        )
2422
2423        assert model.use_cache is not None  # for mypy
2424        assert model.http_method is not None  # for mypy
2425
2426        should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache
2427
2428        return HttpRequester(
2429            name=name,
2430            url=model.url,
2431            url_base=model.url_base,
2432            path=model.path,
2433            authenticator=authenticator,
2434            error_handler=error_handler,
2435            api_budget=api_budget,
2436            http_method=HttpMethod[model.http_method.value],
2437            request_options_provider=request_options_provider,
2438            config=config,
2439            disable_retries=self._disable_retries,
2440            parameters=model.parameters or {},
2441            message_repository=self._message_repository,
2442            use_cache=should_use_cache,
2443            decoder=decoder,
2444            stream_response=decoder.is_stream_response() if decoder else False,
2445        )
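
A hedged illustration of request_parameters containing a QueryProperties entry, the case the comment above describes: plain entries are interpolated as usual, while the QueryProperties component is stripped here and resolved by the retriever from the slice. The key names are illustrative.

# Illustrative only; "fields", "page_size", and the property names are assumed.
request_parameters = {
    "page_size": "50",
    "fields": {
        "type": "QueryProperties",
        "property_list": ["id", "name", "updated_at"],
    },
}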
@staticmethod
def create_http_response_filter( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpResponseFilter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.HttpResponseFilter:
2447    @staticmethod
2448    def create_http_response_filter(
2449        model: HttpResponseFilterModel, config: Config, **kwargs: Any
2450    ) -> HttpResponseFilter:
2451        if model.action:
2452            action = ResponseAction(model.action.value)
2453        else:
2454            action = None
2455
2456        failure_type = FailureType(model.failure_type.value) if model.failure_type else None
2457
2458        http_codes = (
2459            set(model.http_codes) if model.http_codes else set()
2460        )  # JSON schema notation has no set data type. The schema enforces an array of unique elements
2461
2462        return HttpResponseFilter(
2463            action=action,
2464            failure_type=failure_type,
2465            error_message=model.error_message or "",
2466            error_message_contains=model.error_message_contains or "",
2467            http_codes=http_codes,
2468            predicate=model.predicate or "",
2469            config=config,
2470            parameters=model.parameters or {},
2471        )
@staticmethod
def create_inline_schema_loader( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.InlineSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.InlineSchemaLoader:
2473    @staticmethod
2474    def create_inline_schema_loader(
2475        model: InlineSchemaLoaderModel, config: Config, **kwargs: Any
2476    ) -> InlineSchemaLoader:
2477        return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
def create_complex_field_type( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ComplexFieldType, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.ComplexFieldType:
2479    def create_complex_field_type(
2480        self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
2481    ) -> ComplexFieldType:
2482        items = (
2483            self._create_component_from_model(model=model.items, config=config)
2484            if isinstance(model.items, ComplexFieldTypeModel)
2485            else model.items
2486        )
2487
2488        return ComplexFieldType(field_type=model.field_type, items=items)
def create_types_map( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.TypesMap, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.TypesMap:
2490    def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
2491        target_type = (
2492            self._create_component_from_model(model=model.target_type, config=config)
2493            if isinstance(model.target_type, ComplexFieldTypeModel)
2494            else model.target_type
2495        )
2496
2497        return TypesMap(
2498            target_type=target_type,
2499            current_type=model.current_type,
2500            condition=model.condition if model.condition is not None else "True",
2501        )
def create_schema_type_identifier( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SchemaTypeIdentifier, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.SchemaTypeIdentifier:
2503    def create_schema_type_identifier(
2504        self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
2505    ) -> SchemaTypeIdentifier:
2506        types_mapping = []
2507        if model.types_mapping:
2508            types_mapping.extend(
2509                [
2510                    self._create_component_from_model(types_map, config=config)
2511                    for types_map in model.types_mapping
2512                ]
2513            )
2514        model_schema_pointer: List[Union[InterpolatedString, str]] = (
2515            list(model.schema_pointer) if model.schema_pointer else []
2516        )
2517        model_key_pointer: List[Union[InterpolatedString, str]] = list(model.key_pointer)
2518        model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
2519            list(model.type_pointer) if model.type_pointer else None
2520        )
2521
2522        return SchemaTypeIdentifier(
2523            schema_pointer=model_schema_pointer,
2524            key_pointer=model_key_pointer,
2525            type_pointer=model_type_pointer,
2526            types_mapping=types_mapping,
2527            parameters=model.parameters or {},
2528        )
def create_dynamic_schema_loader( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DynamicSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.DynamicSchemaLoader:
2530    def create_dynamic_schema_loader(
2531        self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2532    ) -> DynamicSchemaLoader:
2533        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2534        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2535
2536        schema_transformations = []
2537        if model.schema_transformations:
2538            for transformation_model in model.schema_transformations:
2539                schema_transformations.append(
2540                    self._create_component_from_model(model=transformation_model, config=config)
2541                )
2542        name = "dynamic_properties"
2543        retriever = self._create_component_from_model(
2544            model=model.retriever,
2545            config=config,
2546            name=name,
2547            primary_key=None,
2548            stream_slicer=combined_slicers,
2549            transformations=[],
2550            use_cache=True,
2551            log_formatter=(
2552                lambda response: format_http_message(
2553                    response,
2554                    f"Schema loader '{name}' request",
2555                    "Request performed in order to extract schema.",
2556                    name,
2557                    is_auxiliary=True,
2558                )
2559            ),
2560        )
2561        schema_type_identifier = self._create_component_from_model(
2562            model.schema_type_identifier, config=config, parameters=model.parameters or {}
2563        )
2564        schema_filter = (
2565            self._create_component_from_model(
2566                model.schema_filter, config=config, parameters=model.parameters or {}
2567            )
2568            if model.schema_filter is not None
2569            else None
2570        )
2571
2572        return DynamicSchemaLoader(
2573            retriever=retriever,
2574            config=config,
2575            schema_transformations=schema_transformations,
2576            schema_filter=schema_filter,
2577            schema_type_identifier=schema_type_identifier,
2578            parameters=model.parameters or {},
2579        )
@staticmethod
def create_json_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
2581    @staticmethod
2582    def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2583        return JsonDecoder(parameters={})
def create_csv_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CsvDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
2585    def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2586        return CompositeRawDecoder(
2587            parser=ModelToComponentFactory._get_parser(model, config),
2588            stream_response=not self._emit_connector_builder_messages,
2589        )
def create_jsonl_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonlDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
2591    def create_jsonl_decoder(
2592        self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2593    ) -> Decoder:
2594        return CompositeRawDecoder(
2595            parser=ModelToComponentFactory._get_parser(model, config),
2596            stream_response=not self._emit_connector_builder_messages,
2597        )
def create_gzip_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.GzipDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
2599    def create_gzip_decoder(
2600        self, model: GzipDecoderModel, config: Config, **kwargs: Any
2601    ) -> Decoder:
2602        _compressed_response_types = {
2603            "gzip",
2604            "x-gzip",
2605            "gzip, deflate",
2606            "x-gzip, deflate",
2607            "application/zip",
2608            "application/gzip",
2609            "application/x-gzip",
2610            "application/x-zip-compressed",
2611        }
2612
2613        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser
2614
2615        if self._emit_connector_builder_messages:
2616            # This is surprising, but if the response is not streamed,
2617            # CompositeRawDecoder calls response.content, and the requests library transparently decompresses the data, as opposed to response.raw,
2618            # which uses urllib3 directly and does not decompress the data.
2619            return CompositeRawDecoder(gzip_parser.inner_parser, False)
2620
2621        return CompositeRawDecoder.by_headers(
2622            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2623            stream_response=True,
2624            fallback_parser=gzip_parser.inner_parser,
2625        )
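
The streaming caveat in this method comes from requests itself. A standalone sketch (hypothetical URL) of the behavior: response.content is transparently decompressed by requests when the server declares gzip encoding, while response.raw is the underlying urllib3 stream and yields the still-compressed bytes unless decoding is explicitly enabled.

import requests

# Assumes the (hypothetical) server replies with "Content-Encoding: gzip".
response = requests.get("https://example.com/export.json", stream=True)
compressed_head = response.raw.read(2)  # urllib3 stream: still-compressed bytes
# body = response.content              # would be decompressed transparently by requests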
@staticmethod
def create_incrementing_count_cursor( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.IncrementingCountCursor, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DatetimeBasedCursor:
2627    @staticmethod
2628    def create_incrementing_count_cursor(
2629        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
2630    ) -> DatetimeBasedCursor:
2631        # This should not actually get used anywhere at runtime, but we need it to pass checks since
2632        # we still parse models into components. The issue is that there is no runtime implementation of an
2633        # IncrementingCountCursor.
2634        # A known and expected limitation of this stub is running a check with the declared IncrementingCountCursor, because the check runs without a ConcurrentCursor.
2635        return DatetimeBasedCursor(
2636            cursor_field=model.cursor_field,
2637            datetime_format="%Y-%m-%d",
2638            start_datetime="2024-12-12",
2639            config=config,
2640            parameters={},
2641        )
@staticmethod
def create_iterable_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.IterableDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.IterableDecoder:
2643    @staticmethod
2644    def create_iterable_decoder(
2645        model: IterableDecoderModel, config: Config, **kwargs: Any
2646    ) -> IterableDecoder:
2647        return IterableDecoder(parameters={})
@staticmethod
def create_xml_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.XmlDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.XmlDecoder:
2649    @staticmethod
2650    def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2651        return XmlDecoder(parameters={})
def create_zipfile_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ZipfileDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.ZipfileDecoder:
2653    def create_zipfile_decoder(
2654        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2655    ) -> ZipfileDecoder:
2656        return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
@staticmethod
def create_json_file_schema_loader( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonFileSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.JsonFileSchemaLoader:
2682    @staticmethod
2683    def create_json_file_schema_loader(
2684        model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
2685    ) -> JsonFileSchemaLoader:
2686        return JsonFileSchemaLoader(
2687            file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2688        )
@staticmethod
def create_jwt_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JwtAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.auth.JwtAuthenticator:
2690    @staticmethod
2691    def create_jwt_authenticator(
2692        model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2693    ) -> JwtAuthenticator:
2694        jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2695        jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2696        return JwtAuthenticator(
2697            config=config,
2698            parameters=model.parameters or {},
2699            algorithm=JwtAlgorithm(model.algorithm.value),
2700            secret_key=model.secret_key,
2701            base64_encode_secret_key=model.base64_encode_secret_key,
2702            token_duration=model.token_duration,
2703            header_prefix=model.header_prefix,
2704            kid=jwt_headers.kid,
2705            typ=jwt_headers.typ,
2706            cty=jwt_headers.cty,
2707            iss=jwt_payload.iss,
2708            sub=jwt_payload.sub,
2709            aud=jwt_payload.aud,
2710            additional_jwt_headers=model.additional_jwt_headers,
2711            additional_jwt_payload=model.additional_jwt_payload,
2712        )
def create_list_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ListPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.partition_routers.ListPartitionRouter:
2714    def create_list_partition_router(
2715        self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2716    ) -> ListPartitionRouter:
2717        request_option = (
2718            self._create_component_from_model(model.request_option, config)
2719            if model.request_option
2720            else None
2721        )
2722        return ListPartitionRouter(
2723            cursor_field=model.cursor_field,
2724            request_option=request_option,
2725            values=model.values,
2726            config=config,
2727            parameters=model.parameters or {},
2728        )
@staticmethod
def create_min_max_datetime( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.MinMaxDatetime, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.MinMaxDatetime:
2730    @staticmethod
2731    def create_min_max_datetime(
2732        model: MinMaxDatetimeModel, config: Config, **kwargs: Any
2733    ) -> MinMaxDatetime:
2734        return MinMaxDatetime(
2735            datetime=model.datetime,
2736            datetime_format=model.datetime_format or "",
2737            max_datetime=model.max_datetime or "",
2738            min_datetime=model.min_datetime or "",
2739            parameters=model.parameters or {},
2740        )
@staticmethod
def create_no_auth( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.NoAuth, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.NoAuth:
2742    @staticmethod
2743    def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth:
2744        return NoAuth(parameters=model.parameters or {})
@staticmethod
def create_no_pagination( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.NoPagination, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.paginators.NoPagination:
2746    @staticmethod
2747    def create_no_pagination(
2748        model: NoPaginationModel, config: Config, **kwargs: Any
2749    ) -> NoPagination:
2750        return NoPagination(parameters={})
def create_oauth_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.OAuthAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeOauth2Authenticator:
2752    def create_oauth_authenticator(
2753        self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
2754    ) -> DeclarativeOauth2Authenticator:
2755        profile_assertion = (
2756            self._create_component_from_model(model.profile_assertion, config=config)
2757            if model.profile_assertion
2758            else None
2759        )
2760
2761        if model.refresh_token_updater:
2762            # ignore type error because fixing it would have a lot of dependencies, revisit later
2763            return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
2764                config,
2765                InterpolatedString.create(
2766                    model.token_refresh_endpoint,  # type: ignore
2767                    parameters=model.parameters or {},
2768                ).eval(config),
2769                access_token_name=InterpolatedString.create(
2770                    model.access_token_name or "access_token", parameters=model.parameters or {}
2771                ).eval(config),
2772                refresh_token_name=model.refresh_token_updater.refresh_token_name,
2773                expires_in_name=InterpolatedString.create(
2774                    model.expires_in_name or "expires_in", parameters=model.parameters or {}
2775                ).eval(config),
2776                client_id_name=InterpolatedString.create(
2777                    model.client_id_name or "client_id", parameters=model.parameters or {}
2778                ).eval(config),
2779                client_id=InterpolatedString.create(
2780                    model.client_id, parameters=model.parameters or {}
2781                ).eval(config)
2782                if model.client_id
2783                else model.client_id,
2784                client_secret_name=InterpolatedString.create(
2785                    model.client_secret_name or "client_secret", parameters=model.parameters or {}
2786                ).eval(config),
2787                client_secret=InterpolatedString.create(
2788                    model.client_secret, parameters=model.parameters or {}
2789                ).eval(config)
2790                if model.client_secret
2791                else model.client_secret,
2792                access_token_config_path=model.refresh_token_updater.access_token_config_path,
2793                refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
2794                token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
2795                grant_type_name=InterpolatedString.create(
2796                    model.grant_type_name or "grant_type", parameters=model.parameters or {}
2797                ).eval(config),
2798                grant_type=InterpolatedString.create(
2799                    model.grant_type or "refresh_token", parameters=model.parameters or {}
2800                ).eval(config),
2801                refresh_request_body=InterpolatedMapping(
2802                    model.refresh_request_body or {}, parameters=model.parameters or {}
2803                ).eval(config),
2804                refresh_request_headers=InterpolatedMapping(
2805                    model.refresh_request_headers or {}, parameters=model.parameters or {}
2806                ).eval(config),
2807                scopes=model.scopes,
2808                token_expiry_date_format=model.token_expiry_date_format,
2809                token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2810                message_repository=self._message_repository,
2811                refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
2812                refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
2813                refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
2814            )
2815        # ignore type error because fixing it would have a lot of dependencies, revisit later
2816        return DeclarativeOauth2Authenticator(  # type: ignore
2817            access_token_name=model.access_token_name or "access_token",
2818            access_token_value=model.access_token_value,
2819            client_id_name=model.client_id_name or "client_id",
2820            client_id=model.client_id,
2821            client_secret_name=model.client_secret_name or "client_secret",
2822            client_secret=model.client_secret,
2823            expires_in_name=model.expires_in_name or "expires_in",
2824            grant_type_name=model.grant_type_name or "grant_type",
2825            grant_type=model.grant_type or "refresh_token",
2826            refresh_request_body=model.refresh_request_body,
2827            refresh_request_headers=model.refresh_request_headers,
2828            refresh_token_name=model.refresh_token_name or "refresh_token",
2829            refresh_token=model.refresh_token,
2830            scopes=model.scopes,
2831            token_expiry_date=model.token_expiry_date,
2832            token_expiry_date_format=model.token_expiry_date_format,
2833            token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2834            token_refresh_endpoint=model.token_refresh_endpoint,
2835            config=config,
2836            parameters=model.parameters or {},
2837            message_repository=self._message_repository,
2838            profile_assertion=profile_assertion,
2839            use_profile_assertion=model.use_profile_assertion,
2840        )
2842    def create_offset_increment(
2843        self,
2844        model: OffsetIncrementModel,
2845        config: Config,
2846        decoder: Decoder,
2847        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2848        **kwargs: Any,
2849    ) -> OffsetIncrement:
2850        if isinstance(decoder, PaginationDecoderDecorator):
2851            inner_decoder = decoder.decoder
2852        else:
2853            inner_decoder = decoder
2854            decoder = PaginationDecoderDecorator(decoder=decoder)
2855
2856        if self._is_supported_decoder_for_pagination(inner_decoder):
2857            decoder_to_use = decoder
2858        else:
2859            raise ValueError(
2860                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
2861            )
2862
2863        # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
2864        # so that it could be shared by OffsetIncrement and RecordSelector. However, because we instantiate the
2865        # decoder with various decorators here, but not in create_record_selector, it is simpler to retain the
2866        # existing behavior of two separate extractors with identical behavior, since they use the same extractor model.
2867        # When we have more time to investigate, we can look into reusing the same component.
2868        extractor = (
2869            self._create_component_from_model(
2870                model=extractor_model, config=config, decoder=decoder_to_use
2871            )
2872            if extractor_model
2873            else None
2874        )
2875
2876        return OffsetIncrement(
2877            page_size=model.page_size,
2878            config=config,
2879            decoder=decoder_to_use,
2880            extractor=extractor,
2881            inject_on_first_request=model.inject_on_first_request or False,
2882            parameters=model.parameters or {},
2883        )
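Example: a minimal hypothetical sketch of driving this factory method directly (the model field values are illustrative assumptions following the declarative component schema; a bare JsonDecoder is accepted because the method wraps it in a PaginationDecoderDecorator itself):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.decoders import JsonDecoder
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        OffsetIncrement as OffsetIncrementModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    strategy = factory.create_offset_increment(
        model=OffsetIncrementModel(type="OffsetIncrement", page_size=100),
        config={},
        # Wrapped in a PaginationDecoderDecorator by the method before use.
        decoder=JsonDecoder(parameters={}),
    )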
@staticmethod
def create_page_increment( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.PageIncrement, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.PageIncrement:
2885    @staticmethod
2886    def create_page_increment(
2887        model: PageIncrementModel, config: Config, **kwargs: Any
2888    ) -> PageIncrement:
2889        return PageIncrement(
2890            page_size=model.page_size,
2891            config=config,
2892            start_from_page=model.start_from_page or 0,
2893            inject_on_first_request=model.inject_on_first_request or False,
2894            parameters=model.parameters or {},
2895        )
def create_parent_stream_config( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ParentStreamConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.ParentStreamConfig:
2897    def create_parent_stream_config(
2898        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2899    ) -> ParentStreamConfig:
2900        declarative_stream = self._create_component_from_model(
2901            model.stream, config=config, **kwargs
2902        )
2903        request_option = (
2904            self._create_component_from_model(model.request_option, config=config)
2905            if model.request_option
2906            else None
2907        )
2908
2909        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
2910            raise ValueError(
2911                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
2912            )
2913
2914        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
2915            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
2916        )
2917
2918        return ParentStreamConfig(
2919            parent_key=model.parent_key,
2920            request_option=request_option,
2921            stream=declarative_stream,
2922            partition_field=model.partition_field,
2923            config=config,
2924            incremental_dependency=model.incremental_dependency or False,
2925            parameters=model.parameters or {},
2926            extra_fields=model.extra_fields,
2927            lazy_read_pointer=model_lazy_read_pointer,
2928        )
2930    def create_properties_from_endpoint(
2931        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
2932    ) -> PropertiesFromEndpoint:
2933        retriever = self._create_component_from_model(
2934            model=model.retriever,
2935            config=config,
2936            name="dynamic_properties",
2937            primary_key=None,
2938            stream_slicer=None,
2939            transformations=[],
2940            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to be different
2941        )
2942        return PropertiesFromEndpoint(
2943            property_field_path=model.property_field_path,
2944            retriever=retriever,
2945            config=config,
2946            parameters=model.parameters or {},
2947        )
2949    def create_property_chunking(
2950        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2951    ) -> PropertyChunking:
2952        record_merge_strategy = (
2953            self._create_component_from_model(
2954                model=model.record_merge_strategy, config=config, **kwargs
2955            )
2956            if model.record_merge_strategy
2957            else None
2958        )
2959
2960        property_limit_type: PropertyLimitType
2961        match model.property_limit_type:
2962            case PropertyLimitTypeModel.property_count:
2963                property_limit_type = PropertyLimitType.property_count
2964            case PropertyLimitTypeModel.characters:
2965                property_limit_type = PropertyLimitType.characters
2966            case _:
2967                raise ValueError(f"Invalid PropertyLimitType: {model.property_limit_type}")
2968
2969        return PropertyChunking(
2970            property_limit_type=property_limit_type,
2971            property_limit=model.property_limit,
2972            record_merge_strategy=record_merge_strategy,
2973            config=config,
2974            parameters=model.parameters or {},
2975        )
2977    def create_query_properties(
2978        self, model: QueryPropertiesModel, config: Config, **kwargs: Any
2979    ) -> QueryProperties:
2980        if isinstance(model.property_list, list):
2981            property_list = model.property_list
2982        else:
2983            property_list = self._create_component_from_model(
2984                model=model.property_list, config=config, **kwargs
2985            )
2986
2987        property_chunking = (
2988            self._create_component_from_model(
2989                model=model.property_chunking, config=config, **kwargs
2990            )
2991            if model.property_chunking
2992            else None
2993        )
2994
2995        return QueryProperties(
2996            property_list=property_list,
2997            always_include_properties=model.always_include_properties,
2998            property_chunking=property_chunking,
2999            config=config,
3000            parameters=model.parameters or {},
3001        )
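Example: a rough hypothetical sketch of how the property components above fit together, building a QueryProperties with a static property list and count-based chunking (all field values are illustrative assumptions):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        PropertyChunking as PropertyChunkingModel,
        PropertyLimitType as PropertyLimitTypeModel,
        QueryProperties as QueryPropertiesModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    query_properties = factory.create_query_properties(
        model=QueryPropertiesModel(
            type="QueryProperties",
            # A static list; a PropertiesFromEndpoint model could be supplied instead.
            property_list=["id", "name", "updated_at"],
            always_include_properties=["id"],
            property_chunking=PropertyChunkingModel(
                type="PropertyChunking",
                property_limit_type=PropertyLimitTypeModel.property_count,
                property_limit=50,
            ),
        ),
        config={},
    )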
@staticmethod
def create_record_filter( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RecordFilter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.RecordFilter:
3003    @staticmethod
3004    def create_record_filter(
3005        model: RecordFilterModel, config: Config, **kwargs: Any
3006    ) -> RecordFilter:
3007        return RecordFilter(
3008            condition=model.condition or "", config=config, parameters=model.parameters or {}
3009        )
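Example: a hypothetical interpolated condition evaluated per record (the condition syntax follows the usual declarative interpolation conventions; the config key is an illustrative assumption):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        RecordFilter as RecordFilterModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    record_filter = ModelToComponentFactory.create_record_filter(
        model=RecordFilterModel(
            type="RecordFilter",
            # Keep only records updated after a date supplied in the connector config.
            condition="{{ record['updated_at'] >= config['start_date'] }}",
        ),
        config={"start_date": "2024-01-01"},
    )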
@staticmethod
def create_request_path( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RequestPath, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.request_path.RequestPath:
3011    @staticmethod
3012    def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath:
3013        return RequestPath(parameters={})
@staticmethod
def create_request_option( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RequestOption, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.RequestOption:
3015    @staticmethod
3016    def create_request_option(
3017        model: RequestOptionModel, config: Config, **kwargs: Any
3018    ) -> RequestOption:
3019        inject_into = RequestOptionType(model.inject_into.value)
3020        field_path: Optional[List[Union[InterpolatedString, str]]] = (
3021            [
3022                InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
3023                for segment in model.field_path
3024            ]
3025            if model.field_path
3026            else None
3027        )
3028        field_name = (
3029            InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
3030            if model.field_name
3031            else None
3032        )
3033        return RequestOption(
3034            field_name=field_name,
3035            field_path=field_path,
3036            inject_into=inject_into,
3037            parameters=kwargs.get("parameters", {}),
3038        )
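Example: a hypothetical RequestOption that injects a value as a query parameter. Typically only one of field_name or field_path is set, which is why both are handled as optionals above (field values are illustrative assumptions):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        RequestOption as RequestOptionModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    request_option = ModelToComponentFactory.create_request_option(
        model=RequestOptionModel(
            type="RequestOption",
            field_name="page_size",
            inject_into="request_parameter",
        ),
        config={},
        parameters={},
    )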
def create_record_selector( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RecordSelector, config: Mapping[str, Any], *, name: str, transformations: Optional[List[airbyte_cdk.RecordTransformation]] = None, decoder: airbyte_cdk.Decoder | None = None, client_side_incremental_sync: Optional[Dict[str, Any]] = None, file_uploader: Optional[airbyte_cdk.sources.declarative.retrievers.file_uploader.DefaultFileUploader] = None, **kwargs: Any) -> airbyte_cdk.RecordSelector:
3040    def create_record_selector(
3041        self,
3042        model: RecordSelectorModel,
3043        config: Config,
3044        *,
3045        name: str,
3046        transformations: List[RecordTransformation] | None = None,
3047        decoder: Decoder | None = None,
3048        client_side_incremental_sync: Dict[str, Any] | None = None,
3049        file_uploader: Optional[DefaultFileUploader] = None,
3050        **kwargs: Any,
3051    ) -> RecordSelector:
3052        extractor = self._create_component_from_model(
3053            model=model.extractor, decoder=decoder, config=config
3054        )
3055        record_filter = (
3056            self._create_component_from_model(model.record_filter, config=config)
3057            if model.record_filter
3058            else None
3059        )
3060
3061        transform_before_filtering = (
3062            False if model.transform_before_filtering is None else model.transform_before_filtering
3063        )
3064        if client_side_incremental_sync:
3065            record_filter = ClientSideIncrementalRecordFilterDecorator(
3066                config=config,
3067                parameters=model.parameters,
3068                condition=model.record_filter.condition
3069                if (model.record_filter and hasattr(model.record_filter, "condition"))
3070                else None,
3071                **client_side_incremental_sync,
3072            )
3073            transform_before_filtering = (
3074                True
3075                if model.transform_before_filtering is None
3076                else model.transform_before_filtering
3077            )
3078
3079        if model.schema_normalization is None:
3080            # default to no schema normalization if not set
3081            model.schema_normalization = SchemaNormalizationModel.None_
3082
3083        schema_normalization = (
3084            TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
3085            if isinstance(model.schema_normalization, SchemaNormalizationModel)
3086            else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
3087        )
3088
3089        return RecordSelector(
3090            extractor=extractor,
3091            name=name,
3092            config=config,
3093            record_filter=record_filter,
3094            transformations=transformations or [],
3095            file_uploader=file_uploader,
3096            schema_normalization=schema_normalization,
3097            parameters=model.parameters or {},
3098            transform_before_filtering=transform_before_filtering,
3099        )
@staticmethod
def create_remove_fields( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RemoveFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.RemoveFields:
3101    @staticmethod
3102    def create_remove_fields(
3103        model: RemoveFieldsModel, config: Config, **kwargs: Any
3104    ) -> RemoveFields:
3105        return RemoveFields(
3106            field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
3107        )
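Example: a quick hypothetical transformation removing fields from every record (each pointer is a path of keys into the record; the field names are illustrative assumptions):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        RemoveFields as RemoveFieldsModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    remove_fields = ModelToComponentFactory.create_remove_fields(
        model=RemoveFieldsModel(
            type="RemoveFields",
            # Each pointer is a list of keys identifying one field to drop.
            field_pointers=[["metadata", "internal_id"], ["_debug"]],
        ),
        config={},
    )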
def create_selective_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SelectiveAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeAuthenticator:
3109    def create_selective_authenticator(
3110        self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
3111    ) -> DeclarativeAuthenticator:
3112        authenticators = {
3113            name: self._create_component_from_model(model=auth, config=config)
3114            for name, auth in model.authenticators.items()
3115        }
3116        # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
3117        return SelectiveAuthenticator(  # type: ignore[abstract]
3118            config=config,
3119            authenticators=authenticators,
3120            authenticator_selection_path=model.authenticator_selection_path,
3121            **kwargs,
3122        )
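Example: a hypothetical sketch of the selection mechanism, assuming a config key auth_type chooses between two authenticators (all model shapes and values are illustrative assumptions):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        BearerAuthenticator as BearerAuthenticatorModel,
        NoAuth as NoAuthModel,
        SelectiveAuthenticator as SelectiveAuthenticatorModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    authenticator = factory.create_selective_authenticator(
        model=SelectiveAuthenticatorModel(
            type="SelectiveAuthenticator",
            # Path into the config whose value picks one of the authenticators below.
            authenticator_selection_path=["auth_type"],
            authenticators={
                "token": BearerAuthenticatorModel(
                    type="BearerAuthenticator", api_token="{{ config['api_token'] }}"
                ),
                "none": NoAuthModel(type="NoAuth"),
            },
        ),
        config={"auth_type": "token", "api_token": "secret"},
    )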
@staticmethod
def create_legacy_session_token_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.LegacySessionTokenAuthenticator, config: Mapping[str, Any], *, url_base: str, **kwargs: Any) -> airbyte_cdk.sources.declarative.auth.token.LegacySessionTokenAuthenticator:
3124    @staticmethod
3125    def create_legacy_session_token_authenticator(
3126        model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
3127    ) -> LegacySessionTokenAuthenticator:
3128        return LegacySessionTokenAuthenticator(
3129            api_url=url_base,
3130            header=model.header,
3131            login_url=model.login_url,
3132            password=model.password or "",
3133            session_token=model.session_token or "",
3134            session_token_response_key=model.session_token_response_key or "",
3135            username=model.username or "",
3136            validate_session_url=model.validate_session_url,
3137            config=config,
3138            parameters=model.parameters or {},
3139        )
def create_simple_retriever( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SimpleRetriever, config: Mapping[str, Any], *, name: str, primary_key: Union[str, List[str], List[List[str]], NoneType], stream_slicer: Optional[airbyte_cdk.sources.declarative.stream_slicers.StreamSlicer], request_options_provider: Optional[airbyte_cdk.sources.declarative.requesters.request_options.RequestOptionsProvider] = None, stop_condition_on_cursor: bool = False, client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[airbyte_cdk.RecordTransformation], file_uploader: Optional[airbyte_cdk.sources.declarative.retrievers.file_uploader.DefaultFileUploader] = None, incremental_sync: Union[airbyte_cdk.sources.declarative.models.declarative_component_schema.IncrementingCountCursor, airbyte_cdk.sources.declarative.models.declarative_component_schema.DatetimeBasedCursor, airbyte_cdk.sources.declarative.models.declarative_component_schema.CustomIncrementalSync, NoneType] = None, use_cache: Optional[bool] = None, log_formatter: Optional[Callable[[requests.models.Response], Any]] = None, **kwargs: Any) -> airbyte_cdk.SimpleRetriever:
3141    def create_simple_retriever(
3142        self,
3143        model: SimpleRetrieverModel,
3144        config: Config,
3145        *,
3146        name: str,
3147        primary_key: Optional[Union[str, List[str], List[List[str]]]],
3148        stream_slicer: Optional[StreamSlicer],
3149        request_options_provider: Optional[RequestOptionsProvider] = None,
3150        stop_condition_on_cursor: bool = False,
3151        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3152        transformations: List[RecordTransformation],
3153        file_uploader: Optional[DefaultFileUploader] = None,
3154        incremental_sync: Optional[
3155            Union[
3156                IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
3157            ]
3158        ] = None,
3159        use_cache: Optional[bool] = None,
3160        log_formatter: Optional[Callable[[Response], Any]] = None,
3161        **kwargs: Any,
3162    ) -> SimpleRetriever:
3163        def _get_url() -> str:
3164            """
3165            Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever.
3166            This is needed because the URL is not set until the requester is created.
3167            """
3168
3169            _url: str = (
3170                model.requester.url
3171                if hasattr(model.requester, "url") and model.requester.url is not None
3172                else requester.get_url()
3173            )
3174            _url_base: str = (
3175                model.requester.url_base
3176                if hasattr(model.requester, "url_base") and model.requester.url_base is not None
3177                else requester.get_url_base()
3178            )
3179
3180            return _url or _url_base
3181
3182        decoder = (
3183            self._create_component_from_model(model=model.decoder, config=config)
3184            if model.decoder
3185            else JsonDecoder(parameters={})
3186        )
3187        record_selector = self._create_component_from_model(
3188            model=model.record_selector,
3189            name=name,
3190            config=config,
3191            decoder=decoder,
3192            transformations=transformations,
3193            client_side_incremental_sync=client_side_incremental_sync,
3194            file_uploader=file_uploader,
3195        )
3196
3197        query_properties: Optional[QueryProperties] = None
3198        query_properties_key: Optional[str] = None
3199        if self._query_properties_in_request_parameters(model.requester):
3200            # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple
3201            # places instead of defaulting to request_parameters, which isn't clearly documented
3202            if (
3203                hasattr(model.requester, "fetch_properties_from_endpoint")
3204                and model.requester.fetch_properties_from_endpoint
3205            ):
3206                raise ValueError(
3207                    f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters"
3208                )
3209
3210            query_properties_definitions = []
3211            for key, request_parameter in model.requester.request_parameters.items():  # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters()
3212                if isinstance(request_parameter, QueryPropertiesModel):
3213                    query_properties_key = key
3214                    query_properties_definitions.append(request_parameter)
3215
3216            if len(query_properties_definitions) > 1:
3217                raise ValueError(
3218                    f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
3219                )
3220
3221            if len(query_properties_definitions) == 1:
3222                query_properties = self._create_component_from_model(
3223                    model=query_properties_definitions[0], config=config
3224                )
3225        elif (
3226            hasattr(model.requester, "fetch_properties_from_endpoint")
3227            and model.requester.fetch_properties_from_endpoint
3228        ):
3229            # todo: Deprecate this condition once dependent connectors migrate to query_properties
3230            query_properties_definition = QueryPropertiesModel(
3231                type="QueryProperties",
3232                property_list=model.requester.fetch_properties_from_endpoint,
3233                always_include_properties=None,
3234                property_chunking=None,
3235            )  # type: ignore # $parameters has a default value
3236
3237            query_properties = self.create_query_properties(
3238                model=query_properties_definition,
3239                config=config,
3240            )
3241        elif hasattr(model.requester, "query_properties") and model.requester.query_properties:
3242            query_properties = self.create_query_properties(
3243                model=model.requester.query_properties,
3244                config=config,
3245            )
3246
3247        requester = self._create_component_from_model(
3248            model=model.requester,
3249            decoder=decoder,
3250            name=name,
3251            query_properties_key=query_properties_key,
3252            use_cache=use_cache,
3253            config=config,
3254        )
3255
3256        # Define the cursor only if per-partition or common incremental support is needed
3257        cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None
3258
3259        if (
3260            not isinstance(stream_slicer, DatetimeBasedCursor)
3261            or type(stream_slicer) is not DatetimeBasedCursor
3262        ):
3263            # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
3264            # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
3265            # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
3266            # request_options_provider
3267            request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
3268        elif not request_options_provider:
3269            request_options_provider = DefaultRequestOptionsProvider(parameters={})
3270
3271        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3272        if self._should_limit_slices_fetched():
3273            stream_slicer = cast(
3274                StreamSlicer,
3275                StreamSlicerTestReadDecorator(
3276                    wrapped_slicer=stream_slicer,
3277                    maximum_number_of_slices=self._limit_slices_fetched or 5,
3278                ),
3279            )
3280
3281        cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
3282        paginator = (
3283            self._create_component_from_model(
3284                model=model.paginator,
3285                config=config,
3286                url_base=_get_url(),
3287                extractor_model=model.record_selector.extractor,
3288                decoder=decoder,
3289                cursor_used_for_stop_condition=cursor_used_for_stop_condition,
3290            )
3291            if model.paginator
3292            else NoPagination(parameters={})
3293        )
3294
3295        ignore_stream_slicer_parameters_on_paginated_requests = (
3296            model.ignore_stream_slicer_parameters_on_paginated_requests or False
3297        )
3298
3299        if (
3300            model.partition_router
3301            and isinstance(model.partition_router, SubstreamPartitionRouterModel)
3302            and not bool(self._connector_state_manager.get_stream_state(name, None))
3303            and any(
3304                parent_stream_config.lazy_read_pointer
3305                for parent_stream_config in model.partition_router.parent_stream_configs
3306            )
3307        ):
3308            if incremental_sync:
3309                if incremental_sync.type != "DatetimeBasedCursor":
3310                    raise ValueError(
3311                        f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
3312                    )
3313
3314                elif incremental_sync.step or incremental_sync.cursor_granularity:
3315                    raise ValueError(
3316                        f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}."
3317                    )
3318
3319            if model.decoder and model.decoder.type != "JsonDecoder":
3320                raise ValueError(
3321                    f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
3322                )
3323
3324            return LazySimpleRetriever(
3325                name=name,
3326                paginator=paginator,
3327                primary_key=primary_key,
3328                requester=requester,
3329                record_selector=record_selector,
3330                stream_slicer=stream_slicer,
3331                request_option_provider=request_options_provider,
3332                cursor=cursor,
3333                config=config,
3334                ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3335                parameters=model.parameters or {},
3336            )
3337
3338        return SimpleRetriever(
3339            name=name,
3340            paginator=paginator,
3341            primary_key=primary_key,
3342            requester=requester,
3343            record_selector=record_selector,
3344            stream_slicer=stream_slicer,
3345            request_option_provider=request_options_provider,
3346            cursor=cursor,
3347            config=config,
3348            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
3349            additional_query_properties=query_properties,
3350            log_formatter=self._get_log_formatter(log_formatter, name),
3351            parameters=model.parameters or {},
3352        )
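Example: putting the above together, a minimal hypothetical declarative definition that this method can materialize into a SimpleRetriever (the model shapes follow the declarative component schema; the URL, path, and field names are illustrative assumptions):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        DpathExtractor as DpathExtractorModel,
        HttpRequester as HttpRequesterModel,
        RecordSelector as RecordSelectorModel,
        SimpleRetriever as SimpleRetrieverModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    retriever_model = SimpleRetrieverModel(
        type="SimpleRetriever",
        requester=HttpRequesterModel(
            type="HttpRequester", url_base="https://api.example.com", path="/items"
        ),
        record_selector=RecordSelectorModel(
            type="RecordSelector",
            extractor=DpathExtractorModel(type="DpathExtractor", field_path=["items"]),
        ),
    )
    # With no paginator, decoder, or partition router set, the method falls back to
    # NoPagination, JsonDecoder, and SinglePartitionRouter respectively.
    retriever = factory.create_simple_retriever(
        model=retriever_model,
        config={},
        name="items",
        primary_key="id",
        stream_slicer=None,
        transformations=[],
    )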
def create_state_delegating_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StateDelegatingStream, config: Mapping[str, Any], has_parent_state: Optional[bool] = None, **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
3402    def create_state_delegating_stream(
3403        self,
3404        model: StateDelegatingStreamModel,
3405        config: Config,
3406        has_parent_state: Optional[bool] = None,
3407        **kwargs: Any,
3408    ) -> DeclarativeStream:
3409        if (
3410            model.full_refresh_stream.name != model.name
3411            or model.name != model.incremental_stream.name
3412        ):
3413            raise ValueError(
3414                f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
3415            )
3416
3417        stream_model = (
3418            model.incremental_stream
3419            if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
3420            else model.full_refresh_stream
3421        )
3422
3423        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # Will be created DeclarativeStream as stream_model is stream description
def create_async_retriever( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AsyncRetriever, config: Mapping[str, Any], *, name: str, primary_key: Union[str, List[str], List[List[str]], NoneType], stream_slicer: Optional[airbyte_cdk.sources.declarative.stream_slicers.StreamSlicer], client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[airbyte_cdk.RecordTransformation], **kwargs: Any) -> airbyte_cdk.sources.declarative.retrievers.AsyncRetriever:
3455    def create_async_retriever(
3456        self,
3457        model: AsyncRetrieverModel,
3458        config: Config,
3459        *,
3460        name: str,
3461        primary_key: Optional[
3462            Union[str, List[str], List[List[str]]]
3463        ],  # this seems to be needed to match create_simple_retriever
3464        stream_slicer: Optional[StreamSlicer],
3465        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
3466        transformations: List[RecordTransformation],
3467        **kwargs: Any,
3468    ) -> AsyncRetriever:
3469        def _get_download_retriever() -> SimpleRetriever:
3470            # We create a record selector for the download retriever
3471            # with no schema normalization, no transformations, and no record filter,
3472            # as all of this occurs in the record_selector of the AsyncRetriever
3473            record_selector = RecordSelector(
3474                extractor=download_extractor,
3475                name=name,
3476                record_filter=None,
3477                transformations=[],
3478                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
3479                config=config,
3480                parameters={},
3481            )
3482            paginator = (
3483                self._create_component_from_model(
3484                    model=model.download_paginator,
3485                    decoder=decoder,
3486                    config=config,
3487                    url_base="",
3488                )
3489                if model.download_paginator
3490                else NoPagination(parameters={})
3491            )
3492
3493            return SimpleRetriever(
3494                requester=download_requester,
3495                record_selector=record_selector,
3496                primary_key=None,
3497                name=job_download_components_name,
3498                paginator=paginator,
3499                config=config,
3500                parameters={},
3501            )
3502
3503        def _get_job_timeout() -> datetime.timedelta:
3504            user_defined_timeout: Optional[int] = (
3505                int(
3506                    InterpolatedString.create(
3507                        str(model.polling_job_timeout),
3508                        parameters={},
3509                    ).eval(config)
3510                )
3511                if model.polling_job_timeout
3512                else None
3513            )
3514
3515            # for test reads, use the user-defined timeout or default to 15 minutes
3516            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
3517            # the default value for non-Connector Builder syncs is 60 minutes.
3518            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)
3519
3520            return (
3521                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
3522            )
3523
3524        decoder = (
3525            self._create_component_from_model(model=model.decoder, config=config)
3526            if model.decoder
3527            else JsonDecoder(parameters={})
3528        )
3529        record_selector = self._create_component_from_model(
3530            model=model.record_selector,
3531            config=config,
3532            decoder=decoder,
3533            name=name,
3534            transformations=transformations,
3535            client_side_incremental_sync=client_side_incremental_sync,
3536        )
3537
3538        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
3539        if self._should_limit_slices_fetched():
3540            stream_slicer = cast(
3541                StreamSlicer,
3542                StreamSlicerTestReadDecorator(
3543                    wrapped_slicer=stream_slicer,
3544                    maximum_number_of_slices=self._limit_slices_fetched or 5,
3545                ),
3546            )
3547
3548        creation_requester = self._create_component_from_model(
3549            model=model.creation_requester,
3550            decoder=decoder,
3551            config=config,
3552            name=f"job creation - {name}",
3553        )
3554        polling_requester = self._create_component_from_model(
3555            model=model.polling_requester,
3556            decoder=decoder,
3557            config=config,
3558            name=f"job polling - {name}",
3559        )
3560        job_download_components_name = f"job download - {name}"
3561        download_decoder = (
3562            self._create_component_from_model(model=model.download_decoder, config=config)
3563            if model.download_decoder
3564            else JsonDecoder(parameters={})
3565        )
3566        download_extractor = (
3567            self._create_component_from_model(
3568                model=model.download_extractor,
3569                config=config,
3570                decoder=download_decoder,
3571                parameters=model.parameters,
3572            )
3573            if model.download_extractor
3574            else DpathExtractor(
3575                [],
3576                config=config,
3577                decoder=download_decoder,
3578                parameters=model.parameters or {},
3579            )
3580        )
3581        download_requester = self._create_component_from_model(
3582            model=model.download_requester,
3583            decoder=download_decoder,
3584            config=config,
3585            name=job_download_components_name,
3586        )
3587        download_retriever = _get_download_retriever()
3588        abort_requester = (
3589            self._create_component_from_model(
3590                model=model.abort_requester,
3591                decoder=decoder,
3592                config=config,
3593                name=f"job abort - {name}",
3594            )
3595            if model.abort_requester
3596            else None
3597        )
3598        delete_requester = (
3599            self._create_component_from_model(
3600                model=model.delete_requester,
3601                decoder=decoder,
3602                config=config,
3603                name=f"job delete - {name}",
3604            )
3605            if model.delete_requester
3606            else None
3607        )
3608        download_target_requester = (
3609            self._create_component_from_model(
3610                model=model.download_target_requester,
3611                decoder=decoder,
3612                config=config,
3613                name=f"job extract_url - {name}",
3614            )
3615            if model.download_target_requester
3616            else None
3617        )
3618        status_extractor = self._create_component_from_model(
3619            model=model.status_extractor, decoder=decoder, config=config, name=name
3620        )
3621        download_target_extractor = self._create_component_from_model(
3622            model=model.download_target_extractor,
3623            decoder=decoder,
3624            config=config,
3625            name=name,
3626        )
3627
3628        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
3629            creation_requester=creation_requester,
3630            polling_requester=polling_requester,
3631            download_retriever=download_retriever,
3632            download_target_requester=download_target_requester,
3633            abort_requester=abort_requester,
3634            delete_requester=delete_requester,
3635            status_extractor=status_extractor,
3636            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3637            download_target_extractor=download_target_extractor,
3638            job_timeout=_get_job_timeout(),
3639        )
3640
3641        async_job_partition_router = AsyncJobPartitionRouter(
3642            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3643                job_repository,
3644                stream_slices,
3645                self._job_tracker,
3646                self._message_repository,
3647                # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk
3648                has_bulk_parent=False,
3649                # set `job_max_retry` to 1 for the Connector Builder use case.
3650                # `None` means the default of 3 retry attempts is used under the hood.
3651                job_max_retry=1 if self._emit_connector_builder_messages else None,
3652            ),
3653            stream_slicer=stream_slicer,
3654            config=config,
3655            parameters=model.parameters or {},
3656        )
3657
3658        return AsyncRetriever(
3659            record_selector=record_selector,
3660            stream_slicer=async_job_partition_router,
3661            config=config,
3662            parameters=model.parameters or {},
3663        )
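The job-timeout rule in _get_job_timeout above can be summarized with a small worked example: with polling_job_timeout set to 30, the timeout is 30 minutes in both modes, while leaving it unset yields 15 minutes for Connector Builder test reads and 60 minutes for regular syncs. A hypothetical standalone sketch of the same precedence:

    import datetime
    from typing import Optional

    def job_timeout(user_defined_minutes: Optional[int], is_test_read: bool) -> datetime.timedelta:
        # Mirrors the precedence above: an explicit polling_job_timeout wins;
        # otherwise test reads default to 15 minutes and regular syncs to 60.
        default_minutes = 15 if is_test_read else 60
        return datetime.timedelta(minutes=user_defined_minutes or default_minutes)

    assert job_timeout(None, is_test_read=True) == datetime.timedelta(minutes=15)
    assert job_timeout(None, is_test_read=False) == datetime.timedelta(minutes=60)
    assert job_timeout(30, is_test_read=True) == datetime.timedelta(minutes=30)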
def create_spec( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Spec, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.spec.Spec:
3665    def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec:
3666        config_migrations = [
3667            self._create_component_from_model(migration, config)
3668            for migration in (
3669                model.config_normalization_rules.config_migrations
3670                if (
3671                    model.config_normalization_rules
3672                    and model.config_normalization_rules.config_migrations
3673                )
3674                else []
3675            )
3676        ]
3677        config_transformations = [
3678            self._create_component_from_model(transformation, config)
3679            for transformation in (
3680                model.config_normalization_rules.transformations
3681                if (
3682                    model.config_normalization_rules
3683                    and model.config_normalization_rules.transformations
3684                )
3685                else []
3686            )
3687        ]
3688        config_validations = [
3689            self._create_component_from_model(validation, config)
3690            for validation in (
3691                model.config_normalization_rules.validations
3692                if (
3693                    model.config_normalization_rules
3694                    and model.config_normalization_rules.validations
3695                )
3696                else []
3697            )
3698        ]
3699
3700        return Spec(
3701            connection_specification=model.connection_specification,
3702            documentation_url=model.documentation_url,
3703            advanced_auth=model.advanced_auth,
3704            parameters={},
3705            config_migrations=config_migrations,
3706            config_transformations=config_transformations,
3707            config_validations=config_validations,
3708        )
def create_substream_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SubstreamPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.SubstreamPartitionRouter:
3710    def create_substream_partition_router(
3711        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
3712    ) -> SubstreamPartitionRouter:
3713        parent_stream_configs = []
3714        if model.parent_stream_configs:
3715            parent_stream_configs.extend(
3716                [
3717                    self._create_message_repository_substream_wrapper(
3718                        model=parent_stream_config, config=config, **kwargs
3719                    )
3720                    for parent_stream_config in model.parent_stream_configs
3721                ]
3722            )
3723
3724        return SubstreamPartitionRouter(
3725            parent_stream_configs=parent_stream_configs,
3726            parameters=model.parameters or {},
3727            config=config,
3728        )
3756    @staticmethod
3757    def create_wait_time_from_header(
3758        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
3759    ) -> WaitTimeFromHeaderBackoffStrategy:
3760        return WaitTimeFromHeaderBackoffStrategy(
3761            header=model.header,
3762            parameters=model.parameters or {},
3763            config=config,
3764            regex=model.regex,
3765            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
3768        )
3770    @staticmethod
3771    def create_wait_until_time_from_header(
3772        model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
3773    ) -> WaitUntilTimeFromHeaderBackoffStrategy:
3774        return WaitUntilTimeFromHeaderBackoffStrategy(
3775            header=model.header,
3776            parameters=model.parameters or {},
3777            config=config,
3778            min_wait=model.min_wait,
3779            regex=model.regex,
3780        )
def get_message_repository(self) -> airbyte_cdk.MessageRepository:
3782    def get_message_repository(self) -> MessageRepository:
3783        return self._message_repository
@staticmethod
def create_components_mapping_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ComponentMappingDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.ComponentMappingDefinition:
3788    @staticmethod
3789    def create_components_mapping_definition(
3790        model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
3791    ) -> ComponentMappingDefinition:
3792        interpolated_value = InterpolatedString.create(
3793            model.value, parameters=model.parameters or {}
3794        )
3795        field_path = [
3796            InterpolatedString.create(path, parameters=model.parameters or {})
3797            for path in model.field_path
3798        ]
3799        return ComponentMappingDefinition(
3800            field_path=field_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
3801            value=interpolated_value,
3802            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
3803            create_or_update=model.create_or_update,
3804            parameters=model.parameters or {},
3805        )
def create_http_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpComponentsResolver, config: Mapping[str, Any]) -> Any:
3807    def create_http_components_resolver(
3808        self, model: HttpComponentsResolverModel, config: Config
3809    ) -> Any:
3810        stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3811        combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3812
3813        retriever = self._create_component_from_model(
3814            model=model.retriever,
3815            config=config,
3816            name="",
3817            primary_key=None,
3818            stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3819            transformations=[],
3820        )
3821
3822        components_mapping = [
3823            self._create_component_from_model(
3824                model=components_mapping_definition_model,
3825                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3826                    components_mapping_definition_model.value_type
3827                ),
3828                config=config,
3829            )
3830            for components_mapping_definition_model in model.components_mapping
3831        ]
3832
3833        return HttpComponentsResolver(
3834            retriever=retriever,
3835            config=config,
3836            components_mapping=components_mapping,
3837            parameters=model.parameters or {},
3838        )
@staticmethod
def create_stream_config( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StreamConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.StreamConfig:
3840    @staticmethod
3841    def create_stream_config(
3842        model: StreamConfigModel, config: Config, **kwargs: Any
3843    ) -> StreamConfig:
3844        model_configs_pointer: List[Union[InterpolatedString, str]] = (
3845            [x for x in model.configs_pointer] if model.configs_pointer else []
3846        )
3847
3848        return StreamConfig(
3849            configs_pointer=model_configs_pointer,
3850            default_values=model.default_values,
3851            parameters=model.parameters or {},
3852        )
def create_config_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConfigComponentsResolver, config: Mapping[str, Any]) -> Any:
3854    def create_config_components_resolver(
3855        self, model: ConfigComponentsResolverModel, config: Config
3856    ) -> Any:
3857        model_stream_configs = (
3858            model.stream_config if isinstance(model.stream_config, list) else [model.stream_config]
3859        )
3860
3861        stream_configs = [
3862            self._create_component_from_model(
3863                stream_config, config=config, parameters=model.parameters or {}
3864            )
3865            for stream_config in model_stream_configs
3866        ]
3867
3868        components_mapping = [
3869            self._create_component_from_model(
3870                model=components_mapping_definition_model,
3871                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3872                    components_mapping_definition_model.value_type
3873                ),
3874                config=config,
3875            )
3876            for components_mapping_definition_model in model.components_mapping
3877        ]
3878
3879        return ConfigComponentsResolver(
3880            stream_configs=stream_configs,
3881            config=config,
3882            components_mapping=components_mapping,
3883            parameters=model.parameters or {},
3884        )
3886    def create_parametrized_components_resolver(
3887        self, model: ParametrizedComponentsResolverModel, config: Config
3888    ) -> ParametrizedComponentsResolver:
3889        stream_parameters = StreamParametersDefinition(
3890            list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
3891        )
3892        components_mapping = [
3893            self._create_component_from_model(
3894                model=components_mapping_definition_model,
3895                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
3896                    components_mapping_definition_model.value_type
3897                ),
3898                config=config,
3899            )
3900            for components_mapping_definition_model in model.components_mapping
3901        ]
3902        return ParametrizedComponentsResolver(
3903            stream_parameters=stream_parameters,
3904            config=config,
3905            components_mapping=components_mapping,
3906            parameters=model.parameters or {},
3907        )
def create_http_api_budget( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HTTPAPIBudget, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.HttpAPIBudget:
3931    def create_http_api_budget(
3932        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
3933    ) -> HttpAPIBudget:
3934        policies = [
3935            self._create_component_from_model(model=policy, config=config)
3936            for policy in model.policies
3937        ]
3938
3939        return HttpAPIBudget(
3940            policies=policies,
3941            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
3942            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
3943            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
3944        )
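Example: a hypothetical budget definition combining the defaults above with a single moving-window policy (header names, limits, and the URL pattern are illustrative assumptions):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        HTTPAPIBudget as HTTPAPIBudgetModel,
        HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
        MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
        Rate as RateModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    api_budget = factory.create_http_api_budget(
        model=HTTPAPIBudgetModel(
            type="HTTPAPIBudget",
            # Header names fall back to "ratelimit-reset"/"ratelimit-remaining" and
            # status codes to [429] when omitted, per the defaults above.
            policies=[
                MovingWindowCallRatePolicyModel(
                    type="MovingWindowCallRatePolicy",
                    # At most 10 calls per ISO-8601 one-minute window.
                    rates=[RateModel(type="Rate", limit=10, interval="PT1M")],
                    matchers=[
                        HttpRequestRegexMatcherModel(
                            type="HttpRequestRegexMatcher", url_path_pattern="/items"
                        )
                    ],
                )
            ],
        ),
        config={},
    )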
def create_fixed_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FixedWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.FixedWindowCallRatePolicy:
3946    def create_fixed_window_call_rate_policy(
3947        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
3948    ) -> FixedWindowCallRatePolicy:
3949        matchers = [
3950            self._create_component_from_model(model=matcher, config=config)
3951            for matcher in model.matchers
3952        ]
3953
3954        # Set the initial reset timestamp to 10 days from now.
3955        # This value will be updated by the first request.
3956        return FixedWindowCallRatePolicy(
3957            next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
3958            period=parse_duration(model.period),
3959            call_limit=model.call_limit,
3960            matchers=matchers,
3961        )
def create_file_uploader( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FileUploader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.retrievers.file_uploader.FileUploader:
3963    def create_file_uploader(
3964        self, model: FileUploaderModel, config: Config, **kwargs: Any
3965    ) -> FileUploader:
3966        name = "File Uploader"
3967        requester = self._create_component_from_model(
3968            model=model.requester,
3969            config=config,
3970            name=name,
3971            **kwargs,
3972        )
3973        download_target_extractor = self._create_component_from_model(
3974            model=model.download_target_extractor,
3975            config=config,
3976            name=name,
3977            **kwargs,
3978        )
3979        emit_connector_builder_messages = self._emit_connector_builder_messages
3980        file_uploader = DefaultFileUploader(
3981            requester=requester,
3982            download_target_extractor=download_target_extractor,
3983            config=config,
3984            file_writer=NoopFileWriter()
3985            if emit_connector_builder_messages
3986            else LocalFileSystemFileWriter(),
3987            parameters=model.parameters or {},
3988            filename_extractor=model.filename_extractor if model.filename_extractor else None,
3989        )
3990
3991        return (
3992            ConnectorBuilderFileUploader(file_uploader)
3993            if emit_connector_builder_messages
3994            else file_uploader
3995        )
def create_moving_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.MovingWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.MovingWindowCallRatePolicy:
3997    def create_moving_window_call_rate_policy(
3998        self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3999    ) -> MovingWindowCallRatePolicy:
4000        rates = [
4001            self._create_component_from_model(model=rate, config=config) for rate in model.rates
4002        ]
4003        matchers = [
4004            self._create_component_from_model(model=matcher, config=config)
4005            for matcher in model.matchers
4006        ]
4007        return MovingWindowCallRatePolicy(
4008            rates=rates,
4009            matchers=matchers,
4010        )
def create_unlimited_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.UnlimitedCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.UnlimitedCallRatePolicy:
4012    def create_unlimited_call_rate_policy(
4013        self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
4014    ) -> UnlimitedCallRatePolicy:
4015        matchers = [
4016            self._create_component_from_model(model=matcher, config=config)
4017            for matcher in model.matchers
4018        ]
4019
4020        return UnlimitedCallRatePolicy(
4021            matchers=matchers,
4022        )
def create_rate( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Rate, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Rate:
4024    def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
4025        interpolated_limit = InterpolatedString.create(str(model.limit), parameters={})
4026        return Rate(
4027            limit=int(interpolated_limit.eval(config=config)),
4028            interval=parse_duration(model.interval),
4029        )
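Since the limit is interpolated before being cast to int, it can reference the connector config. A hypothetical example (the config key is an illustrative assumption):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        Rate as RateModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    rate = factory.create_rate(
        # "PT1M" is an ISO-8601 duration parsed by isodate.parse_duration.
        model=RateModel(type="Rate", limit="{{ config['calls_per_minute'] }}", interval="PT1M"),
        config={"calls_per_minute": 120},
    )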
def create_http_request_matcher( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpRequestRegexMatcher, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.HttpRequestRegexMatcher:
4031    def create_http_request_matcher(
4032        self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
4033    ) -> HttpRequestRegexMatcher:
4034        return HttpRequestRegexMatcher(
4035            method=model.method,
4036            url_base=model.url_base,
4037            url_path_pattern=model.url_path_pattern,
4038            params=model.params,
4039            headers=model.headers,
4040        )
def set_api_budget( self, component_definition: Mapping[str, Any], config: Mapping[str, Any]) -> None:
4042    def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
4043        self._api_budget = self.create_component(
4044            model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
4045        )
def create_grouping_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.GroupingPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.partition_routers.GroupingPartitionRouter:
4047    def create_grouping_partition_router(
4048        self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
4049    ) -> GroupingPartitionRouter:
4050        underlying_router = self._create_component_from_model(
4051            model=model.underlying_partition_router, config=config
4052        )
4053        if model.group_size < 1:
4054            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")
4055
4056        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
4057        # because they are specific to individual partitions and cannot be aggregated or handled
4058        # when grouping, potentially leading to incorrect API calls. Any request customization
4059        # should be managed at the stream level through the requester's configuration.
4060        if isinstance(underlying_router, SubstreamPartitionRouter):
4061            if any(
4062                parent_config.request_option
4063                for parent_config in underlying_router.parent_stream_configs
4064            ):
4065                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
4066
4067        if isinstance(underlying_router, ListPartitionRouter):
4068            if underlying_router.request_option:
4069                raise ValueError("Request options are not supported for GroupingPartitionRouter.")
4070
4071        return GroupingPartitionRouter(
4072            group_size=model.group_size,
4073            underlying_partition_router=underlying_router,
4074            deduplicate=model.deduplicate if model.deduplicate is not None else True,
4075            config=config,
4076        )
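Example: a hypothetical grouping router that batches partitions from an underlying list router into groups of 10; the inner router carries no request options, per the restriction enforced above (cursor field and values are illustrative assumptions):

    # Hypothetical usage sketch (not part of this module).
    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        GroupingPartitionRouter as GroupingPartitionRouterModel,
        ListPartitionRouter as ListPartitionRouterModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    router = factory.create_grouping_partition_router(
        model=GroupingPartitionRouterModel(
            type="GroupingPartitionRouter",
            group_size=10,
            deduplicate=True,
            # The underlying router must not define request options (see the checks above).
            underlying_partition_router=ListPartitionRouterModel(
                type="ListPartitionRouter",
                cursor_field="region",
                values=["us", "eu", "apac"],
            ),
        ),
        config={},
    )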