airbyte_cdk.sources.declarative.parsers.model_to_component_factory

#
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
#

from __future__ import annotations

import datetime
import importlib
import inspect
import re
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Mapping,
    MutableMapping,
    Optional,
    Type,
    Union,
    get_args,
    get_origin,
    get_type_hints,
)

from isodate import parse_duration
from pydantic.v1 import BaseModel

from airbyte_cdk.models import FailureType, Level
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
    DeclarativeAuthenticator,
    NoAuth,
)
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
from airbyte_cdk.sources.declarative.auth.oauth import (
    DeclarativeSingleUseRefreshTokenOauth2Authenticator,
)
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
from airbyte_cdk.sources.declarative.auth.token import (
    ApiKeyAuthenticator,
    BasicHttpAuthenticator,
    BearerAuthenticator,
    LegacySessionTokenAuthenticator,
)
from airbyte_cdk.sources.declarative.auth.token_provider import (
    InterpolatedStringTokenProvider,
    SessionTokenProvider,
    TokenProvider,
)
from airbyte_cdk.sources.declarative.checks import (
    CheckDynamicStream,
    CheckStream,
    DynamicStreamCheckConfig,
)
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
from airbyte_cdk.sources.declarative.decoders import (
    Decoder,
    IterableDecoder,
    JsonDecoder,
    PaginationDecoderDecorator,
    XmlDecoder,
    ZipfileDecoder,
)
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
    CompositeRawDecoder,
    CsvParser,
    GzipParser,
    JsonLineParser,
    JsonParser,
    Parser,
)
from airbyte_cdk.sources.declarative.extractors import (
    DpathExtractor,
    RecordFilter,
    RecordSelector,
    ResponseToFileExtractor,
)
from airbyte_cdk.sources.declarative.extractors.record_filter import (
    ClientSideIncrementalRecordFilterDecorator,
)
from airbyte_cdk.sources.declarative.incremental import (
    ChildPartitionResumableFullRefreshCursor,
    ConcurrentCursorFactory,
    ConcurrentPerPartitionCursor,
    CursorFactory,
    DatetimeBasedCursor,
    DeclarativeCursor,
    GlobalSubstreamCursor,
    PerPartitionCursor,
    PerPartitionWithGlobalCursor,
    ResumableFullRefreshCursor,
)
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
    LegacyToPerPartitionStateMigration,
)
from airbyte_cdk.sources.declarative.models import (
    CustomStateMigration,
    GzipDecoder,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AddedFieldDefinition as AddedFieldDefinitionModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AddFields as AddFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AsyncJobStatusMap as AsyncJobStatusMapModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AsyncRetriever as AsyncRetrieverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    BearerAuthenticator as BearerAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CheckDynamicStream as CheckDynamicStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CheckStream as CheckStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ComplexFieldType as ComplexFieldTypeModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ComponentMappingDefinition as ComponentMappingDefinitionModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CompositeErrorHandler as CompositeErrorHandlerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConcurrencyLevel as ConcurrencyLevelModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConfigComponentsResolver as ConfigComponentsResolverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ConstantBackoffStrategy as ConstantBackoffStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CsvDecoder as CsvDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CursorPagination as CursorPaginationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomAuthenticator as CustomAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomBackoffStrategy as CustomBackoffStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomDecoder as CustomDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomErrorHandler as CustomErrorHandlerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomIncrementalSync as CustomIncrementalSyncModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomPaginationStrategy as CustomPaginationStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomPartitionRouter as CustomPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRecordExtractor as CustomRecordExtractorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRecordFilter as CustomRecordFilterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRequester as CustomRequesterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomRetriever as CustomRetrieverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomSchemaLoader as CustomSchemaLoader,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomSchemaNormalization as CustomSchemaNormalizationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CustomTransformation as CustomTransformationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DatetimeBasedCursor as DatetimeBasedCursorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DeclarativeStream as DeclarativeStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DefaultErrorHandler as DefaultErrorHandlerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DefaultPaginator as DefaultPaginatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DpathExtractor as DpathExtractorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DpathFlattenFields as DpathFlattenFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DynamicSchemaLoader as DynamicSchemaLoaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    FlattenFields as FlattenFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    GroupingPartitionRouter as GroupingPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    GzipDecoder as GzipDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HTTPAPIBudget as HTTPAPIBudgetModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpComponentsResolver as HttpComponentsResolverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpRequester as HttpRequesterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpResponseFilter as HttpResponseFilterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    IncrementingCountCursor as IncrementingCountCursorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    InlineSchemaLoader as InlineSchemaLoaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    IterableDecoder as IterableDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JsonDecoder as JsonDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JsonlDecoder as JsonlDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JwtAuthenticator as JwtAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JwtHeaders as JwtHeadersModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JwtPayload as JwtPayloadModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    KeysReplace as KeysReplaceModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    KeysToLower as KeysToLowerModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    KeysToSnakeCase as KeysToSnakeCaseModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ListPartitionRouter as ListPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    MinMaxDatetime as MinMaxDatetimeModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    NoAuth as NoAuthModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    NoPagination as NoPaginationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    OAuthAuthenticator as OAuthAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    OffsetIncrement as OffsetIncrementModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PageIncrement as PageIncrementModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ParentStreamConfig as ParentStreamConfigModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PropertiesFromEndpoint as PropertiesFromEndpointModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PropertyChunking as PropertyChunkingModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PropertyLimitType as PropertyLimitTypeModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    QueryProperties as QueryPropertiesModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    Rate as RateModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RecordFilter as RecordFilterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RecordSelector as RecordSelectorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RemoveFields as RemoveFieldsModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RequestOption as RequestOptionModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RequestPath as RequestPathModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ResponseToFileExtractor as ResponseToFileExtractorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SchemaNormalization as SchemaNormalizationModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SelectiveAuthenticator as SelectiveAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SimpleRetriever as SimpleRetrieverModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    StateDelegatingStream as StateDelegatingStreamModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    StreamConfig as StreamConfigModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    SubstreamPartitionRouter as SubstreamPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    TypesMap as TypesMapModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    WaitTimeFromHeader as WaitTimeFromHeaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    XmlDecoder as XmlDecoderModel,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    COMPONENTS_MODULE_NAME,
    SDM_COMPONENTS_MODULE_NAME,
)
from airbyte_cdk.sources.declarative.partition_routers import (
    CartesianProductStreamSlicer,
    GroupingPartitionRouter,
    ListPartitionRouter,
    PartitionRouter,
    SinglePartitionRouter,
    SubstreamPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
    AsyncJobPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
    ParentStreamConfig,
)
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
    CompositeErrorHandler,
    DefaultErrorHandler,
    HttpResponseFilter,
)
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
    ConstantBackoffStrategy,
    ExponentialBackoffStrategy,
    WaitTimeFromHeaderBackoffStrategy,
    WaitUntilTimeFromHeaderBackoffStrategy,
)
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
from airbyte_cdk.sources.declarative.requesters.paginators import (
    DefaultPaginator,
    NoPagination,
    PaginatorTestReadDecorator,
)
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
    CursorPaginationStrategy,
    CursorStopCondition,
    OffsetIncrement,
    PageIncrement,
    StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.query_properties import (
    PropertiesFromEndpoint,
    PropertyChunking,
    QueryProperties,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
    PropertyLimitType,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
    GroupByKey,
)
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
    DatetimeBasedRequestOptionsProvider,
    DefaultRequestOptionsProvider,
    InterpolatedRequestOptionsProvider,
    RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
from airbyte_cdk.sources.declarative.resolvers import (
    ComponentMappingDefinition,
    ConfigComponentsResolver,
    HttpComponentsResolver,
    StreamConfig,
)
from airbyte_cdk.sources.declarative.retrievers import (
    AsyncRetriever,
    LazySimpleRetriever,
    SimpleRetriever,
    SimpleRetrieverTestReadDecorator,
)
from airbyte_cdk.sources.declarative.schema import (
    ComplexFieldType,
    DefaultSchemaLoader,
    DynamicSchemaLoader,
    InlineSchemaLoader,
    JsonFileSchemaLoader,
    SchemaTypeIdentifier,
    TypesMap,
)
from airbyte_cdk.sources.declarative.spec import Spec
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
from airbyte_cdk.sources.declarative.transformations import (
    AddFields,
    RecordTransformation,
    RemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
    DpathFlattenFields,
    KeyTransformation,
)
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
    FlattenFields,
)
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
    KeysReplaceTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
    KeysToLowerTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)
from airbyte_cdk.sources.message import (
    InMemoryMessageRepository,
    LogAppenderMessageRepositoryDecorator,
    MessageRepository,
    NoopMessageRepository,
)
from airbyte_cdk.sources.streams.call_rate import (
    APIBudget,
    FixedWindowCallRatePolicy,
    HttpAPIBudget,
    HttpRequestRegexMatcher,
    MovingWindowCallRatePolicy,
    Rate,
    UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.concurrent.clamping import (
    ClampingEndProvider,
    ClampingStrategy,
    DayClampingStrategy,
    MonthClampingStrategy,
    NoClamping,
    WeekClampingStrategy,
    Weekday,
)
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
    CustomFormatConcurrentStreamStateConverter,
    DateTimeStreamStateConverter,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

ComponentDefinition = Mapping[str, Any]

SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}
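
# A ComponentDefinition is the plain-mapping form of a manifest component. A hedged,
# illustrative example (the field values are hypothetical, not taken from a real connector):
#
#   definition: ComponentDefinition = {
#       "type": "DpathExtractor",
#       "field_path": ["results"],
#   }
#
# The factory below turns such mappings into runtime components by first parsing them into
# the generated Pydantic models and then dispatching on the model class.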


class ModelToComponentFactory:
    EPOCH_DATETIME_FORMAT = "%s"

    def __init__(
        self,
        limit_pages_fetched_per_slice: Optional[int] = None,
        limit_slices_fetched: Optional[int] = None,
        emit_connector_builder_messages: bool = False,
        disable_retries: bool = False,
        disable_cache: bool = False,
        disable_resumable_full_refresh: bool = False,
        message_repository: Optional[MessageRepository] = None,
        connector_state_manager: Optional[ConnectorStateManager] = None,
        max_concurrent_async_job_count: Optional[int] = None,
    ):
        self._init_mappings()
        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
        self._limit_slices_fetched = limit_slices_fetched
        self._emit_connector_builder_messages = emit_connector_builder_messages
        self._disable_retries = disable_retries
        self._disable_cache = disable_cache
        self._disable_resumable_full_refresh = disable_resumable_full_refresh
        self._message_repository = message_repository or InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)

    def _init_mappings(self) -> None:
        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
            AddedFieldDefinitionModel: self.create_added_field_definition,
            AddFieldsModel: self.create_add_fields,
            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
            BearerAuthenticatorModel: self.create_bearer_authenticator,
            CheckStreamModel: self.create_check_stream,
            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
            CheckDynamicStreamModel: self.create_check_dynamic_stream,
            CompositeErrorHandlerModel: self.create_composite_error_handler,
            ConcurrencyLevelModel: self.create_concurrency_level,
            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
            CsvDecoderModel: self.create_csv_decoder,
            CursorPaginationModel: self.create_cursor_pagination,
            CustomAuthenticatorModel: self.create_custom_component,
            CustomBackoffStrategyModel: self.create_custom_component,
            CustomDecoderModel: self.create_custom_component,
            CustomErrorHandlerModel: self.create_custom_component,
            CustomIncrementalSyncModel: self.create_custom_component,
            CustomRecordExtractorModel: self.create_custom_component,
            CustomRecordFilterModel: self.create_custom_component,
            CustomRequesterModel: self.create_custom_component,
            CustomRetrieverModel: self.create_custom_component,
            CustomSchemaLoader: self.create_custom_component,
            CustomSchemaNormalizationModel: self.create_custom_component,
            CustomStateMigration: self.create_custom_component,
            CustomPaginationStrategyModel: self.create_custom_component,
            CustomPartitionRouterModel: self.create_custom_component,
            CustomTransformationModel: self.create_custom_component,
            DatetimeBasedCursorModel: self.create_datetime_based_cursor,
            DeclarativeStreamModel: self.create_declarative_stream,
            DefaultErrorHandlerModel: self.create_default_error_handler,
            DefaultPaginatorModel: self.create_default_paginator,
            DpathExtractorModel: self.create_dpath_extractor,
            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
            GroupByKeyMergeStrategyModel: self.create_group_by_key,
            HttpRequesterModel: self.create_http_requester,
            HttpResponseFilterModel: self.create_http_response_filter,
            InlineSchemaLoaderModel: self.create_inline_schema_loader,
            JsonDecoderModel: self.create_json_decoder,
            JsonlDecoderModel: self.create_jsonl_decoder,
            GzipDecoderModel: self.create_gzip_decoder,
            KeysToLowerModel: self.create_keys_to_lower_transformation,
            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
            KeysReplaceModel: self.create_keys_replace_transformation,
            FlattenFieldsModel: self.create_flatten_fields,
            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
            IterableDecoderModel: self.create_iterable_decoder,
            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
            XmlDecoderModel: self.create_xml_decoder,
            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
            TypesMapModel: self.create_types_map,
            ComplexFieldTypeModel: self.create_complex_field_type,
            JwtAuthenticatorModel: self.create_jwt_authenticator,
            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
            ListPartitionRouterModel: self.create_list_partition_router,
            MinMaxDatetimeModel: self.create_min_max_datetime,
            NoAuthModel: self.create_no_auth,
            NoPaginationModel: self.create_no_pagination,
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
            ParentStreamConfigModel: self.create_parent_stream_config,
            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
            PropertyChunkingModel: self.create_property_chunking,
            QueryPropertiesModel: self.create_query_properties,
            RecordFilterModel: self.create_record_filter,
            RecordSelectorModel: self.create_record_selector,
            RemoveFieldsModel: self.create_remove_fields,
            RequestPathModel: self.create_request_path,
            RequestOptionModel: self.create_request_option,
            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
            SelectiveAuthenticatorModel: self.create_selective_authenticator,
            SimpleRetrieverModel: self.create_simple_retriever,
            StateDelegatingStreamModel: self.create_state_delegating_stream,
            SpecModel: self.create_spec,
            SubstreamPartitionRouterModel: self.create_substream_partition_router,
            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
            AsyncRetrieverModel: self.create_async_retriever,
            HttpComponentsResolverModel: self.create_http_components_resolver,
            ConfigComponentsResolverModel: self.create_config_components_resolver,
            StreamConfigModel: self.create_stream_config,
            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
            ZipfileDecoderModel: self.create_zipfile_decoder,
            HTTPAPIBudgetModel: self.create_http_api_budget,
            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
            GroupingPartitionRouterModel: self.create_grouping_partition_router,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}

    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
        creating declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )
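
    # Illustrative usage sketch for create_component (the definition values are hypothetical;
    # note that the "type" key must match the model class name or a ValueError is raised):
    #
    #   factory = ModelToComponentFactory()
    #   extractor = factory.create_component(
    #       model_type=DpathExtractorModel,
    #       component_definition={"type": "DpathExtractor", "field_path": ["results"]},
    #       config={},
    #   )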

    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")
        return component_constructor(model=model, config=config, **kwargs)

    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )
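
    # Hedged example of an AddFields definition as it might appear in a manifest (the values
    # are hypothetical). Each entry in "fields" becomes an AddedFieldDefinition whose value is
    # an InterpolatedString evaluated per record:
    #
    #   {
    #       "type": "AddFields",
    #       "fields": [
    #           {"path": ["shop_id"], "value": "{{ config['shop_id'] }}"},
    #       ],
    #   }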

    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]

    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and must be set to an empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )
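
    # Hedged example of an ApiKeyAuthenticator definition (hypothetical values). Exactly one of
    # `inject_into` or the deprecated `header` option may be set; here the token is injected as
    # a request header:
    #
    #   {
    #       "type": "ApiKeyAuthenticator",
    #       "api_token": "{{ config['api_key'] }}",
    #       "inject_into": {
    #           "type": "RequestOption",
    #           "inject_into": "header",
    #           "field_name": "X-API-Key",
    #       },
    #   }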
Got {type(partition_router)}" 893 ) 894 if not hasattr(partition_router, "parent_stream_configs"): 895 raise ValueError( 896 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 897 ) 898 899 if not hasattr(declarative_stream, "incremental_sync"): 900 raise ValueError( 901 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 902 ) 903 904 return LegacyToPerPartitionStateMigration( 905 partition_router, # type: ignore # was already checked above 906 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 907 config, 908 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 909 ) 910 911 def create_session_token_authenticator( 912 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 913 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 914 decoder = ( 915 self._create_component_from_model(model=model.decoder, config=config) 916 if model.decoder 917 else JsonDecoder(parameters={}) 918 ) 919 login_requester = self._create_component_from_model( 920 model=model.login_requester, 921 config=config, 922 name=f"{name}_login_requester", 923 decoder=decoder, 924 ) 925 token_provider = SessionTokenProvider( 926 login_requester=login_requester, 927 session_token_path=model.session_token_path, 928 expiration_duration=parse_duration(model.expiration_duration) 929 if model.expiration_duration 930 else None, 931 parameters=model.parameters or {}, 932 message_repository=self._message_repository, 933 decoder=decoder, 934 ) 935 if model.request_authentication.type == "Bearer": 936 return ModelToComponentFactory.create_bearer_authenticator( 937 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 938 config, 939 token_provider=token_provider, 940 ) 941 else: 942 return self.create_api_key_authenticator( 943 ApiKeyAuthenticatorModel( 944 type="ApiKeyAuthenticator", 945 api_token="", 946 inject_into=model.request_authentication.inject_into, 947 ), # type: ignore # $parameters and headers default to None 948 config=config, 949 token_provider=token_provider, 950 ) 951 952 @staticmethod 953 def create_basic_http_authenticator( 954 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 955 ) -> BasicHttpAuthenticator: 956 return BasicHttpAuthenticator( 957 password=model.password or "", 958 username=model.username, 959 config=config, 960 parameters=model.parameters or {}, 961 ) 962 963 @staticmethod 964 def create_bearer_authenticator( 965 model: BearerAuthenticatorModel, 966 config: Config, 967 token_provider: Optional[TokenProvider] = None, 968 **kwargs: Any, 969 ) -> BearerAuthenticator: 970 if token_provider is not None and model.api_token != "": 971 raise ValueError( 972 "If token_provider is set, api_token is ignored and has to be set to empty string." 

    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and must be set to an empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_dynamic_stream_check_config(
        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
    ) -> DynamicStreamCheckConfig:
        return DynamicStreamCheckConfig(
            dynamic_stream_name=model.dynamic_stream_name,
            stream_count=model.stream_count or 0,
        )

    def create_check_stream(
        self, model: CheckStreamModel, config: Config, **kwargs: Any
    ) -> CheckStream:
        if model.dynamic_streams_check_configs is None and model.stream_names is None:
            raise ValueError(
                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
            )

        dynamic_streams_check_configs = (
            [
                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
                for dynamic_stream_check_config in model.dynamic_streams_check_configs
            ]
            if model.dynamic_streams_check_configs
            else []
        )

        return CheckStream(
            stream_names=model.stream_names or [],
            dynamic_streams_check_configs=dynamic_streams_check_configs,
            parameters={},
        )
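
    # Hedged example of a CheckStream definition (hypothetical stream names). At least one of
    # `stream_names` or `dynamic_streams_check_configs` must be provided:
    #
    #   {"type": "CheckStream", "stream_names": ["customers"]}
    #
    # or, for dynamically generated streams:
    #
    #   {
    #       "type": "CheckStream",
    #       "dynamic_streams_check_configs": [
    #           {"type": "DynamicStreamCheckConfig", "dynamic_stream_name": "tables", "stream_count": 1},
    #       ],
    #   }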

    @staticmethod
    def create_check_dynamic_stream(
        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
    ) -> CheckDynamicStream:
        assert model.use_check_availability is not None  # for mypy

        use_check_availability = model.use_check_availability

        return CheckDynamicStream(
            stream_count=model.stream_count,
            use_check_availability=use_check_availability,
            parameters={},
        )

    def create_composite_error_handler(
        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
    ) -> CompositeErrorHandler:
        error_handlers = [
            self._create_component_from_model(model=error_handler_model, config=config)
            for error_handler_model in model.error_handlers
        ]
        return CompositeErrorHandler(
            error_handlers=error_handlers, parameters=model.parameters or {}
        )

    @staticmethod
    def create_concurrency_level(
        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
    ) -> ConcurrencyLevel:
        return ConcurrencyLevel(
            default_concurrency=model.default_concurrency,
            max_concurrency=model.max_concurrency,
            config=config,
            parameters={},
        )

    @staticmethod
    def apply_stream_state_migrations(
        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        if stream_state_migrations:
            for state_migration in stream_state_migrations:
                if state_migration.should_migrate(stream_state):
                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
                    stream_state = dict(state_migration.migrate(stream_state))
        return stream_state
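
    # Illustrative walk-through of apply_stream_state_migrations (the state shape shown is a
    # hypothetical sketch): each migration whose should_migrate(state) returns True is applied
    # in order, and each result is copied into a mutable dict.
    #
    #   state = {"created_at": "2021-01-01"}
    #   # A LegacyToPerPartitionStateMigration could, for instance, turn this into:
    #   # {"states": [{"partition": {...}, "cursor": {"created_at": "2021-01-01"}}]}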

    def create_concurrent_cursor_from_datetime_based_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        runtime_lookback_window: Optional[datetime.timedelta] = None,
        stream_state_migrations: Optional[List[Any]] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        # Per-partition incremental streams can dynamically create child cursors which will pass their current
        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
        # incoming state and connector_state_manager that is initialized when the component factory is created
        stream_state = (
            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
            if "stream_state" not in kwargs
            else kwargs["stream_state"]
        )
        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        datetime_based_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
            )

        interpolated_cursor_field = InterpolatedString.create(
            datetime_based_cursor_model.cursor_field,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        interpolated_partition_field_start = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_start or "start_time",
            parameters=datetime_based_cursor_model.parameters or {},
        )
        interpolated_partition_field_end = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_end or "end_time",
            parameters=datetime_based_cursor_model.parameters or {},
        )

        slice_boundary_fields = (
            interpolated_partition_field_start.eval(config=config),
            interpolated_partition_field_end.eval(config=config),
        )

        datetime_format = datetime_based_cursor_model.datetime_format

        cursor_granularity = (
            parse_duration(datetime_based_cursor_model.cursor_granularity)
            if datetime_based_cursor_model.cursor_granularity
            else None
        )

        lookback_window = None
        interpolated_lookback_window = (
            InterpolatedString.create(
                datetime_based_cursor_model.lookback_window,
                parameters=datetime_based_cursor_model.parameters or {},
            )
            if datetime_based_cursor_model.lookback_window
            else None
        )
        if interpolated_lookback_window:
            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
            if evaluated_lookback_window:
                lookback_window = parse_duration(evaluated_lookback_window)

        connector_state_converter: DateTimeStreamStateConverter
        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )

        # Adjusts the stream state by applying the runtime lookback window.
        # This is used to ensure correct state handling in case of failed partitions.
        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
        if runtime_lookback_window and stream_state_value:
            new_stream_state = (
                connector_state_converter.parse_timestamp(stream_state_value)
                - runtime_lookback_window
            )
            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
                new_stream_state
            )

        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
            start_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.start_datetime, config=config
            )
        else:
            start_date_runtime_value = datetime_based_cursor_model.start_datetime

        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
            end_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.end_datetime, config=config
            )
        else:
            end_date_runtime_value = datetime_based_cursor_model.end_datetime

        interpolated_start_date = MinMaxDatetime.create(
            interpolated_string_or_min_max_datetime=start_date_runtime_value,
            parameters=datetime_based_cursor_model.parameters,
        )
        interpolated_end_date = (
            None
            if not end_date_runtime_value
            else MinMaxDatetime.create(
                end_date_runtime_value, datetime_based_cursor_model.parameters
            )
        )

        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
        if not interpolated_start_date.datetime_format:
            interpolated_start_date.datetime_format = datetime_format
        if interpolated_end_date and not interpolated_end_date.datetime_format:
            interpolated_end_date.datetime_format = datetime_format

        start_date = interpolated_start_date.get_datetime(config=config)
        end_date_provider = (
            partial(interpolated_end_date.get_datetime, config)
            if interpolated_end_date
            else connector_state_converter.get_end_provider()
        )

        if (
            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
        ) or (
            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
        ):
            raise ValueError(
                f"If step is defined, cursor_granularity should be as well and vice-versa. "
                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
            )

        # When step is not defined, default to a step size from the starting date to the present moment
        step_length = datetime.timedelta.max
        interpolated_step = (
            InterpolatedString.create(
                datetime_based_cursor_model.step,
                parameters=datetime_based_cursor_model.parameters or {},
            )
            if datetime_based_cursor_model.step
            else None
        )
        if interpolated_step:
            evaluated_step = interpolated_step.eval(config)
            if evaluated_step:
                step_length = parse_duration(evaluated_step)

        clamping_strategy: ClampingStrategy = NoClamping()
        if datetime_based_cursor_model.clamping:
            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
            # it is still better than shifting the low-code concept of interpolation into the ConcurrentCursor
            # runtime object, which we want to keep agnostic of being low-code
            target = InterpolatedString(
                string=datetime_based_cursor_model.clamping.target,
                parameters=datetime_based_cursor_model.parameters or {},
            )
            evaluated_target = target.eval(config=config)
            match evaluated_target:
                case "DAY":
                    clamping_strategy = DayClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        DayClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
                    )
                case "WEEK":
                    if (
                        not datetime_based_cursor_model.clamping.target_details
                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
                    ):
                        raise ValueError(
                            "Given WEEK clamping, weekday needs to be provided as target_details"
                        )
                    weekday = self._assemble_weekday(
                        datetime_based_cursor_model.clamping.target_details["weekday"]
                    )
                    clamping_strategy = WeekClampingStrategy(weekday)
                    end_date_provider = ClampingEndProvider(
                        WeekClampingStrategy(weekday, is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case "MONTH":
                    clamping_strategy = MonthClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        MonthClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case _:
                    raise ValueError(
                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
                    )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=slice_boundary_fields,
            start=start_date,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            lookback_window=lookback_window,
            slice_range=step_length,
            cursor_granularity=cursor_granularity,
            clamping_strategy=clamping_strategy,
        )
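
    # Hedged example of a DatetimeBasedCursor definition that exercises the logic above (the
    # values are hypothetical). Note that `step` and `cursor_granularity` must be provided
    # together, and `clamping.target` may evaluate to DAY, WEEK, or MONTH:
    #
    #   {
    #       "type": "DatetimeBasedCursor",
    #       "cursor_field": "updated_at",
    #       "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
    #       "start_datetime": "{{ config['start_date'] }}",
    #       "step": "P1D",
    #       "cursor_granularity": "PT1S",
    #       "clamping": {"target": "WEEK", "target_details": {"weekday": "MONDAY"}},
    #   }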

    def create_concurrent_cursor_from_incrementing_count_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        # Per-partition incremental streams can dynamically create child cursors which will pass their current
        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
        # incoming state and connector_state_manager that is initialized when the component factory is created
        stream_state = (
            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
            if "stream_state" not in kwargs
            else kwargs["stream_state"]
        )

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        incrementing_count_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
            )

        interpolated_start_value = (
            InterpolatedString.create(
                incrementing_count_cursor_model.start_value,  # type: ignore
                parameters=incrementing_count_cursor_model.parameters or {},
            )
            if incrementing_count_cursor_model.start_value
            else 0
        )

        interpolated_cursor_field = InterpolatedString.create(
            incrementing_count_cursor_model.cursor_field,
            parameters=incrementing_count_cursor_model.parameters or {},
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        connector_state_converter = IncrementingCountStreamStateConverter(
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=None,
            start=interpolated_start_value,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=connector_state_converter.get_end_provider(),  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        )
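
    # Hedged example of an IncrementingCountCursor definition (hypothetical values). Unlike the
    # datetime-based variant there are no slice boundary fields; state is a monotonically
    # increasing count starting from `start_value`:
    #
    #   {
    #       "type": "IncrementingCountCursor",
    #       "cursor_field": "id",
    #       "start_value": 0,
    #   }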
= self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1440 1441 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1442 use_global_cursor = isinstance( 1443 partition_router, GroupingPartitionRouter 1444 ) or component_definition.get("global_substream_cursor", False) 1445 1446 # Return the concurrent cursor and state converter 1447 return ConcurrentPerPartitionCursor( 1448 cursor_factory=cursor_factory, 1449 partition_router=partition_router, 1450 stream_name=stream_name, 1451 stream_namespace=stream_namespace, 1452 stream_state=stream_state, 1453 message_repository=self._message_repository, # type: ignore 1454 connector_state_manager=state_manager, 1455 connector_state_converter=connector_state_converter, 1456 cursor_field=cursor_field, 1457 use_global_cursor=use_global_cursor, 1458 ) 1459 1460 @staticmethod 1461 def create_constant_backoff_strategy( 1462 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1463 ) -> ConstantBackoffStrategy: 1464 return ConstantBackoffStrategy( 1465 backoff_time_in_seconds=model.backoff_time_in_seconds, 1466 config=config, 1467 parameters=model.parameters or {}, 1468 ) 1469 1470 def create_cursor_pagination( 1471 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1472 ) -> CursorPaginationStrategy: 1473 if isinstance(decoder, PaginationDecoderDecorator): 1474 inner_decoder = decoder.decoder 1475 else: 1476 inner_decoder = decoder 1477 decoder = PaginationDecoderDecorator(decoder=decoder) 1478 1479 if self._is_supported_decoder_for_pagination(inner_decoder): 1480 decoder_to_use = decoder 1481 else: 1482 raise ValueError( 1483 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1484 ) 1485 1486 return CursorPaginationStrategy( 1487 cursor_value=model.cursor_value, 1488 decoder=decoder_to_use, 1489 page_size=model.page_size, 1490 stop_condition=model.stop_condition, 1491 config=config, 1492 parameters=model.parameters or {}, 1493 ) 1494 1495 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1496 """ 1497 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1498 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1499 :param model: The Pydantic model of the custom component being created 1500 :param config: The custom defined connector config 1501 :return: The declarative component built from the Pydantic model to be used at runtime 1502 """ 1503 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1504 component_fields = get_type_hints(custom_component_class) 1505 model_args = model.dict() 1506 model_args["config"] = config 1507 1508 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1509 # we defer to these arguments over the component's definition 1510 for key, arg in kwargs.items(): 1511 model_args[key] = arg 1512 1513 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1514 # defined in the schema. The fields and types are defined within the Python class implementation. 
Pydantic can only parse down to 1515 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1516 for model_field, model_value in model_args.items(): 1517 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1518 if ( 1519 isinstance(model_value, dict) 1520 and "type" not in model_value 1521 and model_field in component_fields 1522 ): 1523 derived_type = self._derive_component_type_from_type_hints( 1524 component_fields.get(model_field) 1525 ) 1526 if derived_type: 1527 model_value["type"] = derived_type 1528 1529 if self._is_component(model_value): 1530 model_args[model_field] = self._create_nested_component( 1531 model, model_field, model_value, config 1532 ) 1533 elif isinstance(model_value, list): 1534 vals = [] 1535 for v in model_value: 1536 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1537 derived_type = self._derive_component_type_from_type_hints( 1538 component_fields.get(model_field) 1539 ) 1540 if derived_type: 1541 v["type"] = derived_type 1542 if self._is_component(v): 1543 vals.append(self._create_nested_component(model, model_field, v, config)) 1544 else: 1545 vals.append(v) 1546 model_args[model_field] = vals 1547 1548 kwargs = { 1549 class_field: model_args[class_field] 1550 for class_field in component_fields.keys() 1551 if class_field in model_args 1552 } 1553 return custom_component_class(**kwargs) 1554 1555 @staticmethod 1556 def _get_class_from_fully_qualified_class_name( 1557 full_qualified_class_name: str, 1558 ) -> Any: 1559 """Get a class from its fully qualified name. 1560 1561 If a custom components module is needed, we assume it is already registered - probably 1562 as `source_declarative_manifest.components` or `components`. 1563 1564 Args: 1565 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1566 1567 Returns: 1568 Any: The class object. 1569 1570 Raises: 1571 ValueError: If the class cannot be loaded. 1572 """ 1573 split = full_qualified_class_name.split(".") 1574 module_name_full = ".".join(split[:-1]) 1575 class_name = split[-1] 1576 1577 try: 1578 module_ref = importlib.import_module(module_name_full) 1579 except ModuleNotFoundError as e: 1580 if split[0] == "source_declarative_manifest": 1581 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. 
In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append
1582 try:
1583 import os
1584
1585 module_name_with_source_declarative_manifest = ".".join(split[1:-1])
1586 module_ref = importlib.import_module(
1587 module_name_with_source_declarative_manifest
1588 )
1589 except ModuleNotFoundError:
1590 raise ValueError(f"Could not load module `{module_name_full}`.") from e
1591 else:
1592 raise ValueError(f"Could not load module `{module_name_full}`.") from e
1593
1594 try:
1595 return getattr(module_ref, class_name)
1596 except AttributeError as e:
1597 raise ValueError(
1598 f"Could not load class `{class_name}` from module `{module_name_full}`.",
1599 ) from e
1600
1601 @staticmethod
1602 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
1603 interface = field_type
1604 while True:
1605 origin = get_origin(interface)
1606 if origin:
1607 # Unnest types until we reach the raw type
1608 # List[T] -> T
1609 # Optional[List[T]] -> T
1610 args = get_args(interface)
1611 interface = args[0]
1612 else:
1613 break
1614 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
1615 return interface.__name__
1616 return None
1617
1618 @staticmethod
1619 def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
1620 if not cls:
1621 return False
1622 return cls.__module__ == "builtins"
1623
1624 @staticmethod
1625 def _extract_missing_parameters(error: TypeError) -> List[str]:
1626 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
1627 if parameter_search:
1628 return re.findall(r"\'(.+?)\'", parameter_search.group(1))
1629 else:
1630 return []
1631
1632 def _create_nested_component(
1633 self, model: Any, model_field: str, model_value: Any, config: Config
1634 ) -> Any:
1635 type_name = model_value.get("type", None)
1636 if not type_name:
1637 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
1638 return model_value
1639
1640 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
1641 if model_type:
1642 parsed_model = model_type.parse_obj(model_value)
1643 try:
1644 # To improve usability of the language, certain fields are shared between components. This can come in the form of
1645 # a parent component passing some of its fields to a child component or the parent extracting fields from other child
1646 # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base
1647 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created
1648 # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that
1649 # are needed by a component and could not be shared.
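# Illustrative sketch (not from the original source; the component layout and the
# url_base value are hypothetical): a nested DefaultPaginator inside a custom retriever
# cannot receive the shared url_base automatically, so a manifest author can supply it
# explicitly via $parameters:
#
#     paginator:
#       type: DefaultPaginator
#       $parameters:
#         url_base: "https://api.example.com"
#
# The code below inspects the nested component constructor's keyword-only arguments and
# forwards only the $parameters entries whose names match.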
1650 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
1651 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
1652 model_parameters = model_value.get("$parameters", {})
1653 matching_parameters = {
1654 kwarg: model_parameters[kwarg]
1655 for kwarg in constructor_kwargs
1656 if kwarg in model_parameters
1657 }
1658 return self._create_component_from_model(
1659 model=parsed_model, config=config, **matching_parameters
1660 )
1661 except TypeError as error:
1662 missing_parameters = self._extract_missing_parameters(error)
1663 if missing_parameters:
1664 raise ValueError(
1665 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
1666 + ", ".join(
1667 (
1668 f"{type_name}.$parameters.{parameter}"
1669 for parameter in missing_parameters
1670 )
1671 )
1672 )
1673 raise TypeError(
1674 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
1675 )
1676 else:
1677 raise ValueError(
1678 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
1679 )
1680
1681 @staticmethod
1682 def _is_component(model_value: Any) -> bool:
1683 return isinstance(model_value, dict) and model_value.get("type") is not None
1684
1685 def create_datetime_based_cursor(
1686 self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
1687 ) -> DatetimeBasedCursor:
1688 start_datetime: Union[str, MinMaxDatetime] = (
1689 model.start_datetime
1690 if isinstance(model.start_datetime, str)
1691 else self.create_min_max_datetime(model.start_datetime, config)
1692 )
1693 end_datetime: Union[str, MinMaxDatetime, None] = None
1694 if model.is_data_feed and model.end_datetime:
1695 raise ValueError("Data feed does not support end_datetime")
1696 if model.is_data_feed and model.is_client_side_incremental:
1697 raise ValueError(
1698 "`Client side incremental` cannot be applied with `data feed`. Choose only one of them."
1699 ) 1700 if model.end_datetime: 1701 end_datetime = ( 1702 model.end_datetime 1703 if isinstance(model.end_datetime, str) 1704 else self.create_min_max_datetime(model.end_datetime, config) 1705 ) 1706 1707 end_time_option = ( 1708 self._create_component_from_model( 1709 model.end_time_option, config, parameters=model.parameters or {} 1710 ) 1711 if model.end_time_option 1712 else None 1713 ) 1714 start_time_option = ( 1715 self._create_component_from_model( 1716 model.start_time_option, config, parameters=model.parameters or {} 1717 ) 1718 if model.start_time_option 1719 else None 1720 ) 1721 1722 return DatetimeBasedCursor( 1723 cursor_field=model.cursor_field, 1724 cursor_datetime_formats=model.cursor_datetime_formats 1725 if model.cursor_datetime_formats 1726 else [], 1727 cursor_granularity=model.cursor_granularity, 1728 datetime_format=model.datetime_format, 1729 end_datetime=end_datetime, 1730 start_datetime=start_datetime, 1731 step=model.step, 1732 end_time_option=end_time_option, 1733 lookback_window=model.lookback_window, 1734 start_time_option=start_time_option, 1735 partition_field_end=model.partition_field_end, 1736 partition_field_start=model.partition_field_start, 1737 message_repository=self._message_repository, 1738 is_compare_strictly=model.is_compare_strictly, 1739 config=config, 1740 parameters=model.parameters or {}, 1741 ) 1742 1743 def create_declarative_stream( 1744 self, model: DeclarativeStreamModel, config: Config, **kwargs: Any 1745 ) -> DeclarativeStream: 1746 # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field 1747 # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the 1748 # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in 1749 # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one. 1750 combined_slicers = self._merge_stream_slicers(model=model, config=config) 1751 1752 primary_key = model.primary_key.__root__ if model.primary_key else None 1753 stop_condition_on_cursor = ( 1754 model.incremental_sync 1755 and hasattr(model.incremental_sync, "is_data_feed") 1756 and model.incremental_sync.is_data_feed 1757 ) 1758 client_side_incremental_sync = None 1759 if ( 1760 model.incremental_sync 1761 and hasattr(model.incremental_sync, "is_client_side_incremental") 1762 and model.incremental_sync.is_client_side_incremental 1763 ): 1764 supported_slicers = ( 1765 DatetimeBasedCursor, 1766 GlobalSubstreamCursor, 1767 PerPartitionWithGlobalCursor, 1768 ) 1769 if combined_slicers and not isinstance(combined_slicers, supported_slicers): 1770 raise ValueError( 1771 "Unsupported Slicer is used. 
PerPartitionWithGlobalCursor should be used here instead"
1772 )
1773 cursor = (
1774 combined_slicers
1775 if isinstance(
1776 combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1777 )
1778 else self._create_component_from_model(model=model.incremental_sync, config=config)
1779 )
1780
1781 client_side_incremental_sync = {"cursor": cursor}
1782
1783 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1784 cursor_model = model.incremental_sync
1785
1786 end_time_option = (
1787 self._create_component_from_model(
1788 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1789 )
1790 if cursor_model.end_time_option
1791 else None
1792 )
1793 start_time_option = (
1794 self._create_component_from_model(
1795 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1796 )
1797 if cursor_model.start_time_option
1798 else None
1799 )
1800
1801 request_options_provider = DatetimeBasedRequestOptionsProvider(
1802 start_time_option=start_time_option,
1803 end_time_option=end_time_option,
1804 partition_field_start=cursor_model.partition_field_start,
1805 partition_field_end=cursor_model.partition_field_end,
1806 config=config,
1807 parameters=model.parameters or {},
1808 )
1809 elif model.incremental_sync and isinstance(
1810 model.incremental_sync, IncrementingCountCursorModel
1811 ):
1812 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore
1813
1814 start_time_option = (
1815 self._create_component_from_model(
1816 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1817 config,
1818 parameters=cursor_model.parameters or {},
1819 )
1820 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
1821 else None
1822 )
1823
1824 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
1825 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
1826 partition_field_start = "start"
1827
1828 request_options_provider = DatetimeBasedRequestOptionsProvider(
1829 start_time_option=start_time_option,
1830 partition_field_start=partition_field_start,
1831 config=config,
1832 parameters=model.parameters or {},
1833 )
1834 else:
1835 request_options_provider = None
1836
1837 transformations = []
1838 if model.transformations:
1839 for transformation_model in model.transformations:
1840 transformations.append(
1841 self._create_component_from_model(model=transformation_model, config=config)
1842 )
1843
1844 retriever = self._create_component_from_model(
1845 model=model.retriever,
1846 config=config,
1847 name=model.name,
1848 primary_key=primary_key,
1849 stream_slicer=combined_slicers,
1850 request_options_provider=request_options_provider,
1851 stop_condition_on_cursor=stop_condition_on_cursor,
1852 client_side_incremental_sync=client_side_incremental_sync,
1853 transformations=transformations,
1854 incremental_sync=model.incremental_sync,
1855 )
1856 cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
1857
1858 if model.state_migrations:
1859 state_transformations = [
1860 self._create_component_from_model(state_migration, config, declarative_stream=model)
1861 for state_migration in model.state_migrations
1862 ]
1863 else:
1864 state_transformations = []
1865
1866 if model.schema_loader:
1867 schema_loader = self._create_component_from_model(
1868
model=model.schema_loader, config=config 1869 ) 1870 else: 1871 options = model.parameters or {} 1872 if "name" not in options: 1873 options["name"] = model.name 1874 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 1875 1876 return DeclarativeStream( 1877 name=model.name or "", 1878 primary_key=primary_key, 1879 retriever=retriever, 1880 schema_loader=schema_loader, 1881 stream_cursor_field=cursor_field or "", 1882 state_migrations=state_transformations, 1883 config=config, 1884 parameters=model.parameters or {}, 1885 ) 1886 1887 def _build_stream_slicer_from_partition_router( 1888 self, 1889 model: Union[ 1890 AsyncRetrieverModel, 1891 CustomRetrieverModel, 1892 SimpleRetrieverModel, 1893 ], 1894 config: Config, 1895 stream_name: Optional[str] = None, 1896 ) -> Optional[PartitionRouter]: 1897 if ( 1898 hasattr(model, "partition_router") 1899 and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel) 1900 and model.partition_router 1901 ): 1902 stream_slicer_model = model.partition_router 1903 if isinstance(stream_slicer_model, list): 1904 return CartesianProductStreamSlicer( 1905 [ 1906 self._create_component_from_model( 1907 model=slicer, config=config, stream_name=stream_name or "" 1908 ) 1909 for slicer in stream_slicer_model 1910 ], 1911 parameters={}, 1912 ) 1913 else: 1914 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 1915 model=stream_slicer_model, config=config, stream_name=stream_name or "" 1916 ) 1917 return None 1918 1919 def _build_incremental_cursor( 1920 self, 1921 model: DeclarativeStreamModel, 1922 stream_slicer: Optional[PartitionRouter], 1923 config: Config, 1924 ) -> Optional[StreamSlicer]: 1925 if model.incremental_sync and stream_slicer: 1926 if model.retriever.type == "AsyncRetriever": 1927 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1928 state_manager=self._connector_state_manager, 1929 model_type=DatetimeBasedCursorModel, 1930 component_definition=model.incremental_sync.__dict__, 1931 stream_name=model.name or "", 1932 stream_namespace=None, 1933 config=config or {}, 1934 stream_state={}, 1935 partition_router=stream_slicer, 1936 ) 1937 1938 incremental_sync_model = model.incremental_sync 1939 cursor_component = self._create_component_from_model( 1940 model=incremental_sync_model, config=config 1941 ) 1942 is_global_cursor = ( 1943 hasattr(incremental_sync_model, "global_substream_cursor") 1944 and incremental_sync_model.global_substream_cursor 1945 ) 1946 1947 if is_global_cursor: 1948 return GlobalSubstreamCursor( 1949 stream_cursor=cursor_component, partition_router=stream_slicer 1950 ) 1951 return PerPartitionWithGlobalCursor( 1952 cursor_factory=CursorFactory( 1953 lambda: self._create_component_from_model( 1954 model=incremental_sync_model, config=config 1955 ), 1956 ), 1957 partition_router=stream_slicer, 1958 stream_cursor=cursor_component, 1959 ) 1960 elif model.incremental_sync: 1961 if model.retriever.type == "AsyncRetriever": 1962 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1963 model_type=DatetimeBasedCursorModel, 1964 component_definition=model.incremental_sync.__dict__, 1965 stream_name=model.name or "", 1966 stream_namespace=None, 1967 config=config or {}, 1968 stream_state_migrations=model.state_migrations, 1969 ) 1970 return self._create_component_from_model(model=model.incremental_sync, config=config) # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync 1971 return None 1972 1973 def _build_resumable_cursor( 1974 self, 1975 model: Union[ 1976 AsyncRetrieverModel, 1977 CustomRetrieverModel, 1978 SimpleRetrieverModel, 1979 ], 1980 stream_slicer: Optional[PartitionRouter], 1981 ) -> Optional[StreamSlicer]: 1982 if hasattr(model, "paginator") and model.paginator and not stream_slicer: 1983 # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor` 1984 return ResumableFullRefreshCursor(parameters={}) 1985 elif stream_slicer: 1986 # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor` 1987 return PerPartitionCursor( 1988 cursor_factory=CursorFactory( 1989 create_function=partial(ChildPartitionResumableFullRefreshCursor, {}) 1990 ), 1991 partition_router=stream_slicer, 1992 ) 1993 return None 1994 1995 def _merge_stream_slicers( 1996 self, model: DeclarativeStreamModel, config: Config 1997 ) -> Optional[StreamSlicer]: 1998 retriever_model = model.retriever 1999 2000 stream_slicer = self._build_stream_slicer_from_partition_router( 2001 retriever_model, config, stream_name=model.name 2002 ) 2003 2004 if retriever_model.type == "AsyncRetriever": 2005 is_not_datetime_cursor = ( 2006 model.incremental_sync.type != "DatetimeBasedCursor" 2007 if model.incremental_sync 2008 else None 2009 ) 2010 is_partition_router = ( 2011 bool(retriever_model.partition_router) if model.incremental_sync else None 2012 ) 2013 2014 if 
is_not_datetime_cursor:
2015 # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
2016 # support of unordered slices (for example, when we trigger reports for January and February, the report
2017 # in February can be completed first). Once we have support for a custom concurrent cursor or have a new
2018 # implementation available in the CDK, we can enable more cursors here.
2019 raise ValueError(
2020 "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
2021 )
2022
2023 if is_partition_router and not stream_slicer:
2024 # Note that per-partition support is being developed in parallel; once that work is merged,
2025 # we could support it here by calling create_concurrent_cursor_from_perpartition_cursor
2026 raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
2027
2028 if model.incremental_sync:
2029 return self._build_incremental_cursor(model, stream_slicer, config)
2030
2031 return (
2032 stream_slicer
2033 if self._disable_resumable_full_refresh
2034 else self._build_resumable_cursor(retriever_model, stream_slicer)
2035 )
2036
2037 def create_default_error_handler(
2038 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
2039 ) -> DefaultErrorHandler:
2040 backoff_strategies = []
2041 if model.backoff_strategies:
2042 for backoff_strategy_model in model.backoff_strategies:
2043 backoff_strategies.append(
2044 self._create_component_from_model(model=backoff_strategy_model, config=config)
2045 )
2046
2047 response_filters = []
2048 if model.response_filters:
2049 for response_filter_model in model.response_filters:
2050 response_filters.append(
2051 self._create_component_from_model(model=response_filter_model, config=config)
2052 )
2053 response_filters.append(
2054 HttpResponseFilter(config=config, parameters=model.parameters or {})
2055 )
2056
2057 return DefaultErrorHandler(
2058 backoff_strategies=backoff_strategies,
2059 max_retries=model.max_retries,
2060 response_filters=response_filters,
2061 config=config,
2062 parameters=model.parameters or {},
2063 )
2064
2065 def create_default_paginator(
2066 self,
2067 model: DefaultPaginatorModel,
2068 config: Config,
2069 *,
2070 url_base: str,
2071 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2072 decoder: Optional[Decoder] = None,
2073 cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
2074 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
2075 if decoder:
2076 if self._is_supported_decoder_for_pagination(decoder):
2077 decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
2078 else:
2079 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
2080 else:
2081 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
2082 page_size_option = (
2083 self._create_component_from_model(model=model.page_size_option, config=config)
2084 if model.page_size_option
2085 else None
2086 )
2087 page_token_option = (
2088 self._create_component_from_model(model=model.page_token_option, config=config)
2089 if model.page_token_option
2090 else None
2091 )
2092 pagination_strategy = self._create_component_from_model(
2093 model=model.pagination_strategy,
2094 config=config,
2095 decoder=decoder_to_use,
2096 extractor_model=extractor_model,
2097 )
2098 if cursor_used_for_stop_condition:
2099 pagination_strategy = StopConditionPaginationStrategyDecorator(
2100 pagination_strategy,
CursorStopCondition(cursor_used_for_stop_condition) 2101 ) 2102 paginator = DefaultPaginator( 2103 decoder=decoder_to_use, 2104 page_size_option=page_size_option, 2105 page_token_option=page_token_option, 2106 pagination_strategy=pagination_strategy, 2107 url_base=url_base, 2108 config=config, 2109 parameters=model.parameters or {}, 2110 ) 2111 if self._limit_pages_fetched_per_slice: 2112 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2113 return paginator 2114 2115 def create_dpath_extractor( 2116 self, 2117 model: DpathExtractorModel, 2118 config: Config, 2119 decoder: Optional[Decoder] = None, 2120 **kwargs: Any, 2121 ) -> DpathExtractor: 2122 if decoder: 2123 decoder_to_use = decoder 2124 else: 2125 decoder_to_use = JsonDecoder(parameters={}) 2126 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2127 return DpathExtractor( 2128 decoder=decoder_to_use, 2129 field_path=model_field_path, 2130 config=config, 2131 parameters=model.parameters or {}, 2132 ) 2133 2134 @staticmethod 2135 def create_response_to_file_extractor( 2136 model: ResponseToFileExtractorModel, 2137 **kwargs: Any, 2138 ) -> ResponseToFileExtractor: 2139 return ResponseToFileExtractor(parameters=model.parameters or {}) 2140 2141 @staticmethod 2142 def create_exponential_backoff_strategy( 2143 model: ExponentialBackoffStrategyModel, config: Config 2144 ) -> ExponentialBackoffStrategy: 2145 return ExponentialBackoffStrategy( 2146 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2147 ) 2148 2149 @staticmethod 2150 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2151 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2152 2153 def create_http_requester( 2154 self, 2155 model: HttpRequesterModel, 2156 config: Config, 2157 decoder: Decoder = JsonDecoder(parameters={}), 2158 query_properties_key: Optional[str] = None, 2159 use_cache: Optional[bool] = None, 2160 *, 2161 name: str, 2162 ) -> HttpRequester: 2163 authenticator = ( 2164 self._create_component_from_model( 2165 model=model.authenticator, 2166 config=config, 2167 url_base=model.url_base, 2168 name=name, 2169 decoder=decoder, 2170 ) 2171 if model.authenticator 2172 else None 2173 ) 2174 error_handler = ( 2175 self._create_component_from_model(model=model.error_handler, config=config) 2176 if model.error_handler 2177 else DefaultErrorHandler( 2178 backoff_strategies=[], 2179 response_filters=[], 2180 config=config, 2181 parameters=model.parameters or {}, 2182 ) 2183 ) 2184 2185 api_budget = self._api_budget 2186 2187 request_options_provider = InterpolatedRequestOptionsProvider( 2188 request_body_data=model.request_body_data, 2189 request_body_json=model.request_body_json, 2190 request_headers=model.request_headers, 2191 request_parameters=model.request_parameters, 2192 query_properties_key=query_properties_key, 2193 config=config, 2194 parameters=model.parameters or {}, 2195 ) 2196 2197 assert model.use_cache is not None # for mypy 2198 assert model.http_method is not None # for mypy 2199 2200 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2201 2202 return HttpRequester( 2203 name=name, 2204 url_base=model.url_base, 2205 path=model.path, 2206 authenticator=authenticator, 2207 error_handler=error_handler, 2208 api_budget=api_budget, 2209 http_method=HttpMethod[model.http_method.value], 2210 request_options_provider=request_options_provider, 2211 config=config, 2212 
disable_retries=self._disable_retries, 2213 parameters=model.parameters or {}, 2214 message_repository=self._message_repository, 2215 use_cache=should_use_cache, 2216 decoder=decoder, 2217 stream_response=decoder.is_stream_response() if decoder else False, 2218 ) 2219 2220 @staticmethod 2221 def create_http_response_filter( 2222 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2223 ) -> HttpResponseFilter: 2224 if model.action: 2225 action = ResponseAction(model.action.value) 2226 else: 2227 action = None 2228 2229 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2230 2231 http_codes = ( 2232 set(model.http_codes) if model.http_codes else set() 2233 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2234 2235 return HttpResponseFilter( 2236 action=action, 2237 failure_type=failure_type, 2238 error_message=model.error_message or "", 2239 error_message_contains=model.error_message_contains or "", 2240 http_codes=http_codes, 2241 predicate=model.predicate or "", 2242 config=config, 2243 parameters=model.parameters or {}, 2244 ) 2245 2246 @staticmethod 2247 def create_inline_schema_loader( 2248 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2249 ) -> InlineSchemaLoader: 2250 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2251 2252 def create_complex_field_type( 2253 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2254 ) -> ComplexFieldType: 2255 items = ( 2256 self._create_component_from_model(model=model.items, config=config) 2257 if isinstance(model.items, ComplexFieldTypeModel) 2258 else model.items 2259 ) 2260 2261 return ComplexFieldType(field_type=model.field_type, items=items) 2262 2263 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2264 target_type = ( 2265 self._create_component_from_model(model=model.target_type, config=config) 2266 if isinstance(model.target_type, ComplexFieldTypeModel) 2267 else model.target_type 2268 ) 2269 2270 return TypesMap( 2271 target_type=target_type, 2272 current_type=model.current_type, 2273 condition=model.condition if model.condition is not None else "True", 2274 ) 2275 2276 def create_schema_type_identifier( 2277 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2278 ) -> SchemaTypeIdentifier: 2279 types_mapping = [] 2280 if model.types_mapping: 2281 types_mapping.extend( 2282 [ 2283 self._create_component_from_model(types_map, config=config) 2284 for types_map in model.types_mapping 2285 ] 2286 ) 2287 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2288 [x for x in model.schema_pointer] if model.schema_pointer else [] 2289 ) 2290 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2291 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2292 [x for x in model.type_pointer] if model.type_pointer else None 2293 ) 2294 2295 return SchemaTypeIdentifier( 2296 schema_pointer=model_schema_pointer, 2297 key_pointer=model_key_pointer, 2298 type_pointer=model_type_pointer, 2299 types_mapping=types_mapping, 2300 parameters=model.parameters or {}, 2301 ) 2302 2303 def create_dynamic_schema_loader( 2304 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2305 ) -> DynamicSchemaLoader: 2306 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 2307 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 2308 
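# Illustrative manifest sketch (hypothetical pointer values, not from the original
# source): a DynamicSchemaLoader builds the stream schema from an endpoint response,
# roughly:
#
#     schema_loader:
#       type: DynamicSchemaLoader
#       retriever:
#         type: SimpleRetriever
#         ...
#       schema_type_identifier:
#         schema_pointer: ["fields"]
#         key_pointer: ["name"]
#         type_pointer: ["type"]
#
# The retriever assembled below fetches those field definitions, and the
# SchemaTypeIdentifier maps each entry's key/type pointers onto the resulting schema.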
2309 schema_transformations = []
2310 if model.schema_transformations:
2311 for transformation_model in model.schema_transformations:
2312 schema_transformations.append(
2313 self._create_component_from_model(model=transformation_model, config=config)
2314 )
2315
2316 retriever = self._create_component_from_model(
2317 model=model.retriever,
2318 config=config,
2319 name="dynamic_properties",
2320 primary_key=None,
2321 stream_slicer=combined_slicers,
2322 transformations=[],
2323 use_cache=True,
2324 )
2325 schema_type_identifier = self._create_component_from_model(
2326 model.schema_type_identifier, config=config, parameters=model.parameters or {}
2327 )
2328 return DynamicSchemaLoader(
2329 retriever=retriever,
2330 config=config,
2331 schema_transformations=schema_transformations,
2332 schema_type_identifier=schema_type_identifier,
2333 parameters=model.parameters or {},
2334 )
2335
2336 @staticmethod
2337 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2338 return JsonDecoder(parameters={})
2339
2340 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2341 return CompositeRawDecoder(
2342 parser=ModelToComponentFactory._get_parser(model, config),
2343 stream_response=not self._emit_connector_builder_messages,
2344 )
2345
2346 def create_jsonl_decoder(
2347 self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2348 ) -> Decoder:
2349 return CompositeRawDecoder(
2350 parser=ModelToComponentFactory._get_parser(model, config),
2351 stream_response=not self._emit_connector_builder_messages,
2352 )
2353
2354 def create_gzip_decoder(
2355 self, model: GzipDecoderModel, config: Config, **kwargs: Any
2356 ) -> Decoder:
2357 _compressed_response_types = {
2358 "gzip",
2359 "x-gzip",
2360 "gzip, deflate",
2361 "x-gzip, deflate",
2362 "application/zip",
2363 "application/gzip",
2364 "application/x-gzip",
2365 "application/x-zip-compressed",
2366 }
2367
2368 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser
2369
2370 if self._emit_connector_builder_messages:
2371 # This is surprising, but if the response is not streamed,
2372 # CompositeRawDecoder calls response.content, and the requests library actually uncompresses the data, as opposed to response.raw,
2373 # which uses urllib3 directly and does not uncompress the data.
2374 return CompositeRawDecoder(gzip_parser.inner_parser, False)
2375
2376 return CompositeRawDecoder.by_headers(
2377 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2378 stream_response=True,
2379 fallback_parser=gzip_parser.inner_parser,
2380 )
2381
2382 @staticmethod
2383 def create_incrementing_count_cursor(
2384 model: IncrementingCountCursorModel, config: Config, **kwargs: Any
2385 ) -> DatetimeBasedCursor:
2386 # This should not actually get used anywhere at runtime, but we need it to pass checks since
2387 # we still parse models into components. The issue is that there's no runtime implementation of an
2388 # IncrementingCountCursor.
2389 # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor, because check is run without the ConcurrentCursor.
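# Illustrative note (an assumption for clarity, not from the original source): given a
# manifest fragment such as
#
#     incremental_sync:
#       type: IncrementingCountCursor
#       cursor_field: "id"
#
# this stub returns a placeholder DatetimeBasedCursor whose format and start date are
# arbitrary; the real incremental behavior comes from the concurrent cursor machinery
# elsewhere in this factory.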
2390 return DatetimeBasedCursor(
2391 cursor_field=model.cursor_field,
2392 datetime_format="%Y-%m-%d",
2393 start_datetime="2024-12-12",
2394 config=config,
2395 parameters={},
2396 )
2397
2398 @staticmethod
2399 def create_iterable_decoder(
2400 model: IterableDecoderModel, config: Config, **kwargs: Any
2401 ) -> IterableDecoder:
2402 return IterableDecoder(parameters={})
2403
2404 @staticmethod
2405 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2406 return XmlDecoder(parameters={})
2407
2408 def create_zipfile_decoder(
2409 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2410 ) -> ZipfileDecoder:
2411 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2412
2413 @staticmethod
2414 def _get_parser(model: BaseModel, config: Config) -> Parser:
2415 if isinstance(model, JsonDecoderModel):
2416 # Note that the logic is a bit different from the JsonDecoder, as some legacy behavior of returning {} on error cases is maintained
2417 return JsonParser()
2418 elif isinstance(model, JsonlDecoderModel):
2419 return JsonLineParser()
2420 elif isinstance(model, CsvDecoderModel):
2421 return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
2422 elif isinstance(model, GzipDecoderModel):
2423 return GzipParser(
2424 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
2425 )
2426 elif isinstance(
2427 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
2428 ):
2429 raise ValueError(f"Decoder type {model} does not have a parser associated with it")
2430
2431 raise ValueError(f"Unknown decoder type {model}")
2432
2433 @staticmethod
2434 def create_json_file_schema_loader(
2435 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
2436 ) -> JsonFileSchemaLoader:
2437 return JsonFileSchemaLoader(
2438 file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2439 )
2440
2441 @staticmethod
2442 def create_jwt_authenticator(
2443 model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2444 ) -> JwtAuthenticator:
2445 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2446 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2447 return JwtAuthenticator(
2448 config=config,
2449 parameters=model.parameters or {},
2450 algorithm=JwtAlgorithm(model.algorithm.value),
2451 secret_key=model.secret_key,
2452 base64_encode_secret_key=model.base64_encode_secret_key,
2453 token_duration=model.token_duration,
2454 header_prefix=model.header_prefix,
2455 kid=jwt_headers.kid,
2456 typ=jwt_headers.typ,
2457 cty=jwt_headers.cty,
2458 iss=jwt_payload.iss,
2459 sub=jwt_payload.sub,
2460 aud=jwt_payload.aud,
2461 additional_jwt_headers=model.additional_jwt_headers,
2462 additional_jwt_payload=model.additional_jwt_payload,
2463 )
2464
2465 def create_list_partition_router(
2466 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2467 ) -> ListPartitionRouter:
2468 request_option = (
2469 self._create_component_from_model(model.request_option, config)
2470 if model.request_option
2471 else None
2472 )
2473 return ListPartitionRouter(
2474 cursor_field=model.cursor_field,
2475 request_option=request_option,
2476 values=model.values,
2477 config=config,
2478 parameters=model.parameters or {},
2479 )
2480
2481 @staticmethod
2482 def create_min_max_datetime(
2483 model: MinMaxDatetimeModel, config: Config, **kwargs: Any
2484 ) -> MinMaxDatetime:
2485 return
MinMaxDatetime( 2486 datetime=model.datetime, 2487 datetime_format=model.datetime_format or "", 2488 max_datetime=model.max_datetime or "", 2489 min_datetime=model.min_datetime or "", 2490 parameters=model.parameters or {}, 2491 ) 2492 2493 @staticmethod 2494 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2495 return NoAuth(parameters=model.parameters or {}) 2496 2497 @staticmethod 2498 def create_no_pagination( 2499 model: NoPaginationModel, config: Config, **kwargs: Any 2500 ) -> NoPagination: 2501 return NoPagination(parameters={}) 2502 2503 def create_oauth_authenticator( 2504 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2505 ) -> DeclarativeOauth2Authenticator: 2506 profile_assertion = ( 2507 self._create_component_from_model(model.profile_assertion, config=config) 2508 if model.profile_assertion 2509 else None 2510 ) 2511 2512 if model.refresh_token_updater: 2513 # ignore type error because fixing it would have a lot of dependencies, revisit later 2514 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2515 config, 2516 InterpolatedString.create( 2517 model.token_refresh_endpoint, # type: ignore 2518 parameters=model.parameters or {}, 2519 ).eval(config), 2520 access_token_name=InterpolatedString.create( 2521 model.access_token_name or "access_token", parameters=model.parameters or {} 2522 ).eval(config), 2523 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2524 expires_in_name=InterpolatedString.create( 2525 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2526 ).eval(config), 2527 client_id_name=InterpolatedString.create( 2528 model.client_id_name or "client_id", parameters=model.parameters or {} 2529 ).eval(config), 2530 client_id=InterpolatedString.create( 2531 model.client_id, parameters=model.parameters or {} 2532 ).eval(config) 2533 if model.client_id 2534 else model.client_id, 2535 client_secret_name=InterpolatedString.create( 2536 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2537 ).eval(config), 2538 client_secret=InterpolatedString.create( 2539 model.client_secret, parameters=model.parameters or {} 2540 ).eval(config) 2541 if model.client_secret 2542 else model.client_secret, 2543 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2544 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2545 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2546 grant_type_name=InterpolatedString.create( 2547 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2548 ).eval(config), 2549 grant_type=InterpolatedString.create( 2550 model.grant_type or "refresh_token", parameters=model.parameters or {} 2551 ).eval(config), 2552 refresh_request_body=InterpolatedMapping( 2553 model.refresh_request_body or {}, parameters=model.parameters or {} 2554 ).eval(config), 2555 refresh_request_headers=InterpolatedMapping( 2556 model.refresh_request_headers or {}, parameters=model.parameters or {} 2557 ).eval(config), 2558 scopes=model.scopes, 2559 token_expiry_date_format=model.token_expiry_date_format, 2560 message_repository=self._message_repository, 2561 refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes, 2562 refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key, 2563 refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values, 2564 ) 2565 # ignore 
type error because fixing it would have a lot of dependencies, revisit later
2566 return DeclarativeOauth2Authenticator( # type: ignore
2567 access_token_name=model.access_token_name or "access_token",
2568 access_token_value=model.access_token_value,
2569 client_id_name=model.client_id_name or "client_id",
2570 client_id=model.client_id,
2571 client_secret_name=model.client_secret_name or "client_secret",
2572 client_secret=model.client_secret,
2573 expires_in_name=model.expires_in_name or "expires_in",
2574 grant_type_name=model.grant_type_name or "grant_type",
2575 grant_type=model.grant_type or "refresh_token",
2576 refresh_request_body=model.refresh_request_body,
2577 refresh_request_headers=model.refresh_request_headers,
2578 refresh_token_name=model.refresh_token_name or "refresh_token",
2579 refresh_token=model.refresh_token,
2580 scopes=model.scopes,
2581 token_expiry_date=model.token_expiry_date,
2582 token_expiry_date_format=model.token_expiry_date_format,
2583 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
2584 token_refresh_endpoint=model.token_refresh_endpoint,
2585 config=config,
2586 parameters=model.parameters or {},
2587 message_repository=self._message_repository,
2588 profile_assertion=profile_assertion,
2589 use_profile_assertion=model.use_profile_assertion,
2590 )
2591
2592 def create_offset_increment(
2593 self,
2594 model: OffsetIncrementModel,
2595 config: Config,
2596 decoder: Decoder,
2597 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
2598 **kwargs: Any,
2599 ) -> OffsetIncrement:
2600 if isinstance(decoder, PaginationDecoderDecorator):
2601 inner_decoder = decoder.decoder
2602 else:
2603 inner_decoder = decoder
2604 decoder = PaginationDecoderDecorator(decoder=decoder)
2605
2606 if self._is_supported_decoder_for_pagination(inner_decoder):
2607 decoder_to_use = decoder
2608 else:
2609 raise ValueError(
2610 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
2611 )
2612
2613 # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
2614 # so that it can be shared by OffsetIncrement and RecordSelector. However, because we instantiate the
2615 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing
2616 # behavior by having two separate extractors with identical behavior since they use the same extractor model.
2617 # When we have more time to investigate we can look into reusing the same component.
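# Illustrative sketch (hypothetical manifest values, not from the original source): the
# same extractor model can back both the record selector and the pagination strategy,
# e.g.:
#
#     record_selector:
#       extractor:
#         field_path: ["data", "items"]
#     paginator:
#       pagination_strategy:
#         type: OffsetIncrement
#         page_size: 100
#
# As noted above, the strategy below builds its own extractor instance from that model
# rather than reusing the RecordSelector's, because the decoder is decorated differently
# here.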
2618 extractor = (
2619 self._create_component_from_model(
2620 model=extractor_model, config=config, decoder=decoder_to_use
2621 )
2622 if extractor_model
2623 else None
2624 )
2625
2626 return OffsetIncrement(
2627 page_size=model.page_size,
2628 config=config,
2629 decoder=decoder_to_use,
2630 extractor=extractor,
2631 inject_on_first_request=model.inject_on_first_request or False,
2632 parameters=model.parameters or {},
2633 )
2634
2635 @staticmethod
2636 def create_page_increment(
2637 model: PageIncrementModel, config: Config, **kwargs: Any
2638 ) -> PageIncrement:
2639 return PageIncrement(
2640 page_size=model.page_size,
2641 config=config,
2642 start_from_page=model.start_from_page or 0,
2643 inject_on_first_request=model.inject_on_first_request or False,
2644 parameters=model.parameters or {},
2645 )
2646
2647 def create_parent_stream_config(
2648 self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2649 ) -> ParentStreamConfig:
2650 declarative_stream = self._create_component_from_model(
2651 model.stream, config=config, **kwargs
2652 )
2653 request_option = (
2654 self._create_component_from_model(model.request_option, config=config)
2655 if model.request_option
2656 else None
2657 )
2658
2659 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
2660 raise ValueError(
2661 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
2662 )
2663
2664 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
2665 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
2666 )
2667
2668 return ParentStreamConfig(
2669 parent_key=model.parent_key,
2670 request_option=request_option,
2671 stream=declarative_stream,
2672 partition_field=model.partition_field,
2673 config=config,
2674 incremental_dependency=model.incremental_dependency or False,
2675 parameters=model.parameters or {},
2676 extra_fields=model.extra_fields,
2677 lazy_read_pointer=model_lazy_read_pointer,
2678 )
2679
2680 def create_properties_from_endpoint(
2681 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
2682 ) -> PropertiesFromEndpoint:
2683 retriever = self._create_component_from_model(
2684 model=model.retriever,
2685 config=config,
2686 name="dynamic_properties",
2687 primary_key=None,
2688 stream_slicer=None,
2689 transformations=[],
2690 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to be different
2691 )
2692 return PropertiesFromEndpoint(
2693 property_field_path=model.property_field_path,
2694 retriever=retriever,
2695 config=config,
2696 parameters=model.parameters or {},
2697 )
2698
2699 def create_property_chunking(
2700 self, model: PropertyChunkingModel, config: Config, **kwargs: Any
2701 ) -> PropertyChunking:
2702 record_merge_strategy = (
2703 self._create_component_from_model(
2704 model=model.record_merge_strategy, config=config, **kwargs
2705 )
2706 if model.record_merge_strategy
2707 else None
2708 )
2709
2710 property_limit_type: PropertyLimitType
2711 match model.property_limit_type:
2712 case PropertyLimitTypeModel.property_count:
2713 property_limit_type = PropertyLimitType.property_count
2714 case PropertyLimitTypeModel.characters:
2715 property_limit_type = PropertyLimitType.characters
2716 case _:
2717 raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")
2718
2719 return PropertyChunking(
2720
property_limit_type=property_limit_type, 2721 property_limit=model.property_limit, 2722 record_merge_strategy=record_merge_strategy, 2723 config=config, 2724 parameters=model.parameters or {}, 2725 ) 2726 2727 def create_query_properties( 2728 self, model: QueryPropertiesModel, config: Config, **kwargs: Any 2729 ) -> QueryProperties: 2730 if isinstance(model.property_list, list): 2731 property_list = model.property_list 2732 else: 2733 property_list = self._create_component_from_model( 2734 model=model.property_list, config=config, **kwargs 2735 ) 2736 2737 property_chunking = ( 2738 self._create_component_from_model( 2739 model=model.property_chunking, config=config, **kwargs 2740 ) 2741 if model.property_chunking 2742 else None 2743 ) 2744 2745 return QueryProperties( 2746 property_list=property_list, 2747 always_include_properties=model.always_include_properties, 2748 property_chunking=property_chunking, 2749 config=config, 2750 parameters=model.parameters or {}, 2751 ) 2752 2753 @staticmethod 2754 def create_record_filter( 2755 model: RecordFilterModel, config: Config, **kwargs: Any 2756 ) -> RecordFilter: 2757 return RecordFilter( 2758 condition=model.condition or "", config=config, parameters=model.parameters or {} 2759 ) 2760 2761 @staticmethod 2762 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 2763 return RequestPath(parameters={}) 2764 2765 @staticmethod 2766 def create_request_option( 2767 model: RequestOptionModel, config: Config, **kwargs: Any 2768 ) -> RequestOption: 2769 inject_into = RequestOptionType(model.inject_into.value) 2770 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 2771 [ 2772 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 2773 for segment in model.field_path 2774 ] 2775 if model.field_path 2776 else None 2777 ) 2778 field_name = ( 2779 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 2780 if model.field_name 2781 else None 2782 ) 2783 return RequestOption( 2784 field_name=field_name, 2785 field_path=field_path, 2786 inject_into=inject_into, 2787 parameters=kwargs.get("parameters", {}), 2788 ) 2789 2790 def create_record_selector( 2791 self, 2792 model: RecordSelectorModel, 2793 config: Config, 2794 *, 2795 name: str, 2796 transformations: List[RecordTransformation] | None = None, 2797 decoder: Decoder | None = None, 2798 client_side_incremental_sync: Dict[str, Any] | None = None, 2799 **kwargs: Any, 2800 ) -> RecordSelector: 2801 extractor = self._create_component_from_model( 2802 model=model.extractor, decoder=decoder, config=config 2803 ) 2804 record_filter = ( 2805 self._create_component_from_model(model.record_filter, config=config) 2806 if model.record_filter 2807 else None 2808 ) 2809 2810 assert model.transform_before_filtering is not None # for mypy 2811 2812 transform_before_filtering = model.transform_before_filtering 2813 if client_side_incremental_sync: 2814 record_filter = ClientSideIncrementalRecordFilterDecorator( 2815 config=config, 2816 parameters=model.parameters, 2817 condition=model.record_filter.condition 2818 if (model.record_filter and hasattr(model.record_filter, "condition")) 2819 else None, 2820 **client_side_incremental_sync, 2821 ) 2822 transform_before_filtering = True 2823 2824 schema_normalization = ( 2825 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 2826 if isinstance(model.schema_normalization, SchemaNormalizationModel) 2827 else 
self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here
2828 )
2829
2830 return RecordSelector(
2831 extractor=extractor,
2832 name=name,
2833 config=config,
2834 record_filter=record_filter,
2835 transformations=transformations or [],
2836 schema_normalization=schema_normalization,
2837 parameters=model.parameters or {},
2838 transform_before_filtering=transform_before_filtering,
2839 )
2840
2841 @staticmethod
2842 def create_remove_fields(
2843 model: RemoveFieldsModel, config: Config, **kwargs: Any
2844 ) -> RemoveFields:
2845 return RemoveFields(
2846 field_pointers=model.field_pointers, condition=model.condition or "", parameters={}
2847 )
2848
2849 def create_selective_authenticator(
2850 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
2851 ) -> DeclarativeAuthenticator:
2852 authenticators = {
2853 name: self._create_component_from_model(model=auth, config=config)
2854 for name, auth in model.authenticators.items()
2855 }
2856 # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
2857 return SelectiveAuthenticator( # type: ignore[abstract]
2858 config=config,
2859 authenticators=authenticators,
2860 authenticator_selection_path=model.authenticator_selection_path,
2861 **kwargs,
2862 )
2863
2864 @staticmethod
2865 def create_legacy_session_token_authenticator(
2866 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
2867 ) -> LegacySessionTokenAuthenticator:
2868 return LegacySessionTokenAuthenticator(
2869 api_url=url_base,
2870 header=model.header,
2871 login_url=model.login_url,
2872 password=model.password or "",
2873 session_token=model.session_token or "",
2874 session_token_response_key=model.session_token_response_key or "",
2875 username=model.username or "",
2876 validate_session_url=model.validate_session_url,
2877 config=config,
2878 parameters=model.parameters or {},
2879 )
2880
2881 def create_simple_retriever(
2882 self,
2883 model: SimpleRetrieverModel,
2884 config: Config,
2885 *,
2886 name: str,
2887 primary_key: Optional[Union[str, List[str], List[List[str]]]],
2888 stream_slicer: Optional[StreamSlicer],
2889 request_options_provider: Optional[RequestOptionsProvider] = None,
2890 stop_condition_on_cursor: bool = False,
2891 client_side_incremental_sync: Optional[Dict[str, Any]] = None,
2892 transformations: List[RecordTransformation],
2893 incremental_sync: Optional[
2894 Union[
2895 IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
2896 ]
2897 ] = None,
2898 use_cache: Optional[bool] = None,
2899 **kwargs: Any,
2900 ) -> SimpleRetriever:
2901 decoder = (
2902 self._create_component_from_model(model=model.decoder, config=config)
2903 if model.decoder
2904 else JsonDecoder(parameters={})
2905 )
2906 record_selector = self._create_component_from_model(
2907 model=model.record_selector,
2908 name=name,
2909 config=config,
2910 decoder=decoder,
2911 transformations=transformations,
2912 client_side_incremental_sync=client_side_incremental_sync,
2913 )
2914
2915 query_properties: Optional[QueryProperties] = None
2916 query_properties_key: Optional[str] = None
2917 if (
2918 hasattr(model.requester, "request_parameters")
2919 and model.requester.request_parameters
2920 and isinstance(model.requester.request_parameters, Mapping)
2921 ):
2922 query_properties_definitions = []
2923 for key, request_parameter in
model.requester.request_parameters.items(): 2924 # When translating JSON schema into Pydantic models, enforcing types for arrays containing both 2925 # concrete string complex object definitions like QueryProperties would get resolved to Union[str, Any]. 2926 # This adds the extra validation that we couldn't get for free in Pydantic model generation 2927 if ( 2928 isinstance(request_parameter, Mapping) 2929 and request_parameter.get("type") == "QueryProperties" 2930 ): 2931 query_properties_key = key 2932 query_properties_definitions.append(request_parameter) 2933 elif not isinstance(request_parameter, str): 2934 raise ValueError( 2935 f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}" 2936 ) 2937 2938 if len(query_properties_definitions) > 1: 2939 raise ValueError( 2940 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 2941 ) 2942 2943 if len(query_properties_definitions) == 1: 2944 query_properties = self.create_component( 2945 model_type=QueryPropertiesModel, 2946 component_definition=query_properties_definitions[0], 2947 config=config, 2948 ) 2949 2950 # Removes QueryProperties components from the interpolated mappings because it will be resolved in 2951 # the provider from the slice directly instead of through jinja interpolation 2952 if isinstance(model.requester.request_parameters, Mapping): 2953 model.requester.request_parameters = self._remove_query_properties( 2954 model.requester.request_parameters 2955 ) 2956 2957 requester = self._create_component_from_model( 2958 model=model.requester, 2959 decoder=decoder, 2960 name=name, 2961 query_properties_key=query_properties_key, 2962 use_cache=use_cache, 2963 config=config, 2964 ) 2965 url_base = ( 2966 model.requester.url_base 2967 if hasattr(model.requester, "url_base") 2968 else requester.get_url_base() 2969 ) 2970 2971 # Define cursor only if per partition or common incremental support is needed 2972 cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None 2973 2974 if ( 2975 not isinstance(stream_slicer, DatetimeBasedCursor) 2976 or type(stream_slicer) is not DatetimeBasedCursor 2977 ): 2978 # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods). 2979 # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement 2980 # their own RequestOptionsProvider. 
However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's 2981 # request_options_provider 2982 request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={}) 2983 elif not request_options_provider: 2984 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 2985 2986 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 2987 2988 cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None 2989 paginator = ( 2990 self._create_component_from_model( 2991 model=model.paginator, 2992 config=config, 2993 url_base=url_base, 2994 extractor_model=model.record_selector.extractor, 2995 decoder=decoder, 2996 cursor_used_for_stop_condition=cursor_used_for_stop_condition, 2997 ) 2998 if model.paginator 2999 else NoPagination(parameters={}) 3000 ) 3001 3002 ignore_stream_slicer_parameters_on_paginated_requests = ( 3003 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3004 ) 3005 3006 if ( 3007 model.partition_router 3008 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3009 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3010 and any( 3011 parent_stream_config.lazy_read_pointer 3012 for parent_stream_config in model.partition_router.parent_stream_configs 3013 ) 3014 ): 3015 if incremental_sync: 3016 if incremental_sync.type != "DatetimeBasedCursor": 3017 raise ValueError( 3018 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3019 ) 3020 3021 elif incremental_sync.step or incremental_sync.cursor_granularity: 3022 raise ValueError( 3023 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 3024 ) 3025 3026 if model.decoder and model.decoder.type != "JsonDecoder": 3027 raise ValueError( 3028 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 
3029 ) 3030 3031 return LazySimpleRetriever( 3032 name=name, 3033 paginator=paginator, 3034 primary_key=primary_key, 3035 requester=requester, 3036 record_selector=record_selector, 3037 stream_slicer=stream_slicer, 3038 request_option_provider=request_options_provider, 3039 cursor=cursor, 3040 config=config, 3041 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3042 parameters=model.parameters or {}, 3043 ) 3044 3045 if self._limit_slices_fetched or self._emit_connector_builder_messages: 3046 return SimpleRetrieverTestReadDecorator( 3047 name=name, 3048 paginator=paginator, 3049 primary_key=primary_key, 3050 requester=requester, 3051 record_selector=record_selector, 3052 stream_slicer=stream_slicer, 3053 request_option_provider=request_options_provider, 3054 cursor=cursor, 3055 config=config, 3056 maximum_number_of_slices=self._limit_slices_fetched or 5, 3057 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3058 parameters=model.parameters or {}, 3059 ) 3060 return SimpleRetriever( 3061 name=name, 3062 paginator=paginator, 3063 primary_key=primary_key, 3064 requester=requester, 3065 record_selector=record_selector, 3066 stream_slicer=stream_slicer, 3067 request_option_provider=request_options_provider, 3068 cursor=cursor, 3069 config=config, 3070 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3071 additional_query_properties=query_properties, 3072 parameters=model.parameters or {}, 3073 ) 3074 3075 @staticmethod 3076 def _remove_query_properties( 3077 request_parameters: Mapping[str, Union[Any, str]], 3078 ) -> Mapping[str, Union[Any, str]]: 3079 return { 3080 parameter_field: request_parameter 3081 for parameter_field, request_parameter in request_parameters.items() 3082 if not isinstance(request_parameter, Mapping) 3083 or not request_parameter.get("type") == "QueryProperties" 3084 } 3085 3086 def create_state_delegating_stream( 3087 self, 3088 model: StateDelegatingStreamModel, 3089 config: Config, 3090 has_parent_state: Optional[bool] = None, 3091 **kwargs: Any, 3092 ) -> DeclarativeStream: 3093 if ( 3094 model.full_refresh_stream.name != model.name 3095 or model.name != model.incremental_stream.name 3096 ): 3097 raise ValueError( 3098 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3099 ) 3100 3101 stream_model = ( 3102 model.incremental_stream 3103 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3104 else model.full_refresh_stream 3105 ) 3106 3107 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description 3108 3109 def _create_async_job_status_mapping( 3110 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3111 ) -> Mapping[str, AsyncJobStatus]: 3112 api_status_to_cdk_status = {} 3113 for cdk_status, api_statuses in model.dict().items(): 3114 if cdk_status == "type": 3115 # This is an element of the dict because of the typing of the CDK but it is not a CDK status 3116 continue 3117 3118 for status in api_statuses: 3119 if status in api_status_to_cdk_status: 3120 raise ValueError( 3121 f"API status {status} is already set for CDK status {cdk_status}. 
Please ensure API statuses are only provided once" 3122 ) 3123 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3124 return api_status_to_cdk_status 3125 3126 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3127 match status: 3128 case "running": 3129 return AsyncJobStatus.RUNNING 3130 case "completed": 3131 return AsyncJobStatus.COMPLETED 3132 case "failed": 3133 return AsyncJobStatus.FAILED 3134 case "timeout": 3135 return AsyncJobStatus.TIMED_OUT 3136 case _: 3137 raise ValueError(f"Unsupported CDK status {status}") 3138 3139 def create_async_retriever( 3140 self, 3141 model: AsyncRetrieverModel, 3142 config: Config, 3143 *, 3144 name: str, 3145 primary_key: Optional[ 3146 Union[str, List[str], List[List[str]]] 3147 ], # this seems to be needed to match create_simple_retriever 3148 stream_slicer: Optional[StreamSlicer], 3149 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3150 transformations: List[RecordTransformation], 3151 **kwargs: Any, 3152 ) -> AsyncRetriever: 3153 def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever: 3154 record_selector = RecordSelector( 3155 extractor=download_extractor, 3156 name=name, 3157 record_filter=None, 3158 transformations=transformations, 3159 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3160 config=config, 3161 parameters={}, 3162 ) 3163 paginator = ( 3164 self._create_component_from_model( 3165 model=model.download_paginator, 3166 decoder=decoder, 3167 config=config, 3168 url_base="", 3169 ) 3170 if model.download_paginator 3171 else NoPagination(parameters={}) 3172 ) 3173 maximum_number_of_slices = self._limit_slices_fetched or 5 3174 3175 if self._limit_slices_fetched or self._emit_connector_builder_messages: 3176 return SimpleRetrieverTestReadDecorator( 3177 requester=download_requester, 3178 record_selector=record_selector, 3179 primary_key=None, 3180 name=job_download_components_name, 3181 paginator=paginator, 3182 config=config, 3183 parameters={}, 3184 maximum_number_of_slices=maximum_number_of_slices, 3185 ) 3186 3187 return SimpleRetriever( 3188 requester=download_requester, 3189 record_selector=record_selector, 3190 primary_key=None, 3191 name=job_download_components_name, 3192 paginator=paginator, 3193 config=config, 3194 parameters={}, 3195 ) 3196 3197 def _get_job_timeout() -> datetime.timedelta: 3198 user_defined_timeout: Optional[int] = ( 3199 int( 3200 InterpolatedString.create( 3201 str(model.polling_job_timeout), 3202 parameters={}, 3203 ).eval(config) 3204 ) 3205 if model.polling_job_timeout 3206 else None 3207 ) 3208 3209 # check for user defined timeout during the test read or 15 minutes 3210 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3211 # default value for non-connector builder is 60 minutes. 
3212 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3213 3214 return ( 3215 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3216 ) 3217 3218 decoder = ( 3219 self._create_component_from_model(model=model.decoder, config=config) 3220 if model.decoder 3221 else JsonDecoder(parameters={}) 3222 ) 3223 record_selector = self._create_component_from_model( 3224 model=model.record_selector, 3225 config=config, 3226 decoder=decoder, 3227 name=name, 3228 transformations=transformations, 3229 client_side_incremental_sync=client_side_incremental_sync, 3230 ) 3231 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3232 creation_requester = self._create_component_from_model( 3233 model=model.creation_requester, 3234 decoder=decoder, 3235 config=config, 3236 name=f"job creation - {name}", 3237 ) 3238 polling_requester = self._create_component_from_model( 3239 model=model.polling_requester, 3240 decoder=decoder, 3241 config=config, 3242 name=f"job polling - {name}", 3243 ) 3244 job_download_components_name = f"job download - {name}" 3245 download_decoder = ( 3246 self._create_component_from_model(model=model.download_decoder, config=config) 3247 if model.download_decoder 3248 else JsonDecoder(parameters={}) 3249 ) 3250 download_extractor = ( 3251 self._create_component_from_model( 3252 model=model.download_extractor, 3253 config=config, 3254 decoder=download_decoder, 3255 parameters=model.parameters, 3256 ) 3257 if model.download_extractor 3258 else DpathExtractor( 3259 [], 3260 config=config, 3261 decoder=download_decoder, 3262 parameters=model.parameters or {}, 3263 ) 3264 ) 3265 download_requester = self._create_component_from_model( 3266 model=model.download_requester, 3267 decoder=download_decoder, 3268 config=config, 3269 name=job_download_components_name, 3270 ) 3271 download_retriever = _get_download_retriever() 3272 abort_requester = ( 3273 self._create_component_from_model( 3274 model=model.abort_requester, 3275 decoder=decoder, 3276 config=config, 3277 name=f"job abort - {name}", 3278 ) 3279 if model.abort_requester 3280 else None 3281 ) 3282 delete_requester = ( 3283 self._create_component_from_model( 3284 model=model.delete_requester, 3285 decoder=decoder, 3286 config=config, 3287 name=f"job delete - {name}", 3288 ) 3289 if model.delete_requester 3290 else None 3291 ) 3292 download_target_requester = ( 3293 self._create_component_from_model( 3294 model=model.download_target_requester, 3295 decoder=decoder, 3296 config=config, 3297 name=f"job extract_url - {name}", 3298 ) 3299 if model.download_target_requester 3300 else None 3301 ) 3302 status_extractor = self._create_component_from_model( 3303 model=model.status_extractor, decoder=decoder, config=config, name=name 3304 ) 3305 download_target_extractor = self._create_component_from_model( 3306 model=model.download_target_extractor, 3307 decoder=decoder, 3308 config=config, 3309 name=name, 3310 ) 3311 3312 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3313 creation_requester=creation_requester, 3314 polling_requester=polling_requester, 3315 download_retriever=download_retriever, 3316 download_target_requester=download_target_requester, 3317 abort_requester=abort_requester, 3318 delete_requester=delete_requester, 3319 status_extractor=status_extractor, 3320 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3321 download_target_extractor=download_target_extractor, 3322 job_timeout=_get_job_timeout(), 3323 ) 3324 
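        # Editor's note (a sketch, not part of the original source): the repository above bundles the
        # whole async job lifecycle - creation, polling, download, and optional abort/delete requests -
        # behind a single object. The partition router built next turns each incoming stream slice into
        # orchestrated jobs against that repository; conceptually (the iteration entry point shown here
        # is assumed from the orchestrator's API):
        #
        #   orchestrator = AsyncJobOrchestrator(job_repository, slices, job_tracker, message_repository)
        #   for partition in orchestrator.create_and_get_completed_partitions():
        #       ...  # records are then read back through the download retriever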
        async_job_partition_router = AsyncJobPartitionRouter(
            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
                job_repository,
                stream_slices,
                self._job_tracker,
                self._message_repository,
                # FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk
                has_bulk_parent=False,
                # set `job_max_retry` to 1 for the Connector Builder use-case;
                # `None` means the default of 3 retry attempts is used under the hood.
                job_max_retry=1 if self._emit_connector_builder_messages else None,
            ),
            stream_slicer=stream_slicer,
            config=config,
            parameters=model.parameters or {},
        )

        return AsyncRetriever(
            record_selector=record_selector,
            stream_slicer=async_job_partition_router,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
        return Spec(
            connection_specification=model.connection_specification,
            documentation_url=model.documentation_url,
            advanced_auth=model.advanced_auth,
            parameters={},
        )

    def create_substream_partition_router(
        self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
    ) -> SubstreamPartitionRouter:
        parent_stream_configs = []
        if model.parent_stream_configs:
            parent_stream_configs.extend(
                [
                    self._create_message_repository_substream_wrapper(
                        model=parent_stream_config, config=config, **kwargs
                    )
                    for parent_stream_config in model.parent_stream_configs
                ]
            )

        return SubstreamPartitionRouter(
            parent_stream_configs=parent_stream_configs,
            parameters=model.parameters or {},
            config=config,
        )

    def _create_message_repository_substream_wrapper(
        self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
    ) -> Any:
        substream_factory = ModelToComponentFactory(
            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
            limit_slices_fetched=self._limit_slices_fetched,
            emit_connector_builder_messages=self._emit_connector_builder_messages,
            disable_retries=self._disable_retries,
            disable_cache=self._disable_cache,
            message_repository=LogAppenderMessageRepositoryDecorator(
                {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}},
                self._message_repository,
                self._evaluate_log_level(self._emit_connector_builder_messages),
            ),
        )

        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
        has_parent_state = bool(
            self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
            if model.incremental_dependency
            else False
        )
        return substream_factory._create_component_from_model(
            model=model, config=config, has_parent_state=has_parent_state, **kwargs
        )

    @staticmethod
    def create_wait_time_from_header(
        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
    ) -> WaitTimeFromHeaderBackoffStrategy:
        return WaitTimeFromHeaderBackoffStrategy(
            header=model.header,
            parameters=model.parameters or {},
            config=config,
            regex=model.regex,
            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds
            if model.max_waiting_time_in_seconds is not None
            else None,
        )

    @staticmethod
    def create_wait_until_time_from_header(
        model:
WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3421 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3422 return WaitUntilTimeFromHeaderBackoffStrategy( 3423 header=model.header, 3424 parameters=model.parameters or {}, 3425 config=config, 3426 min_wait=model.min_wait, 3427 regex=model.regex, 3428 ) 3429 3430 def get_message_repository(self) -> MessageRepository: 3431 return self._message_repository 3432 3433 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 3434 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 3435 3436 @staticmethod 3437 def create_components_mapping_definition( 3438 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 3439 ) -> ComponentMappingDefinition: 3440 interpolated_value = InterpolatedString.create( 3441 model.value, parameters=model.parameters or {} 3442 ) 3443 field_path = [ 3444 InterpolatedString.create(path, parameters=model.parameters or {}) 3445 for path in model.field_path 3446 ] 3447 return ComponentMappingDefinition( 3448 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 3449 value=interpolated_value, 3450 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 3451 parameters=model.parameters or {}, 3452 ) 3453 3454 def create_http_components_resolver( 3455 self, model: HttpComponentsResolverModel, config: Config 3456 ) -> Any: 3457 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 3458 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 3459 3460 retriever = self._create_component_from_model( 3461 model=model.retriever, 3462 config=config, 3463 name="", 3464 primary_key=None, 3465 stream_slicer=stream_slicer if stream_slicer else combined_slicers, 3466 transformations=[], 3467 ) 3468 3469 components_mapping = [ 3470 self._create_component_from_model( 3471 model=components_mapping_definition_model, 3472 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3473 components_mapping_definition_model.value_type 3474 ), 3475 config=config, 3476 ) 3477 for components_mapping_definition_model in model.components_mapping 3478 ] 3479 3480 return HttpComponentsResolver( 3481 retriever=retriever, 3482 config=config, 3483 components_mapping=components_mapping, 3484 parameters=model.parameters or {}, 3485 ) 3486 3487 @staticmethod 3488 def create_stream_config( 3489 model: StreamConfigModel, config: Config, **kwargs: Any 3490 ) -> StreamConfig: 3491 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 3492 [x for x in model.configs_pointer] if model.configs_pointer else [] 3493 ) 3494 3495 return StreamConfig( 3496 configs_pointer=model_configs_pointer, 3497 parameters=model.parameters or {}, 3498 ) 3499 3500 def create_config_components_resolver( 3501 self, model: ConfigComponentsResolverModel, config: Config 3502 ) -> Any: 3503 stream_config = self._create_component_from_model( 3504 model.stream_config, config=config, parameters=model.parameters or {} 3505 ) 3506 3507 components_mapping = [ 3508 self._create_component_from_model( 3509 model=components_mapping_definition_model, 3510 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3511 components_mapping_definition_model.value_type 3512 ), 3513 config=config, 3514 ) 3515 for components_mapping_definition_model in model.components_mapping 3516 ] 3517 3518 return ConfigComponentsResolver( 3519 stream_config=stream_config, 3520 config=config, 3521 
components_mapping=components_mapping,
            parameters=model.parameters or {},
        )

    _UNSUPPORTED_DECODER_ERROR = (
        "Specified decoder of {decoder_type} is not supported for pagination. "
        "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. "
        "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
    )

    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
            return True
        elif isinstance(decoder, CompositeRawDecoder):
            return self._is_supported_parser_for_pagination(decoder.parser)
        else:
            return False

    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
        if isinstance(parser, JsonParser):
            return True
        elif isinstance(parser, GzipParser):
            return isinstance(parser.inner_parser, JsonParser)
        else:
            return False

    def create_http_api_budget(
        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
    ) -> HttpAPIBudget:
        policies = [
            self._create_component_from_model(model=policy, config=config)
            for policy in model.policies
        ]

        return HttpAPIBudget(
            policies=policies,
            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
        )

    def create_fixed_window_call_rate_policy(
        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
    ) -> FixedWindowCallRatePolicy:
        matchers = [
            self._create_component_from_model(model=matcher, config=config)
            for matcher in model.matchers
        ]

        # Set the initial reset timestamp to 10 days from now.
        # This value will be updated by the first request.
3572 return FixedWindowCallRatePolicy( 3573 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 3574 period=parse_duration(model.period), 3575 call_limit=model.call_limit, 3576 matchers=matchers, 3577 ) 3578 3579 def create_moving_window_call_rate_policy( 3580 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 3581 ) -> MovingWindowCallRatePolicy: 3582 rates = [ 3583 self._create_component_from_model(model=rate, config=config) for rate in model.rates 3584 ] 3585 matchers = [ 3586 self._create_component_from_model(model=matcher, config=config) 3587 for matcher in model.matchers 3588 ] 3589 return MovingWindowCallRatePolicy( 3590 rates=rates, 3591 matchers=matchers, 3592 ) 3593 3594 def create_unlimited_call_rate_policy( 3595 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 3596 ) -> UnlimitedCallRatePolicy: 3597 matchers = [ 3598 self._create_component_from_model(model=matcher, config=config) 3599 for matcher in model.matchers 3600 ] 3601 3602 return UnlimitedCallRatePolicy( 3603 matchers=matchers, 3604 ) 3605 3606 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 3607 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 3608 return Rate( 3609 limit=int(interpolated_limit.eval(config=config)), 3610 interval=parse_duration(model.interval), 3611 ) 3612 3613 def create_http_request_matcher( 3614 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 3615 ) -> HttpRequestRegexMatcher: 3616 return HttpRequestRegexMatcher( 3617 method=model.method, 3618 url_base=model.url_base, 3619 url_path_pattern=model.url_path_pattern, 3620 params=model.params, 3621 headers=model.headers, 3622 ) 3623 3624 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 3625 self._api_budget = self.create_component( 3626 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 3627 ) 3628 3629 def create_grouping_partition_router( 3630 self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any 3631 ) -> GroupingPartitionRouter: 3632 underlying_router = self._create_component_from_model( 3633 model=model.underlying_partition_router, config=config 3634 ) 3635 if model.group_size < 1: 3636 raise ValueError(f"Group size must be greater than 0, got {model.group_size}") 3637 3638 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 3639 # because they are specific to individual partitions and cannot be aggregated or handled 3640 # when grouping, potentially leading to incorrect API calls. Any request customization 3641 # should be managed at the stream level through the requester's configuration. 3642 if isinstance(underlying_router, SubstreamPartitionRouter): 3643 if any( 3644 parent_config.request_option 3645 for parent_config in underlying_router.parent_stream_configs 3646 ): 3647 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 3648 3649 if isinstance(underlying_router, ListPartitionRouter): 3650 if underlying_router.request_option: 3651 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 3652 3653 return GroupingPartitionRouter( 3654 group_size=model.group_size, 3655 underlying_partition_router=underlying_router, 3656 deduplicate=model.deduplicate if model.deduplicate is not None else True, 3657 config=config, 3658 )
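For orientation, a minimal usage sketch of the factory (the empty config and the RequestPath manifest snippet are illustrative; the model import alias mirrors the convention used in this module):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RequestPath as RequestPathModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
# create_component validates that the definition's "type" matches the model class name,
# parses the mapping into the Pydantic model, then builds the runtime component.
request_path = factory.create_component(
    model_type=RequestPathModel,
    component_definition={"type": "RequestPath"},
    config={},
)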
ComponentDefinition = typing.Mapping[str, typing.Any]
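A ComponentDefinition is simply the parsed manifest mapping for a single component; create_component dispatches on its "type" key. An illustrative value (the condition shown is made up):

record_filter_definition: ComponentDefinition = {
    "type": "RecordFilter",
    "condition": "{{ record['status'] == 'active' }}",
}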
SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalization.None_: TransformConfig.NoTransform,
    SchemaNormalization.Default: TransformConfig.DefaultSchemaNormalization,
}
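This mapping backs create_record_selector: when schema_normalization is given as the plain SchemaNormalization enum rather than a custom model, the factory looks the value up here, e.g.:

schema_normalization = TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[SchemaNormalization.Default])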
class ModelToComponentFactory:
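A typical construction, e.g. for a Connector Builder test read (the limit values are illustrative; all arguments are optional, as shown in __init__ below):

factory = ModelToComponentFactory(
    limit_pages_fetched_per_slice=5,
    limit_slices_fetched=5,
    emit_connector_builder_messages=True,
)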
560class ModelToComponentFactory: 561 EPOCH_DATETIME_FORMAT = "%s" 562 563 def __init__( 564 self, 565 limit_pages_fetched_per_slice: Optional[int] = None, 566 limit_slices_fetched: Optional[int] = None, 567 emit_connector_builder_messages: bool = False, 568 disable_retries: bool = False, 569 disable_cache: bool = False, 570 disable_resumable_full_refresh: bool = False, 571 message_repository: Optional[MessageRepository] = None, 572 connector_state_manager: Optional[ConnectorStateManager] = None, 573 max_concurrent_async_job_count: Optional[int] = None, 574 ): 575 self._init_mappings() 576 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 577 self._limit_slices_fetched = limit_slices_fetched 578 self._emit_connector_builder_messages = emit_connector_builder_messages 579 self._disable_retries = disable_retries 580 self._disable_cache = disable_cache 581 self._disable_resumable_full_refresh = disable_resumable_full_refresh 582 self._message_repository = message_repository or InMemoryMessageRepository( 583 self._evaluate_log_level(emit_connector_builder_messages) 584 ) 585 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 586 self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None 587 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 588 589 def _init_mappings(self) -> None: 590 self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { 591 AddedFieldDefinitionModel: self.create_added_field_definition, 592 AddFieldsModel: self.create_add_fields, 593 ApiKeyAuthenticatorModel: self.create_api_key_authenticator, 594 BasicHttpAuthenticatorModel: self.create_basic_http_authenticator, 595 BearerAuthenticatorModel: self.create_bearer_authenticator, 596 CheckStreamModel: self.create_check_stream, 597 DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config, 598 CheckDynamicStreamModel: self.create_check_dynamic_stream, 599 CompositeErrorHandlerModel: self.create_composite_error_handler, 600 ConcurrencyLevelModel: self.create_concurrency_level, 601 ConstantBackoffStrategyModel: self.create_constant_backoff_strategy, 602 CsvDecoderModel: self.create_csv_decoder, 603 CursorPaginationModel: self.create_cursor_pagination, 604 CustomAuthenticatorModel: self.create_custom_component, 605 CustomBackoffStrategyModel: self.create_custom_component, 606 CustomDecoderModel: self.create_custom_component, 607 CustomErrorHandlerModel: self.create_custom_component, 608 CustomIncrementalSyncModel: self.create_custom_component, 609 CustomRecordExtractorModel: self.create_custom_component, 610 CustomRecordFilterModel: self.create_custom_component, 611 CustomRequesterModel: self.create_custom_component, 612 CustomRetrieverModel: self.create_custom_component, 613 CustomSchemaLoader: self.create_custom_component, 614 CustomSchemaNormalizationModel: self.create_custom_component, 615 CustomStateMigration: self.create_custom_component, 616 CustomPaginationStrategyModel: self.create_custom_component, 617 CustomPartitionRouterModel: self.create_custom_component, 618 CustomTransformationModel: self.create_custom_component, 619 DatetimeBasedCursorModel: self.create_datetime_based_cursor, 620 DeclarativeStreamModel: self.create_declarative_stream, 621 DefaultErrorHandlerModel: self.create_default_error_handler, 622 DefaultPaginatorModel: self.create_default_paginator, 623 DpathExtractorModel: self.create_dpath_extractor, 624 ResponseToFileExtractorModel: self.create_response_to_file_extractor, 625 
ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy, 626 SessionTokenAuthenticatorModel: self.create_session_token_authenticator, 627 GroupByKeyMergeStrategyModel: self.create_group_by_key, 628 HttpRequesterModel: self.create_http_requester, 629 HttpResponseFilterModel: self.create_http_response_filter, 630 InlineSchemaLoaderModel: self.create_inline_schema_loader, 631 JsonDecoderModel: self.create_json_decoder, 632 JsonlDecoderModel: self.create_jsonl_decoder, 633 GzipDecoderModel: self.create_gzip_decoder, 634 KeysToLowerModel: self.create_keys_to_lower_transformation, 635 KeysToSnakeCaseModel: self.create_keys_to_snake_transformation, 636 KeysReplaceModel: self.create_keys_replace_transformation, 637 FlattenFieldsModel: self.create_flatten_fields, 638 DpathFlattenFieldsModel: self.create_dpath_flatten_fields, 639 IterableDecoderModel: self.create_iterable_decoder, 640 IncrementingCountCursorModel: self.create_incrementing_count_cursor, 641 XmlDecoderModel: self.create_xml_decoder, 642 JsonFileSchemaLoaderModel: self.create_json_file_schema_loader, 643 DynamicSchemaLoaderModel: self.create_dynamic_schema_loader, 644 SchemaTypeIdentifierModel: self.create_schema_type_identifier, 645 TypesMapModel: self.create_types_map, 646 ComplexFieldTypeModel: self.create_complex_field_type, 647 JwtAuthenticatorModel: self.create_jwt_authenticator, 648 LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration, 649 ListPartitionRouterModel: self.create_list_partition_router, 650 MinMaxDatetimeModel: self.create_min_max_datetime, 651 NoAuthModel: self.create_no_auth, 652 NoPaginationModel: self.create_no_pagination, 653 OAuthAuthenticatorModel: self.create_oauth_authenticator, 654 OffsetIncrementModel: self.create_offset_increment, 655 PageIncrementModel: self.create_page_increment, 656 ParentStreamConfigModel: self.create_parent_stream_config, 657 PropertiesFromEndpointModel: self.create_properties_from_endpoint, 658 PropertyChunkingModel: self.create_property_chunking, 659 QueryPropertiesModel: self.create_query_properties, 660 RecordFilterModel: self.create_record_filter, 661 RecordSelectorModel: self.create_record_selector, 662 RemoveFieldsModel: self.create_remove_fields, 663 RequestPathModel: self.create_request_path, 664 RequestOptionModel: self.create_request_option, 665 LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator, 666 SelectiveAuthenticatorModel: self.create_selective_authenticator, 667 SimpleRetrieverModel: self.create_simple_retriever, 668 StateDelegatingStreamModel: self.create_state_delegating_stream, 669 SpecModel: self.create_spec, 670 SubstreamPartitionRouterModel: self.create_substream_partition_router, 671 WaitTimeFromHeaderModel: self.create_wait_time_from_header, 672 WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header, 673 AsyncRetrieverModel: self.create_async_retriever, 674 HttpComponentsResolverModel: self.create_http_components_resolver, 675 ConfigComponentsResolverModel: self.create_config_components_resolver, 676 StreamConfigModel: self.create_stream_config, 677 ComponentMappingDefinitionModel: self.create_components_mapping_definition, 678 ZipfileDecoderModel: self.create_zipfile_decoder, 679 HTTPAPIBudgetModel: self.create_http_api_budget, 680 FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, 681 MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, 682 UnlimitedCallRatePolicyModel: 
self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
            GroupingPartitionRouterModel: self.create_grouping_partition_router,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}

    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
        creating declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )

    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")
        return component_constructor(model=model, config=config, **kwargs)

    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )

    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]

    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to an empty string."
848 ) 849 850 request_option = ( 851 self._create_component_from_model( 852 model.inject_into, config, parameters=model.parameters or {} 853 ) 854 if model.inject_into 855 else RequestOption( 856 inject_into=RequestOptionType.header, 857 field_name=model.header or "", 858 parameters=model.parameters or {}, 859 ) 860 ) 861 862 return ApiKeyAuthenticator( 863 token_provider=( 864 token_provider 865 if token_provider is not None 866 else InterpolatedStringTokenProvider( 867 api_token=model.api_token or "", 868 config=config, 869 parameters=model.parameters or {}, 870 ) 871 ), 872 request_option=request_option, 873 config=config, 874 parameters=model.parameters or {}, 875 ) 876 877 def create_legacy_to_per_partition_state_migration( 878 self, 879 model: LegacyToPerPartitionStateMigrationModel, 880 config: Mapping[str, Any], 881 declarative_stream: DeclarativeStreamModel, 882 ) -> LegacyToPerPartitionStateMigration: 883 retriever = declarative_stream.retriever 884 if not isinstance(retriever, SimpleRetrieverModel): 885 raise ValueError( 886 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}" 887 ) 888 partition_router = retriever.partition_router 889 if not isinstance( 890 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 891 ): 892 raise ValueError( 893 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 894 ) 895 if not hasattr(partition_router, "parent_stream_configs"): 896 raise ValueError( 897 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 898 ) 899 900 if not hasattr(declarative_stream, "incremental_sync"): 901 raise ValueError( 902 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 903 ) 904 905 return LegacyToPerPartitionStateMigration( 906 partition_router, # type: ignore # was already checked above 907 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 
config,
            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=token_provider,
            )

    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to an empty string."
974 ) 975 return BearerAuthenticator( 976 token_provider=( 977 token_provider 978 if token_provider is not None 979 else InterpolatedStringTokenProvider( 980 api_token=model.api_token or "", 981 config=config, 982 parameters=model.parameters or {}, 983 ) 984 ), 985 config=config, 986 parameters=model.parameters or {}, 987 ) 988 989 @staticmethod 990 def create_dynamic_stream_check_config( 991 model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any 992 ) -> DynamicStreamCheckConfig: 993 return DynamicStreamCheckConfig( 994 dynamic_stream_name=model.dynamic_stream_name, 995 stream_count=model.stream_count or 0, 996 ) 997 998 def create_check_stream( 999 self, model: CheckStreamModel, config: Config, **kwargs: Any 1000 ) -> CheckStream: 1001 if model.dynamic_streams_check_configs is None and model.stream_names is None: 1002 raise ValueError( 1003 "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream" 1004 ) 1005 1006 dynamic_streams_check_configs = ( 1007 [ 1008 self._create_component_from_model(model=dynamic_stream_check_config, config=config) 1009 for dynamic_stream_check_config in model.dynamic_streams_check_configs 1010 ] 1011 if model.dynamic_streams_check_configs 1012 else [] 1013 ) 1014 1015 return CheckStream( 1016 stream_names=model.stream_names or [], 1017 dynamic_streams_check_configs=dynamic_streams_check_configs, 1018 parameters={}, 1019 ) 1020 1021 @staticmethod 1022 def create_check_dynamic_stream( 1023 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 1024 ) -> CheckDynamicStream: 1025 assert model.use_check_availability is not None # for mypy 1026 1027 use_check_availability = model.use_check_availability 1028 1029 return CheckDynamicStream( 1030 stream_count=model.stream_count, 1031 use_check_availability=use_check_availability, 1032 parameters={}, 1033 ) 1034 1035 def create_composite_error_handler( 1036 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 1037 ) -> CompositeErrorHandler: 1038 error_handlers = [ 1039 self._create_component_from_model(model=error_handler_model, config=config) 1040 for error_handler_model in model.error_handlers 1041 ] 1042 return CompositeErrorHandler( 1043 error_handlers=error_handlers, parameters=model.parameters or {} 1044 ) 1045 1046 @staticmethod 1047 def create_concurrency_level( 1048 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 1049 ) -> ConcurrencyLevel: 1050 return ConcurrencyLevel( 1051 default_concurrency=model.default_concurrency, 1052 max_concurrency=model.max_concurrency, 1053 config=config, 1054 parameters={}, 1055 ) 1056 1057 @staticmethod 1058 def apply_stream_state_migrations( 1059 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 1060 ) -> MutableMapping[str, Any]: 1061 if stream_state_migrations: 1062 for state_migration in stream_state_migrations: 1063 if state_migration.should_migrate(stream_state): 1064 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 
1065 stream_state = dict(state_migration.migrate(stream_state)) 1066 return stream_state 1067 1068 def create_concurrent_cursor_from_datetime_based_cursor( 1069 self, 1070 model_type: Type[BaseModel], 1071 component_definition: ComponentDefinition, 1072 stream_name: str, 1073 stream_namespace: Optional[str], 1074 config: Config, 1075 message_repository: Optional[MessageRepository] = None, 1076 runtime_lookback_window: Optional[datetime.timedelta] = None, 1077 stream_state_migrations: Optional[List[Any]] = None, 1078 **kwargs: Any, 1079 ) -> ConcurrentCursor: 1080 # Per-partition incremental streams can dynamically create child cursors which will pass their current 1081 # state via the stream_state keyword argument. Incremental syncs without parent streams use the 1082 # incoming state and connector_state_manager that is initialized when the component factory is created 1083 stream_state = ( 1084 self._connector_state_manager.get_stream_state(stream_name, stream_namespace) 1085 if "stream_state" not in kwargs 1086 else kwargs["stream_state"] 1087 ) 1088 stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1089 1090 component_type = component_definition.get("type") 1091 if component_definition.get("type") != model_type.__name__: 1092 raise ValueError( 1093 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1094 ) 1095 1096 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1097 1098 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1099 raise ValueError( 1100 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1101 ) 1102 1103 interpolated_cursor_field = InterpolatedString.create( 1104 datetime_based_cursor_model.cursor_field, 1105 parameters=datetime_based_cursor_model.parameters or {}, 1106 ) 1107 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1108 1109 interpolated_partition_field_start = InterpolatedString.create( 1110 datetime_based_cursor_model.partition_field_start or "start_time", 1111 parameters=datetime_based_cursor_model.parameters or {}, 1112 ) 1113 interpolated_partition_field_end = InterpolatedString.create( 1114 datetime_based_cursor_model.partition_field_end or "end_time", 1115 parameters=datetime_based_cursor_model.parameters or {}, 1116 ) 1117 1118 slice_boundary_fields = ( 1119 interpolated_partition_field_start.eval(config=config), 1120 interpolated_partition_field_end.eval(config=config), 1121 ) 1122 1123 datetime_format = datetime_based_cursor_model.datetime_format 1124 1125 cursor_granularity = ( 1126 parse_duration(datetime_based_cursor_model.cursor_granularity) 1127 if datetime_based_cursor_model.cursor_granularity 1128 else None 1129 ) 1130 1131 lookback_window = None 1132 interpolated_lookback_window = ( 1133 InterpolatedString.create( 1134 datetime_based_cursor_model.lookback_window, 1135 parameters=datetime_based_cursor_model.parameters or {}, 1136 ) 1137 if datetime_based_cursor_model.lookback_window 1138 else None 1139 ) 1140 if interpolated_lookback_window: 1141 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1142 if evaluated_lookback_window: 1143 lookback_window = parse_duration(evaluated_lookback_window) 1144 1145 connector_state_converter: DateTimeStreamStateConverter 1146 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1147 datetime_format=datetime_format, 1148 
input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1149 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1150 cursor_granularity=cursor_granularity, 1151 ) 1152 1153 # Adjusts the stream state by applying the runtime lookback window. 1154 # This is used to ensure correct state handling in case of failed partitions. 1155 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1156 if runtime_lookback_window and stream_state_value: 1157 new_stream_state = ( 1158 connector_state_converter.parse_timestamp(stream_state_value) 1159 - runtime_lookback_window 1160 ) 1161 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1162 new_stream_state 1163 ) 1164 1165 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1166 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1167 start_date_runtime_value = self.create_min_max_datetime( 1168 model=datetime_based_cursor_model.start_datetime, config=config 1169 ) 1170 else: 1171 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1172 1173 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1174 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1175 end_date_runtime_value = self.create_min_max_datetime( 1176 model=datetime_based_cursor_model.end_datetime, config=config 1177 ) 1178 else: 1179 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1180 1181 interpolated_start_date = MinMaxDatetime.create( 1182 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1183 parameters=datetime_based_cursor_model.parameters, 1184 ) 1185 interpolated_end_date = ( 1186 None 1187 if not end_date_runtime_value 1188 else MinMaxDatetime.create( 1189 end_date_runtime_value, datetime_based_cursor_model.parameters 1190 ) 1191 ) 1192 1193 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1194 if not interpolated_start_date.datetime_format: 1195 interpolated_start_date.datetime_format = datetime_format 1196 if interpolated_end_date and not interpolated_end_date.datetime_format: 1197 interpolated_end_date.datetime_format = datetime_format 1198 1199 start_date = interpolated_start_date.get_datetime(config=config) 1200 end_date_provider = ( 1201 partial(interpolated_end_date.get_datetime, config) 1202 if interpolated_end_date 1203 else connector_state_converter.get_end_provider() 1204 ) 1205 1206 if ( 1207 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1208 ) or ( 1209 not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity 1210 ): 1211 raise ValueError( 1212 f"If step is defined, cursor_granularity should be as well and vice-versa. 
" 1213 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1214 ) 1215 1216 # When step is not defined, default to a step size from the starting date to the present moment 1217 step_length = datetime.timedelta.max 1218 interpolated_step = ( 1219 InterpolatedString.create( 1220 datetime_based_cursor_model.step, 1221 parameters=datetime_based_cursor_model.parameters or {}, 1222 ) 1223 if datetime_based_cursor_model.step 1224 else None 1225 ) 1226 if interpolated_step: 1227 evaluated_step = interpolated_step.eval(config) 1228 if evaluated_step: 1229 step_length = parse_duration(evaluated_step) 1230 1231 clamping_strategy: ClampingStrategy = NoClamping() 1232 if datetime_based_cursor_model.clamping: 1233 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1234 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1235 # object which we want to keep agnostic of being low-code 1236 target = InterpolatedString( 1237 string=datetime_based_cursor_model.clamping.target, 1238 parameters=datetime_based_cursor_model.parameters or {}, 1239 ) 1240 evaluated_target = target.eval(config=config) 1241 match evaluated_target: 1242 case "DAY": 1243 clamping_strategy = DayClampingStrategy() 1244 end_date_provider = ClampingEndProvider( 1245 DayClampingStrategy(is_ceiling=False), 1246 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1247 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1248 ) 1249 case "WEEK": 1250 if ( 1251 not datetime_based_cursor_model.clamping.target_details 1252 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1253 ): 1254 raise ValueError( 1255 "Given WEEK clamping, weekday needs to be provided as target_details" 1256 ) 1257 weekday = self._assemble_weekday( 1258 datetime_based_cursor_model.clamping.target_details["weekday"] 1259 ) 1260 clamping_strategy = WeekClampingStrategy(weekday) 1261 end_date_provider = ClampingEndProvider( 1262 WeekClampingStrategy(weekday, is_ceiling=False), 1263 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1264 granularity=cursor_granularity or datetime.timedelta(days=1), 1265 ) 1266 case "MONTH": 1267 clamping_strategy = MonthClampingStrategy() 1268 end_date_provider = ClampingEndProvider( 1269 MonthClampingStrategy(is_ceiling=False), 1270 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1271 granularity=cursor_granularity or datetime.timedelta(days=1), 1272 ) 1273 case _: 1274 raise ValueError( 1275 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1276 ) 1277 1278 return ConcurrentCursor( 1279 stream_name=stream_name, 1280 stream_namespace=stream_namespace, 1281 stream_state=stream_state, 1282 message_repository=message_repository or self._message_repository, 1283 connector_state_manager=self._connector_state_manager, 1284 connector_state_converter=connector_state_converter, 1285 cursor_field=cursor_field, 1286 slice_boundary_fields=slice_boundary_fields, 1287 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1288 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1289 lookback_window=lookback_window, 1290 slice_range=step_length, 1291 cursor_granularity=cursor_granularity, 1292 clamping_strategy=clamping_strategy, 1293 ) 1294 1295 def create_concurrent_cursor_from_incrementing_count_cursor( 1296 self, 1297 model_type: Type[BaseModel], 1298 component_definition: ComponentDefinition, 1299 stream_name: str, 1300 stream_namespace: Optional[str], 1301 config: Config, 1302 message_repository: Optional[MessageRepository] = None, 1303 **kwargs: Any, 1304 ) -> ConcurrentCursor: 1305 # Per-partition incremental streams can dynamically create child cursors which will pass their current 1306 # state via the stream_state keyword argument. Incremental syncs without parent streams use the 1307 # incoming state and connector_state_manager that is initialized when the component factory is created 1308 stream_state = ( 1309 self._connector_state_manager.get_stream_state(stream_name, stream_namespace) 1310 if "stream_state" not in kwargs 1311 else kwargs["stream_state"] 1312 ) 1313 1314 component_type = component_definition.get("type") 1315 if component_definition.get("type") != model_type.__name__: 1316 raise ValueError( 1317 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1318 ) 1319 1320 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1321 1322 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1323 raise ValueError( 1324 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1325 ) 1326 1327 interpolated_start_value = ( 1328 InterpolatedString.create( 1329 incrementing_count_cursor_model.start_value, # type: ignore 1330 parameters=incrementing_count_cursor_model.parameters or {}, 1331 ) 1332 if incrementing_count_cursor_model.start_value 1333 else 0 1334 ) 1335 1336 interpolated_cursor_field = InterpolatedString.create( 1337 incrementing_count_cursor_model.cursor_field, 1338 parameters=incrementing_count_cursor_model.parameters or {}, 1339 ) 1340 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1341 1342 connector_state_converter = IncrementingCountStreamStateConverter( 1343 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1344 ) 1345 1346 return ConcurrentCursor( 1347 stream_name=stream_name, 1348 stream_namespace=stream_namespace, 1349 stream_state=stream_state, 1350 message_repository=message_repository or self._message_repository, 
1351 connector_state_manager=self._connector_state_manager, 1352 connector_state_converter=connector_state_converter, 1353 cursor_field=cursor_field, 1354 slice_boundary_fields=None, 1355 start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1356 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1357 ) 1358 1359 def _assemble_weekday(self, weekday: str) -> Weekday: 1360 match weekday: 1361 case "MONDAY": 1362 return Weekday.MONDAY 1363 case "TUESDAY": 1364 return Weekday.TUESDAY 1365 case "WEDNESDAY": 1366 return Weekday.WEDNESDAY 1367 case "THURSDAY": 1368 return Weekday.THURSDAY 1369 case "FRIDAY": 1370 return Weekday.FRIDAY 1371 case "SATURDAY": 1372 return Weekday.SATURDAY 1373 case "SUNDAY": 1374 return Weekday.SUNDAY 1375 case _: 1376 raise ValueError(f"Unknown weekday {weekday}") 1377 1378 def create_concurrent_cursor_from_perpartition_cursor( 1379 self, 1380 state_manager: ConnectorStateManager, 1381 model_type: Type[BaseModel], 1382 component_definition: ComponentDefinition, 1383 stream_name: str, 1384 stream_namespace: Optional[str], 1385 config: Config, 1386 stream_state: MutableMapping[str, Any], 1387 partition_router: PartitionRouter, 1388 stream_state_migrations: Optional[List[Any]] = None, 1389 **kwargs: Any, 1390 ) -> ConcurrentPerPartitionCursor: 1391 component_type = component_definition.get("type") 1392 if component_definition.get("type") != model_type.__name__: 1393 raise ValueError( 1394 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1395 ) 1396 1397 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1398 1399 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1400 raise ValueError( 1401 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1402 ) 1403 1404 interpolated_cursor_field = InterpolatedString.create( 1405 datetime_based_cursor_model.cursor_field, 1406 parameters=datetime_based_cursor_model.parameters or {}, 1407 ) 1408 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1409 1410 datetime_format = datetime_based_cursor_model.datetime_format 1411 1412 cursor_granularity = ( 1413 parse_duration(datetime_based_cursor_model.cursor_granularity) 1414 if datetime_based_cursor_model.cursor_granularity 1415 else None 1416 ) 1417 1418 connector_state_converter: DateTimeStreamStateConverter 1419 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1420 datetime_format=datetime_format, 1421 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1422 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1423 cursor_granularity=cursor_granularity, 1424 ) 1425 1426 # Create the cursor factory 1427 cursor_factory = ConcurrentCursorFactory( 1428 partial( 1429 self.create_concurrent_cursor_from_datetime_based_cursor, 1430 state_manager=state_manager, 1431 model_type=model_type, 1432 component_definition=component_definition, 1433 stream_name=stream_name, 1434 stream_namespace=stream_namespace, 1435 config=config, 1436 message_repository=NoopMessageRepository(), 1437 stream_state_migrations=stream_state_migrations, 1438 ) 1439 ) 1440 stream_state 
= self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1441 1442 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1443 use_global_cursor = isinstance( 1444 partition_router, GroupingPartitionRouter 1445 ) or component_definition.get("global_substream_cursor", False) 1446 1447 # Return the concurrent cursor and state converter 1448 return ConcurrentPerPartitionCursor( 1449 cursor_factory=cursor_factory, 1450 partition_router=partition_router, 1451 stream_name=stream_name, 1452 stream_namespace=stream_namespace, 1453 stream_state=stream_state, 1454 message_repository=self._message_repository, # type: ignore 1455 connector_state_manager=state_manager, 1456 connector_state_converter=connector_state_converter, 1457 cursor_field=cursor_field, 1458 use_global_cursor=use_global_cursor, 1459 ) 1460 1461 @staticmethod 1462 def create_constant_backoff_strategy( 1463 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1464 ) -> ConstantBackoffStrategy: 1465 return ConstantBackoffStrategy( 1466 backoff_time_in_seconds=model.backoff_time_in_seconds, 1467 config=config, 1468 parameters=model.parameters or {}, 1469 ) 1470 1471 def create_cursor_pagination( 1472 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1473 ) -> CursorPaginationStrategy: 1474 if isinstance(decoder, PaginationDecoderDecorator): 1475 inner_decoder = decoder.decoder 1476 else: 1477 inner_decoder = decoder 1478 decoder = PaginationDecoderDecorator(decoder=decoder) 1479 1480 if self._is_supported_decoder_for_pagination(inner_decoder): 1481 decoder_to_use = decoder 1482 else: 1483 raise ValueError( 1484 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1485 ) 1486 1487 return CursorPaginationStrategy( 1488 cursor_value=model.cursor_value, 1489 decoder=decoder_to_use, 1490 page_size=model.page_size, 1491 stop_condition=model.stop_condition, 1492 config=config, 1493 parameters=model.parameters or {}, 1494 ) 1495 1496 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1497 """ 1498 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1499 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1500 :param model: The Pydantic model of the custom component being created 1501 :param config: The custom defined connector config 1502 :return: The declarative component built from the Pydantic model to be used at runtime 1503 """ 1504 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1505 component_fields = get_type_hints(custom_component_class) 1506 model_args = model.dict() 1507 model_args["config"] = config 1508 1509 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1510 # we defer to these arguments over the component's definition 1511 for key, arg in kwargs.items(): 1512 model_args[key] = arg 1513 1514 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1515 # defined in the schema. The fields and types are defined within the Python class implementation. 
Pydantic can only parse down to 1516 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1517 for model_field, model_value in model_args.items(): 1518 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1519 if ( 1520 isinstance(model_value, dict) 1521 and "type" not in model_value 1522 and model_field in component_fields 1523 ): 1524 derived_type = self._derive_component_type_from_type_hints( 1525 component_fields.get(model_field) 1526 ) 1527 if derived_type: 1528 model_value["type"] = derived_type 1529 1530 if self._is_component(model_value): 1531 model_args[model_field] = self._create_nested_component( 1532 model, model_field, model_value, config 1533 ) 1534 elif isinstance(model_value, list): 1535 vals = [] 1536 for v in model_value: 1537 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1538 derived_type = self._derive_component_type_from_type_hints( 1539 component_fields.get(model_field) 1540 ) 1541 if derived_type: 1542 v["type"] = derived_type 1543 if self._is_component(v): 1544 vals.append(self._create_nested_component(model, model_field, v, config)) 1545 else: 1546 vals.append(v) 1547 model_args[model_field] = vals 1548 1549 kwargs = { 1550 class_field: model_args[class_field] 1551 for class_field in component_fields.keys() 1552 if class_field in model_args 1553 } 1554 return custom_component_class(**kwargs) 1555 1556 @staticmethod 1557 def _get_class_from_fully_qualified_class_name( 1558 full_qualified_class_name: str, 1559 ) -> Any: 1560 """Get a class from its fully qualified name. 1561 1562 If a custom components module is needed, we assume it is already registered - probably 1563 as `source_declarative_manifest.components` or `components`. 1564 1565 Args: 1566 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1567 1568 Returns: 1569 Any: The class object. 1570 1571 Raises: 1572 ValueError: If the class cannot be loaded. 1573 """ 1574 split = full_qualified_class_name.split(".") 1575 module_name_full = ".".join(split[:-1]) 1576 class_name = split[-1] 1577 1578 try: 1579 module_ref = importlib.import_module(module_name_full) 1580 except ModuleNotFoundError as e: 1581 if split[0] == "source_declarative_manifest": 1582 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. 
In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append 1583 try: 1584 import os 1585 1586 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1587 module_ref = importlib.import_module( 1588 module_name_with_source_declarative_manifest 1589 ) 1590 except ModuleNotFoundError: 1591 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1592 else: 1593 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1594 1595 try: 1596 return getattr(module_ref, class_name) 1597 except AttributeError as e: 1598 raise ValueError( 1599 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1600 ) from e 1601 1602 @staticmethod 1603 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1604 interface = field_type 1605 while True: 1606 origin = get_origin(interface) 1607 if origin: 1608 # Unnest types until we reach the raw type 1609 # List[T] -> T 1610 # Optional[List[T]] -> T 1611 args = get_args(interface) 1612 interface = args[0] 1613 else: 1614 break 1615 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1616 return interface.__name__ 1617 return None 1618 1619 @staticmethod 1620 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1621 if not cls: 1622 return False 1623 return cls.__module__ == "builtins" 1624 1625 @staticmethod 1626 def _extract_missing_parameters(error: TypeError) -> List[str]: 1627 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1628 if parameter_search: 1629 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1630 else: 1631 return [] 1632 1633 def _create_nested_component( 1634 self, model: Any, model_field: str, model_value: Any, config: Config 1635 ) -> Any: 1636 type_name = model_value.get("type", None) 1637 if not type_name: 1638 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1639 return model_value 1640 1641 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1642 if model_type: 1643 parsed_model = model_type.parse_obj(model_value) 1644 try: 1645 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1646 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1647 # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1648 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1649 # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that 1650 # are needed by a component and could not be shared.
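            # As an illustration only (hypothetical manifest values, not taken from this module):
            # a DefaultPaginator nested inside a custom component cannot receive url_base from its
            # parent the way it would inside a SimpleRetriever, so a connector developer can supply
            # it explicitly through $parameters:
            #
            #     model_value = {
            #         "type": "DefaultPaginator",
            #         "$parameters": {"url_base": "https://api.example.com/v1"},
            #         "pagination_strategy": {"type": "PageIncrement", "page_size": 50},
            #     }
            #
            # The lookup below then matches "url_base" against the keyword-only arguments of the
            # registered create method and forwards it to _create_component_from_model.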
1651 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1652 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1653 model_parameters = model_value.get("$parameters", {}) 1654 matching_parameters = { 1655 kwarg: model_parameters[kwarg] 1656 for kwarg in constructor_kwargs 1657 if kwarg in model_parameters 1658 } 1659 return self._create_component_from_model( 1660 model=parsed_model, config=config, **matching_parameters 1661 ) 1662 except TypeError as error: 1663 missing_parameters = self._extract_missing_parameters(error) 1664 if missing_parameters: 1665 raise ValueError( 1666 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1667 + ", ".join( 1668 ( 1669 f"{type_name}.$parameters.{parameter}" 1670 for parameter in missing_parameters 1671 ) 1672 ) 1673 ) 1674 raise TypeError( 1675 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1676 ) 1677 else: 1678 raise ValueError( 1679 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1680 ) 1681 1682 @staticmethod 1683 def _is_component(model_value: Any) -> bool: 1684 return isinstance(model_value, dict) and model_value.get("type") is not None 1685 1686 def create_datetime_based_cursor( 1687 self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any 1688 ) -> DatetimeBasedCursor: 1689 start_datetime: Union[str, MinMaxDatetime] = ( 1690 model.start_datetime 1691 if isinstance(model.start_datetime, str) 1692 else self.create_min_max_datetime(model.start_datetime, config) 1693 ) 1694 end_datetime: Union[str, MinMaxDatetime, None] = None 1695 if model.is_data_feed and model.end_datetime: 1696 raise ValueError("Data feed does not support end_datetime") 1697 if model.is_data_feed and model.is_client_side_incremental: 1698 raise ValueError( 1699 "`Client side incremental` cannot be applied with `data feed`. Choose only one of them."
1700 ) 1701 if model.end_datetime: 1702 end_datetime = ( 1703 model.end_datetime 1704 if isinstance(model.end_datetime, str) 1705 else self.create_min_max_datetime(model.end_datetime, config) 1706 ) 1707 1708 end_time_option = ( 1709 self._create_component_from_model( 1710 model.end_time_option, config, parameters=model.parameters or {} 1711 ) 1712 if model.end_time_option 1713 else None 1714 ) 1715 start_time_option = ( 1716 self._create_component_from_model( 1717 model.start_time_option, config, parameters=model.parameters or {} 1718 ) 1719 if model.start_time_option 1720 else None 1721 ) 1722 1723 return DatetimeBasedCursor( 1724 cursor_field=model.cursor_field, 1725 cursor_datetime_formats=model.cursor_datetime_formats 1726 if model.cursor_datetime_formats 1727 else [], 1728 cursor_granularity=model.cursor_granularity, 1729 datetime_format=model.datetime_format, 1730 end_datetime=end_datetime, 1731 start_datetime=start_datetime, 1732 step=model.step, 1733 end_time_option=end_time_option, 1734 lookback_window=model.lookback_window, 1735 start_time_option=start_time_option, 1736 partition_field_end=model.partition_field_end, 1737 partition_field_start=model.partition_field_start, 1738 message_repository=self._message_repository, 1739 is_compare_strictly=model.is_compare_strictly, 1740 config=config, 1741 parameters=model.parameters or {}, 1742 ) 1743 1744 def create_declarative_stream( 1745 self, model: DeclarativeStreamModel, config: Config, **kwargs: Any 1746 ) -> DeclarativeStream: 1747 # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field 1748 # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the 1749 # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in 1750 # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one. 1751 combined_slicers = self._merge_stream_slicers(model=model, config=config) 1752 1753 primary_key = model.primary_key.__root__ if model.primary_key else None 1754 stop_condition_on_cursor = ( 1755 model.incremental_sync 1756 and hasattr(model.incremental_sync, "is_data_feed") 1757 and model.incremental_sync.is_data_feed 1758 ) 1759 client_side_incremental_sync = None 1760 if ( 1761 model.incremental_sync 1762 and hasattr(model.incremental_sync, "is_client_side_incremental") 1763 and model.incremental_sync.is_client_side_incremental 1764 ): 1765 supported_slicers = ( 1766 DatetimeBasedCursor, 1767 GlobalSubstreamCursor, 1768 PerPartitionWithGlobalCursor, 1769 ) 1770 if combined_slicers and not isinstance(combined_slicers, supported_slicers): 1771 raise ValueError( 1772 "Unsupported Slicer is used. 
PerPartitionWithGlobalCursor should be used here instead" 1773 ) 1774 cursor = ( 1775 combined_slicers 1776 if isinstance( 1777 combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) 1778 ) 1779 else self._create_component_from_model(model=model.incremental_sync, config=config) 1780 ) 1781 1782 client_side_incremental_sync = {"cursor": cursor} 1783 1784 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1785 cursor_model = model.incremental_sync 1786 1787 end_time_option = ( 1788 self._create_component_from_model( 1789 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1790 ) 1791 if cursor_model.end_time_option 1792 else None 1793 ) 1794 start_time_option = ( 1795 self._create_component_from_model( 1796 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1797 ) 1798 if cursor_model.start_time_option 1799 else None 1800 ) 1801 1802 request_options_provider = DatetimeBasedRequestOptionsProvider( 1803 start_time_option=start_time_option, 1804 end_time_option=end_time_option, 1805 partition_field_start=cursor_model.partition_field_start, 1806 partition_field_end=cursor_model.partition_field_end, 1807 config=config, 1808 parameters=model.parameters or {}, 1809 ) 1810 elif model.incremental_sync and isinstance( 1811 model.incremental_sync, IncrementingCountCursorModel 1812 ): 1813 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 1814 1815 start_time_option = ( 1816 self._create_component_from_model( 1817 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model is of type DatetimeBasedCursor 1818 config, 1819 parameters=cursor_model.parameters or {}, 1820 ) 1821 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model is of type DatetimeBasedCursor 1822 else None 1823 ) 1824 1825 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 1826 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 1827 partition_field_start = "start" 1828 1829 request_options_provider = DatetimeBasedRequestOptionsProvider( 1830 start_time_option=start_time_option, 1831 partition_field_start=partition_field_start, 1832 config=config, 1833 parameters=model.parameters or {}, 1834 ) 1835 else: 1836 request_options_provider = None 1837 1838 transformations = [] 1839 if model.transformations: 1840 for transformation_model in model.transformations: 1841 transformations.append( 1842 self._create_component_from_model(model=transformation_model, config=config) 1843 ) 1844 1845 retriever = self._create_component_from_model( 1846 model=model.retriever, 1847 config=config, 1848 name=model.name, 1849 primary_key=primary_key, 1850 stream_slicer=combined_slicers, 1851 request_options_provider=request_options_provider, 1852 stop_condition_on_cursor=stop_condition_on_cursor, 1853 client_side_incremental_sync=client_side_incremental_sync, 1854 transformations=transformations, 1855 incremental_sync=model.incremental_sync, 1856 ) 1857 cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None 1858 1859 if model.state_migrations: 1860 state_transformations = [ 1861 self._create_component_from_model(state_migration, config, declarative_stream=model) 1862 for state_migration in model.state_migrations 1863 ] 1864 else: 1865 state_transformations = [] 1866 1867 if model.schema_loader: 1868 schema_loader = self._create_component_from_model( 1869
model=model.schema_loader, config=config 1870 ) 1871 else: 1872 options = model.parameters or {} 1873 if "name" not in options: 1874 options["name"] = model.name 1875 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 1876 1877 return DeclarativeStream( 1878 name=model.name or "", 1879 primary_key=primary_key, 1880 retriever=retriever, 1881 schema_loader=schema_loader, 1882 stream_cursor_field=cursor_field or "", 1883 state_migrations=state_transformations, 1884 config=config, 1885 parameters=model.parameters or {}, 1886 ) 1887 1888 def _build_stream_slicer_from_partition_router( 1889 self, 1890 model: Union[ 1891 AsyncRetrieverModel, 1892 CustomRetrieverModel, 1893 SimpleRetrieverModel, 1894 ], 1895 config: Config, 1896 stream_name: Optional[str] = None, 1897 ) -> Optional[PartitionRouter]: 1898 if ( 1899 hasattr(model, "partition_router") 1900 and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel) 1901 and model.partition_router 1902 ): 1903 stream_slicer_model = model.partition_router 1904 if isinstance(stream_slicer_model, list): 1905 return CartesianProductStreamSlicer( 1906 [ 1907 self._create_component_from_model( 1908 model=slicer, config=config, stream_name=stream_name or "" 1909 ) 1910 for slicer in stream_slicer_model 1911 ], 1912 parameters={}, 1913 ) 1914 else: 1915 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 1916 model=stream_slicer_model, config=config, stream_name=stream_name or "" 1917 ) 1918 return None 1919 1920 def _build_incremental_cursor( 1921 self, 1922 model: DeclarativeStreamModel, 1923 stream_slicer: Optional[PartitionRouter], 1924 config: Config, 1925 ) -> Optional[StreamSlicer]: 1926 if model.incremental_sync and stream_slicer: 1927 if model.retriever.type == "AsyncRetriever": 1928 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1929 state_manager=self._connector_state_manager, 1930 model_type=DatetimeBasedCursorModel, 1931 component_definition=model.incremental_sync.__dict__, 1932 stream_name=model.name or "", 1933 stream_namespace=None, 1934 config=config or {}, 1935 stream_state={}, 1936 partition_router=stream_slicer, 1937 ) 1938 1939 incremental_sync_model = model.incremental_sync 1940 cursor_component = self._create_component_from_model( 1941 model=incremental_sync_model, config=config 1942 ) 1943 is_global_cursor = ( 1944 hasattr(incremental_sync_model, "global_substream_cursor") 1945 and incremental_sync_model.global_substream_cursor 1946 ) 1947 1948 if is_global_cursor: 1949 return GlobalSubstreamCursor( 1950 stream_cursor=cursor_component, partition_router=stream_slicer 1951 ) 1952 return PerPartitionWithGlobalCursor( 1953 cursor_factory=CursorFactory( 1954 lambda: self._create_component_from_model( 1955 model=incremental_sync_model, config=config 1956 ), 1957 ), 1958 partition_router=stream_slicer, 1959 stream_cursor=cursor_component, 1960 ) 1961 elif model.incremental_sync: 1962 if model.retriever.type == "AsyncRetriever": 1963 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1964 model_type=DatetimeBasedCursorModel, 1965 component_definition=model.incremental_sync.__dict__, 1966 stream_name=model.name or "", 1967 stream_namespace=None, 1968 config=config or {}, 1969 stream_state_migrations=model.state_migrations, 1970 ) 1971 return self._create_component_from_model(model=model.incremental_sync, config=config) # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync 1972 return None 1973 1974 def _build_resumable_cursor( 1975 self, 1976 model: Union[ 1977 AsyncRetrieverModel, 1978 CustomRetrieverModel, 1979 SimpleRetrieverModel, 1980 ], 1981 stream_slicer: Optional[PartitionRouter], 1982 ) -> Optional[StreamSlicer]: 1983 if hasattr(model, "paginator") and model.paginator and not stream_slicer: 1984 # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor` 1985 return ResumableFullRefreshCursor(parameters={}) 1986 elif stream_slicer: 1987 # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor` 1988 return PerPartitionCursor( 1989 cursor_factory=CursorFactory( 1990 create_function=partial(ChildPartitionResumableFullRefreshCursor, {}) 1991 ), 1992 partition_router=stream_slicer, 1993 ) 1994 return None 1995 1996 def _merge_stream_slicers( 1997 self, model: DeclarativeStreamModel, config: Config 1998 ) -> Optional[StreamSlicer]: 1999 retriever_model = model.retriever 2000 2001 stream_slicer = self._build_stream_slicer_from_partition_router( 2002 retriever_model, config, stream_name=model.name 2003 ) 2004 2005 if retriever_model.type == "AsyncRetriever": 2006 is_not_datetime_cursor = ( 2007 model.incremental_sync.type != "DatetimeBasedCursor" 2008 if model.incremental_sync 2009 else None 2010 ) 2011 is_partition_router = ( 2012 bool(retriever_model.partition_router) if model.incremental_sync else None 2013 ) 2014 2015 if 
is_not_datetime_cursor: 2016 # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the 2017 # support of unordered slices (for example, when we trigger reports for January and February, the report 2018 # in February can be completed first). Once we have support for custom concurrent cursors or have a new 2019 # implementation available in the CDK, we can enable more cursors here. 2020 raise ValueError( 2021 "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet." 2022 ) 2023 2024 if is_partition_router and not stream_slicer: 2025 # Note that this development is being done in parallel to the per-partition development; once that is merged, 2026 # we could support it here by calling create_concurrent_cursor_from_perpartition_cursor 2027 raise ValueError("Per partition state is not supported yet for AsyncRetriever.") 2028 2029 if model.incremental_sync: 2030 return self._build_incremental_cursor(model, stream_slicer, config) 2031 2032 return ( 2033 stream_slicer 2034 if self._disable_resumable_full_refresh 2035 else self._build_resumable_cursor(retriever_model, stream_slicer) 2036 ) 2037 2038 def create_default_error_handler( 2039 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2040 ) -> DefaultErrorHandler: 2041 backoff_strategies = [] 2042 if model.backoff_strategies: 2043 for backoff_strategy_model in model.backoff_strategies: 2044 backoff_strategies.append( 2045 self._create_component_from_model(model=backoff_strategy_model, config=config) 2046 ) 2047 2048 response_filters = [] 2049 if model.response_filters: 2050 for response_filter_model in model.response_filters: 2051 response_filters.append( 2052 self._create_component_from_model(model=response_filter_model, config=config) 2053 ) 2054 response_filters.append( 2055 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2056 ) 2057 2058 return DefaultErrorHandler( 2059 backoff_strategies=backoff_strategies, 2060 max_retries=model.max_retries, 2061 response_filters=response_filters, 2062 config=config, 2063 parameters=model.parameters or {}, 2064 ) 2065 2066 def create_default_paginator( 2067 self, 2068 model: DefaultPaginatorModel, 2069 config: Config, 2070 *, 2071 url_base: str, 2072 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2073 decoder: Optional[Decoder] = None, 2074 cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None, 2075 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2076 if decoder: 2077 if self._is_supported_decoder_for_pagination(decoder): 2078 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2079 else: 2080 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2081 else: 2082 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2083 page_size_option = ( 2084 self._create_component_from_model(model=model.page_size_option, config=config) 2085 if model.page_size_option 2086 else None 2087 ) 2088 page_token_option = ( 2089 self._create_component_from_model(model=model.page_token_option, config=config) 2090 if model.page_token_option 2091 else None 2092 ) 2093 pagination_strategy = self._create_component_from_model( 2094 model=model.pagination_strategy, 2095 config=config, 2096 decoder=decoder_to_use, 2097 extractor_model=extractor_model, 2098 ) 2099 if cursor_used_for_stop_condition: 2100 pagination_strategy = StopConditionPaginationStrategyDecorator( 2101 pagination_strategy,
CursorStopCondition(cursor_used_for_stop_condition) 2102 ) 2103 paginator = DefaultPaginator( 2104 decoder=decoder_to_use, 2105 page_size_option=page_size_option, 2106 page_token_option=page_token_option, 2107 pagination_strategy=pagination_strategy, 2108 url_base=url_base, 2109 config=config, 2110 parameters=model.parameters or {}, 2111 ) 2112 if self._limit_pages_fetched_per_slice: 2113 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2114 return paginator 2115 2116 def create_dpath_extractor( 2117 self, 2118 model: DpathExtractorModel, 2119 config: Config, 2120 decoder: Optional[Decoder] = None, 2121 **kwargs: Any, 2122 ) -> DpathExtractor: 2123 if decoder: 2124 decoder_to_use = decoder 2125 else: 2126 decoder_to_use = JsonDecoder(parameters={}) 2127 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2128 return DpathExtractor( 2129 decoder=decoder_to_use, 2130 field_path=model_field_path, 2131 config=config, 2132 parameters=model.parameters or {}, 2133 ) 2134 2135 @staticmethod 2136 def create_response_to_file_extractor( 2137 model: ResponseToFileExtractorModel, 2138 **kwargs: Any, 2139 ) -> ResponseToFileExtractor: 2140 return ResponseToFileExtractor(parameters=model.parameters or {}) 2141 2142 @staticmethod 2143 def create_exponential_backoff_strategy( 2144 model: ExponentialBackoffStrategyModel, config: Config 2145 ) -> ExponentialBackoffStrategy: 2146 return ExponentialBackoffStrategy( 2147 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2148 ) 2149 2150 @staticmethod 2151 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2152 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2153 2154 def create_http_requester( 2155 self, 2156 model: HttpRequesterModel, 2157 config: Config, 2158 decoder: Decoder = JsonDecoder(parameters={}), 2159 query_properties_key: Optional[str] = None, 2160 use_cache: Optional[bool] = None, 2161 *, 2162 name: str, 2163 ) -> HttpRequester: 2164 authenticator = ( 2165 self._create_component_from_model( 2166 model=model.authenticator, 2167 config=config, 2168 url_base=model.url_base, 2169 name=name, 2170 decoder=decoder, 2171 ) 2172 if model.authenticator 2173 else None 2174 ) 2175 error_handler = ( 2176 self._create_component_from_model(model=model.error_handler, config=config) 2177 if model.error_handler 2178 else DefaultErrorHandler( 2179 backoff_strategies=[], 2180 response_filters=[], 2181 config=config, 2182 parameters=model.parameters or {}, 2183 ) 2184 ) 2185 2186 api_budget = self._api_budget 2187 2188 request_options_provider = InterpolatedRequestOptionsProvider( 2189 request_body_data=model.request_body_data, 2190 request_body_json=model.request_body_json, 2191 request_headers=model.request_headers, 2192 request_parameters=model.request_parameters, 2193 query_properties_key=query_properties_key, 2194 config=config, 2195 parameters=model.parameters or {}, 2196 ) 2197 2198 assert model.use_cache is not None # for mypy 2199 assert model.http_method is not None # for mypy 2200 2201 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2202 2203 return HttpRequester( 2204 name=name, 2205 url_base=model.url_base, 2206 path=model.path, 2207 authenticator=authenticator, 2208 error_handler=error_handler, 2209 api_budget=api_budget, 2210 http_method=HttpMethod[model.http_method.value], 2211 request_options_provider=request_options_provider, 2212 config=config, 2213 
disable_retries=self._disable_retries, 2214 parameters=model.parameters or {}, 2215 message_repository=self._message_repository, 2216 use_cache=should_use_cache, 2217 decoder=decoder, 2218 stream_response=decoder.is_stream_response() if decoder else False, 2219 ) 2220 2221 @staticmethod 2222 def create_http_response_filter( 2223 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2224 ) -> HttpResponseFilter: 2225 if model.action: 2226 action = ResponseAction(model.action.value) 2227 else: 2228 action = None 2229 2230 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2231 2232 http_codes = ( 2233 set(model.http_codes) if model.http_codes else set() 2234 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2235 2236 return HttpResponseFilter( 2237 action=action, 2238 failure_type=failure_type, 2239 error_message=model.error_message or "", 2240 error_message_contains=model.error_message_contains or "", 2241 http_codes=http_codes, 2242 predicate=model.predicate or "", 2243 config=config, 2244 parameters=model.parameters or {}, 2245 ) 2246 2247 @staticmethod 2248 def create_inline_schema_loader( 2249 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2250 ) -> InlineSchemaLoader: 2251 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2252 2253 def create_complex_field_type( 2254 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2255 ) -> ComplexFieldType: 2256 items = ( 2257 self._create_component_from_model(model=model.items, config=config) 2258 if isinstance(model.items, ComplexFieldTypeModel) 2259 else model.items 2260 ) 2261 2262 return ComplexFieldType(field_type=model.field_type, items=items) 2263 2264 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2265 target_type = ( 2266 self._create_component_from_model(model=model.target_type, config=config) 2267 if isinstance(model.target_type, ComplexFieldTypeModel) 2268 else model.target_type 2269 ) 2270 2271 return TypesMap( 2272 target_type=target_type, 2273 current_type=model.current_type, 2274 condition=model.condition if model.condition is not None else "True", 2275 ) 2276 2277 def create_schema_type_identifier( 2278 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2279 ) -> SchemaTypeIdentifier: 2280 types_mapping = [] 2281 if model.types_mapping: 2282 types_mapping.extend( 2283 [ 2284 self._create_component_from_model(types_map, config=config) 2285 for types_map in model.types_mapping 2286 ] 2287 ) 2288 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2289 [x for x in model.schema_pointer] if model.schema_pointer else [] 2290 ) 2291 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2292 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2293 [x for x in model.type_pointer] if model.type_pointer else None 2294 ) 2295 2296 return SchemaTypeIdentifier( 2297 schema_pointer=model_schema_pointer, 2298 key_pointer=model_key_pointer, 2299 type_pointer=model_type_pointer, 2300 types_mapping=types_mapping, 2301 parameters=model.parameters or {}, 2302 ) 2303 2304 def create_dynamic_schema_loader( 2305 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2306 ) -> DynamicSchemaLoader: 2307 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 2308 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 2309 
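        # A minimal sketch of the kind of component definition this method handles (hypothetical
        # values, shown only for orientation; the pointer fields follow the SchemaTypeIdentifier
        # created further below):
        #
        #     {
        #         "type": "DynamicSchemaLoader",
        #         "retriever": {...},  # e.g. a SimpleRetriever definition
        #         "schema_type_identifier": {
        #             "schema_pointer": ["fields"],
        #             "key_pointer": ["name"],
        #             "type_pointer": ["type"],
        #         },
        #     }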
2310 schema_transformations = [] 2311 if model.schema_transformations: 2312 for transformation_model in model.schema_transformations: 2313 schema_transformations.append( 2314 self._create_component_from_model(model=transformation_model, config=config) 2315 ) 2316 2317 retriever = self._create_component_from_model( 2318 model=model.retriever, 2319 config=config, 2320 name="dynamic_properties", 2321 primary_key=None, 2322 stream_slicer=combined_slicers, 2323 transformations=[], 2324 use_cache=True, 2325 ) 2326 schema_type_identifier = self._create_component_from_model( 2327 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2328 ) 2329 return DynamicSchemaLoader( 2330 retriever=retriever, 2331 config=config, 2332 schema_transformations=schema_transformations, 2333 schema_type_identifier=schema_type_identifier, 2334 parameters=model.parameters or {}, 2335 ) 2336 2337 @staticmethod 2338 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2339 return JsonDecoder(parameters={}) 2340 2341 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2342 return CompositeRawDecoder( 2343 parser=ModelToComponentFactory._get_parser(model, config), 2344 stream_response=not self._emit_connector_builder_messages, 2345 ) 2346 2347 def create_jsonl_decoder( 2348 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2349 ) -> Decoder: 2350 return CompositeRawDecoder( 2351 parser=ModelToComponentFactory._get_parser(model, config), 2352 stream_response=not self._emit_connector_builder_messages, 2353 ) 2354 2355 def create_gzip_decoder( 2356 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2357 ) -> Decoder: 2358 _compressed_response_types = { 2359 "gzip", 2360 "x-gzip", 2361 "gzip, deflate", 2362 "x-gzip, deflate", 2363 "application/zip", 2364 "application/gzip", 2365 "application/x-gzip", 2366 "application/x-zip-compressed", 2367 } 2368 2369 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2370 2371 if self._emit_connector_builder_messages: 2372 # This is very surprising, but if the response is not streamed, 2373 # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data, as opposed to response.raw, 2374 # which uses urllib3 directly and does not uncompress the data. 2375 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2376 2377 return CompositeRawDecoder.by_headers( 2378 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2379 stream_response=True, 2380 fallback_parser=gzip_parser.inner_parser, 2381 ) 2382 2383 @staticmethod 2384 def create_incrementing_count_cursor( 2385 model: IncrementingCountCursorModel, config: Config, **kwargs: Any 2386 ) -> DatetimeBasedCursor: 2387 # This should not actually get used anywhere at runtime, but this stub is needed to pass checks since 2388 # we still parse models into components. The issue is that there's no runtime implementation of an 2389 # IncrementingCountCursor. 2390 # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor, because the check runs without the ConcurrentCursor.
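        # For reference, the runtime-functional path for this model is
        # create_concurrent_cursor_from_incrementing_count_cursor above; a sketch of the manifest
        # shape this stub receives (hypothetical values):
        #
        #     {
        #         "type": "IncrementingCountCursor",
        #         "cursor_field": "id",
        #         "start_value": 0,
        #     }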
2391 return DatetimeBasedCursor( 2392 cursor_field=model.cursor_field, 2393 datetime_format="%Y-%m-%d", 2394 start_datetime="2024-12-12", 2395 config=config, 2396 parameters={}, 2397 ) 2398 2399 @staticmethod 2400 def create_iterable_decoder( 2401 model: IterableDecoderModel, config: Config, **kwargs: Any 2402 ) -> IterableDecoder: 2403 return IterableDecoder(parameters={}) 2404 2405 @staticmethod 2406 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2407 return XmlDecoder(parameters={}) 2408 2409 def create_zipfile_decoder( 2410 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2411 ) -> ZipfileDecoder: 2412 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2413 2414 @staticmethod 2415 def _get_parser(model: BaseModel, config: Config) -> Parser: 2416 if isinstance(model, JsonDecoderModel): 2417 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2418 return JsonParser() 2419 elif isinstance(model, JsonlDecoderModel): 2420 return JsonLineParser() 2421 elif isinstance(model, CsvDecoderModel): 2422 return CsvParser(encoding=model.encoding, delimiter=model.delimiter) 2423 elif isinstance(model, GzipDecoderModel): 2424 return GzipParser( 2425 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2426 ) 2427 elif isinstance( 2428 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2429 ): 2430 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2431 2432 raise ValueError(f"Unknown decoder type {model}") 2433 2434 @staticmethod 2435 def create_json_file_schema_loader( 2436 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2437 ) -> JsonFileSchemaLoader: 2438 return JsonFileSchemaLoader( 2439 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2440 ) 2441 2442 @staticmethod 2443 def create_jwt_authenticator( 2444 model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2445 ) -> JwtAuthenticator: 2446 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2447 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2448 return JwtAuthenticator( 2449 config=config, 2450 parameters=model.parameters or {}, 2451 algorithm=JwtAlgorithm(model.algorithm.value), 2452 secret_key=model.secret_key, 2453 base64_encode_secret_key=model.base64_encode_secret_key, 2454 token_duration=model.token_duration, 2455 header_prefix=model.header_prefix, 2456 kid=jwt_headers.kid, 2457 typ=jwt_headers.typ, 2458 cty=jwt_headers.cty, 2459 iss=jwt_payload.iss, 2460 sub=jwt_payload.sub, 2461 aud=jwt_payload.aud, 2462 additional_jwt_headers=model.additional_jwt_headers, 2463 additional_jwt_payload=model.additional_jwt_payload, 2464 ) 2465 2466 def create_list_partition_router( 2467 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2468 ) -> ListPartitionRouter: 2469 request_option = ( 2470 self._create_component_from_model(model.request_option, config) 2471 if model.request_option 2472 else None 2473 ) 2474 return ListPartitionRouter( 2475 cursor_field=model.cursor_field, 2476 request_option=request_option, 2477 values=model.values, 2478 config=config, 2479 parameters=model.parameters or {}, 2480 ) 2481 2482 @staticmethod 2483 def create_min_max_datetime( 2484 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2485 ) -> MinMaxDatetime: 2486 return 
MinMaxDatetime( 2487 datetime=model.datetime, 2488 datetime_format=model.datetime_format or "", 2489 max_datetime=model.max_datetime or "", 2490 min_datetime=model.min_datetime or "", 2491 parameters=model.parameters or {}, 2492 ) 2493 2494 @staticmethod 2495 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2496 return NoAuth(parameters=model.parameters or {}) 2497 2498 @staticmethod 2499 def create_no_pagination( 2500 model: NoPaginationModel, config: Config, **kwargs: Any 2501 ) -> NoPagination: 2502 return NoPagination(parameters={}) 2503 2504 def create_oauth_authenticator( 2505 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2506 ) -> DeclarativeOauth2Authenticator: 2507 profile_assertion = ( 2508 self._create_component_from_model(model.profile_assertion, config=config) 2509 if model.profile_assertion 2510 else None 2511 ) 2512 2513 if model.refresh_token_updater: 2514 # ignore type error because fixing it would have a lot of dependencies, revisit later 2515 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2516 config, 2517 InterpolatedString.create( 2518 model.token_refresh_endpoint, # type: ignore 2519 parameters=model.parameters or {}, 2520 ).eval(config), 2521 access_token_name=InterpolatedString.create( 2522 model.access_token_name or "access_token", parameters=model.parameters or {} 2523 ).eval(config), 2524 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2525 expires_in_name=InterpolatedString.create( 2526 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2527 ).eval(config), 2528 client_id_name=InterpolatedString.create( 2529 model.client_id_name or "client_id", parameters=model.parameters or {} 2530 ).eval(config), 2531 client_id=InterpolatedString.create( 2532 model.client_id, parameters=model.parameters or {} 2533 ).eval(config) 2534 if model.client_id 2535 else model.client_id, 2536 client_secret_name=InterpolatedString.create( 2537 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2538 ).eval(config), 2539 client_secret=InterpolatedString.create( 2540 model.client_secret, parameters=model.parameters or {} 2541 ).eval(config) 2542 if model.client_secret 2543 else model.client_secret, 2544 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2545 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2546 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2547 grant_type_name=InterpolatedString.create( 2548 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2549 ).eval(config), 2550 grant_type=InterpolatedString.create( 2551 model.grant_type or "refresh_token", parameters=model.parameters or {} 2552 ).eval(config), 2553 refresh_request_body=InterpolatedMapping( 2554 model.refresh_request_body or {}, parameters=model.parameters or {} 2555 ).eval(config), 2556 refresh_request_headers=InterpolatedMapping( 2557 model.refresh_request_headers or {}, parameters=model.parameters or {} 2558 ).eval(config), 2559 scopes=model.scopes, 2560 token_expiry_date_format=model.token_expiry_date_format, 2561 message_repository=self._message_repository, 2562 refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes, 2563 refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key, 2564 refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values, 2565 ) 2566 # ignore 
type error because fixing it would have a lot of dependencies, revisit later 2567 return DeclarativeOauth2Authenticator( # type: ignore 2568 access_token_name=model.access_token_name or "access_token", 2569 access_token_value=model.access_token_value, 2570 client_id_name=model.client_id_name or "client_id", 2571 client_id=model.client_id, 2572 client_secret_name=model.client_secret_name or "client_secret", 2573 client_secret=model.client_secret, 2574 expires_in_name=model.expires_in_name or "expires_in", 2575 grant_type_name=model.grant_type_name or "grant_type", 2576 grant_type=model.grant_type or "refresh_token", 2577 refresh_request_body=model.refresh_request_body, 2578 refresh_request_headers=model.refresh_request_headers, 2579 refresh_token_name=model.refresh_token_name or "refresh_token", 2580 refresh_token=model.refresh_token, 2581 scopes=model.scopes, 2582 token_expiry_date=model.token_expiry_date, 2583 token_expiry_date_format=model.token_expiry_date_format, 2584 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2585 token_refresh_endpoint=model.token_refresh_endpoint, 2586 config=config, 2587 parameters=model.parameters or {}, 2588 message_repository=self._message_repository, 2589 profile_assertion=profile_assertion, 2590 use_profile_assertion=model.use_profile_assertion, 2591 ) 2592 2593 def create_offset_increment( 2594 self, 2595 model: OffsetIncrementModel, 2596 config: Config, 2597 decoder: Decoder, 2598 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2599 **kwargs: Any, 2600 ) -> OffsetIncrement: 2601 if isinstance(decoder, PaginationDecoderDecorator): 2602 inner_decoder = decoder.decoder 2603 else: 2604 inner_decoder = decoder 2605 decoder = PaginationDecoderDecorator(decoder=decoder) 2606 2607 if self._is_supported_decoder_for_pagination(inner_decoder): 2608 decoder_to_use = decoder 2609 else: 2610 raise ValueError( 2611 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2612 ) 2613 2614 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2615 # so that it can be shared by OffSetIncrement and RecordSelector. However, due to how we instantiate the 2616 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2617 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2618 # When we have more time to investigate we can look into reusing the same component. 
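        # Illustrative only (hypothetical manifest values): an OffsetIncrement strategy that pages
        # by 100 records and injects the offset on the very first request, where page_size and
        # inject_on_first_request map onto the constructor call below:
        #
        #     {
        #         "type": "OffsetIncrement",
        #         "page_size": 100,
        #         "inject_on_first_request": True,
        #     }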
2619 extractor = ( 2620 self._create_component_from_model( 2621 model=extractor_model, config=config, decoder=decoder_to_use 2622 ) 2623 if extractor_model 2624 else None 2625 ) 2626 2627 return OffsetIncrement( 2628 page_size=model.page_size, 2629 config=config, 2630 decoder=decoder_to_use, 2631 extractor=extractor, 2632 inject_on_first_request=model.inject_on_first_request or False, 2633 parameters=model.parameters or {}, 2634 ) 2635 2636 @staticmethod 2637 def create_page_increment( 2638 model: PageIncrementModel, config: Config, **kwargs: Any 2639 ) -> PageIncrement: 2640 return PageIncrement( 2641 page_size=model.page_size, 2642 config=config, 2643 start_from_page=model.start_from_page or 0, 2644 inject_on_first_request=model.inject_on_first_request or False, 2645 parameters=model.parameters or {}, 2646 ) 2647 2648 def create_parent_stream_config( 2649 self, model: ParentStreamConfigModel, config: Config, **kwargs: Any 2650 ) -> ParentStreamConfig: 2651 declarative_stream = self._create_component_from_model( 2652 model.stream, config=config, **kwargs 2653 ) 2654 request_option = ( 2655 self._create_component_from_model(model.request_option, config=config) 2656 if model.request_option 2657 else None 2658 ) 2659 2660 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2661 raise ValueError( 2662 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 2663 ) 2664 2665 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2666 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2667 ) 2668 2669 return ParentStreamConfig( 2670 parent_key=model.parent_key, 2671 request_option=request_option, 2672 stream=declarative_stream, 2673 partition_field=model.partition_field, 2674 config=config, 2675 incremental_dependency=model.incremental_dependency or False, 2676 parameters=model.parameters or {}, 2677 extra_fields=model.extra_fields, 2678 lazy_read_pointer=model_lazy_read_pointer, 2679 ) 2680 2681 def create_properties_from_endpoint( 2682 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 2683 ) -> PropertiesFromEndpoint: 2684 retriever = self._create_component_from_model( 2685 model=model.retriever, 2686 config=config, 2687 name="dynamic_properties", 2688 primary_key=None, 2689 stream_slicer=None, 2690 transformations=[], 2691 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ 2692 ) 2693 return PropertiesFromEndpoint( 2694 property_field_path=model.property_field_path, 2695 retriever=retriever, 2696 config=config, 2697 parameters=model.parameters or {}, 2698 ) 2699 2700 def create_property_chunking( 2701 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 2702 ) -> PropertyChunking: 2703 record_merge_strategy = ( 2704 self._create_component_from_model( 2705 model=model.record_merge_strategy, config=config, **kwargs 2706 ) 2707 if model.record_merge_strategy 2708 else None 2709 ) 2710 2711 property_limit_type: PropertyLimitType 2712 match model.property_limit_type: 2713 case PropertyLimitTypeModel.property_count: 2714 property_limit_type = PropertyLimitType.property_count 2715 case PropertyLimitTypeModel.characters: 2716 property_limit_type = PropertyLimitType.characters 2717 case _: 2718 raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}") 2719 2720 return PropertyChunking( 2721
property_limit_type=property_limit_type, 2722 property_limit=model.property_limit, 2723 record_merge_strategy=record_merge_strategy, 2724 config=config, 2725 parameters=model.parameters or {}, 2726 ) 2727 2728 def create_query_properties( 2729 self, model: QueryPropertiesModel, config: Config, **kwargs: Any 2730 ) -> QueryProperties: 2731 if isinstance(model.property_list, list): 2732 property_list = model.property_list 2733 else: 2734 property_list = self._create_component_from_model( 2735 model=model.property_list, config=config, **kwargs 2736 ) 2737 2738 property_chunking = ( 2739 self._create_component_from_model( 2740 model=model.property_chunking, config=config, **kwargs 2741 ) 2742 if model.property_chunking 2743 else None 2744 ) 2745 2746 return QueryProperties( 2747 property_list=property_list, 2748 always_include_properties=model.always_include_properties, 2749 property_chunking=property_chunking, 2750 config=config, 2751 parameters=model.parameters or {}, 2752 ) 2753 2754 @staticmethod 2755 def create_record_filter( 2756 model: RecordFilterModel, config: Config, **kwargs: Any 2757 ) -> RecordFilter: 2758 return RecordFilter( 2759 condition=model.condition or "", config=config, parameters=model.parameters or {} 2760 ) 2761 2762 @staticmethod 2763 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 2764 return RequestPath(parameters={}) 2765 2766 @staticmethod 2767 def create_request_option( 2768 model: RequestOptionModel, config: Config, **kwargs: Any 2769 ) -> RequestOption: 2770 inject_into = RequestOptionType(model.inject_into.value) 2771 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 2772 [ 2773 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 2774 for segment in model.field_path 2775 ] 2776 if model.field_path 2777 else None 2778 ) 2779 field_name = ( 2780 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 2781 if model.field_name 2782 else None 2783 ) 2784 return RequestOption( 2785 field_name=field_name, 2786 field_path=field_path, 2787 inject_into=inject_into, 2788 parameters=kwargs.get("parameters", {}), 2789 ) 2790 2791 def create_record_selector( 2792 self, 2793 model: RecordSelectorModel, 2794 config: Config, 2795 *, 2796 name: str, 2797 transformations: List[RecordTransformation] | None = None, 2798 decoder: Decoder | None = None, 2799 client_side_incremental_sync: Dict[str, Any] | None = None, 2800 **kwargs: Any, 2801 ) -> RecordSelector: 2802 extractor = self._create_component_from_model( 2803 model=model.extractor, decoder=decoder, config=config 2804 ) 2805 record_filter = ( 2806 self._create_component_from_model(model.record_filter, config=config) 2807 if model.record_filter 2808 else None 2809 ) 2810 2811 assert model.transform_before_filtering is not None # for mypy 2812 2813 transform_before_filtering = model.transform_before_filtering 2814 if client_side_incremental_sync: 2815 record_filter = ClientSideIncrementalRecordFilterDecorator( 2816 config=config, 2817 parameters=model.parameters, 2818 condition=model.record_filter.condition 2819 if (model.record_filter and hasattr(model.record_filter, "condition")) 2820 else None, 2821 **client_side_incremental_sync, 2822 ) 2823 transform_before_filtering = True 2824 2825 schema_normalization = ( 2826 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 2827 if isinstance(model.schema_normalization, SchemaNormalizationModel) 2828 else 
self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 2829 ) 2830 2831 return RecordSelector( 2832 extractor=extractor, 2833 name=name, 2834 config=config, 2835 record_filter=record_filter, 2836 transformations=transformations or [], 2837 schema_normalization=schema_normalization, 2838 parameters=model.parameters or {}, 2839 transform_before_filtering=transform_before_filtering, 2840 ) 2841 2842 @staticmethod 2843 def create_remove_fields( 2844 model: RemoveFieldsModel, config: Config, **kwargs: Any 2845 ) -> RemoveFields: 2846 return RemoveFields( 2847 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 2848 ) 2849 2850 def create_selective_authenticator( 2851 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 2852 ) -> DeclarativeAuthenticator: 2853 authenticators = { 2854 name: self._create_component_from_model(model=auth, config=config) 2855 for name, auth in model.authenticators.items() 2856 } 2857 # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError 2858 return SelectiveAuthenticator( # type: ignore[abstract] 2859 config=config, 2860 authenticators=authenticators, 2861 authenticator_selection_path=model.authenticator_selection_path, 2862 **kwargs, 2863 ) 2864 2865 @staticmethod 2866 def create_legacy_session_token_authenticator( 2867 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 2868 ) -> LegacySessionTokenAuthenticator: 2869 return LegacySessionTokenAuthenticator( 2870 api_url=url_base, 2871 header=model.header, 2872 login_url=model.login_url, 2873 password=model.password or "", 2874 session_token=model.session_token or "", 2875 session_token_response_key=model.session_token_response_key or "", 2876 username=model.username or "", 2877 validate_session_url=model.validate_session_url, 2878 config=config, 2879 parameters=model.parameters or {}, 2880 ) 2881 2882 def create_simple_retriever( 2883 self, 2884 model: SimpleRetrieverModel, 2885 config: Config, 2886 *, 2887 name: str, 2888 primary_key: Optional[Union[str, List[str], List[List[str]]]], 2889 stream_slicer: Optional[StreamSlicer], 2890 request_options_provider: Optional[RequestOptionsProvider] = None, 2891 stop_condition_on_cursor: bool = False, 2892 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 2893 transformations: List[RecordTransformation], 2894 incremental_sync: Optional[ 2895 Union[ 2896 IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel 2897 ] 2898 ] = None, 2899 use_cache: Optional[bool] = None, 2900 **kwargs: Any, 2901 ) -> SimpleRetriever: 2902 decoder = ( 2903 self._create_component_from_model(model=model.decoder, config=config) 2904 if model.decoder 2905 else JsonDecoder(parameters={}) 2906 ) 2907 record_selector = self._create_component_from_model( 2908 model=model.record_selector, 2909 name=name, 2910 config=config, 2911 decoder=decoder, 2912 transformations=transformations, 2913 client_side_incremental_sync=client_side_incremental_sync, 2914 ) 2915 2916 query_properties: Optional[QueryProperties] = None 2917 query_properties_key: Optional[str] = None 2918 if ( 2919 hasattr(model.requester, "request_parameters") 2920 and model.requester.request_parameters 2921 and isinstance(model.requester.request_parameters, Mapping) 2922 ): 2923 query_properties_definitions = [] 2924 for key, request_parameter in
model.requester.request_parameters.items(): 2925 # When translating JSON schema into Pydantic models, enforcing types for arrays containing both 2926 # concrete strings and complex object definitions like QueryProperties would get resolved to Union[str, Any]. 2927 # This adds the extra validation that we couldn't get for free in Pydantic model generation 2928 if ( 2929 isinstance(request_parameter, Mapping) 2930 and request_parameter.get("type") == "QueryProperties" 2931 ): 2932 query_properties_key = key 2933 query_properties_definitions.append(request_parameter) 2934 elif not isinstance(request_parameter, str): 2935 raise ValueError( 2936 f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}" 2937 ) 2938 2939 if len(query_properties_definitions) > 1: 2940 raise ValueError( 2941 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 2942 ) 2943 2944 if len(query_properties_definitions) == 1: 2945 query_properties = self.create_component( 2946 model_type=QueryPropertiesModel, 2947 component_definition=query_properties_definitions[0], 2948 config=config, 2949 ) 2950 2951 # Removes QueryProperties components from the interpolated mappings because they will be resolved in 2952 # the provider from the slice directly instead of through jinja interpolation 2953 if isinstance(model.requester.request_parameters, Mapping): 2954 model.requester.request_parameters = self._remove_query_properties( 2955 model.requester.request_parameters 2956 ) 2957 2958 requester = self._create_component_from_model( 2959 model=model.requester, 2960 decoder=decoder, 2961 name=name, 2962 query_properties_key=query_properties_key, 2963 use_cache=use_cache, 2964 config=config, 2965 ) 2966 url_base = ( 2967 model.requester.url_base 2968 if hasattr(model.requester, "url_base") 2969 else requester.get_url_base() 2970 ) 2971 2972 # Define cursor only if per partition or common incremental support is needed 2973 cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None 2974 2975 if ( 2976 not isinstance(stream_slicer, DatetimeBasedCursor) 2977 or type(stream_slicer) is not DatetimeBasedCursor 2978 ): 2979 # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods). 2980 # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement 2981 # their own RequestOptionsProvider.
However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's 2982 # request_options_provider 2983 request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={}) 2984 elif not request_options_provider: 2985 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 2986 2987 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 2988 2989 cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None 2990 paginator = ( 2991 self._create_component_from_model( 2992 model=model.paginator, 2993 config=config, 2994 url_base=url_base, 2995 extractor_model=model.record_selector.extractor, 2996 decoder=decoder, 2997 cursor_used_for_stop_condition=cursor_used_for_stop_condition, 2998 ) 2999 if model.paginator 3000 else NoPagination(parameters={}) 3001 ) 3002 3003 ignore_stream_slicer_parameters_on_paginated_requests = ( 3004 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3005 ) 3006 3007 if ( 3008 model.partition_router 3009 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3010 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3011 and any( 3012 parent_stream_config.lazy_read_pointer 3013 for parent_stream_config in model.partition_router.parent_stream_configs 3014 ) 3015 ): 3016 if incremental_sync: 3017 if incremental_sync.type != "DatetimeBasedCursor": 3018 raise ValueError( 3019 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3020 ) 3021 3022 elif incremental_sync.step or incremental_sync.cursor_granularity: 3023 raise ValueError( 3024 f"Found more than one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 3025 ) 3026 3027 if model.decoder and model.decoder.type != "JsonDecoder": 3028 raise ValueError( 3029 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
3030 ) 3031 3032 return LazySimpleRetriever( 3033 name=name, 3034 paginator=paginator, 3035 primary_key=primary_key, 3036 requester=requester, 3037 record_selector=record_selector, 3038 stream_slicer=stream_slicer, 3039 request_option_provider=request_options_provider, 3040 cursor=cursor, 3041 config=config, 3042 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3043 parameters=model.parameters or {}, 3044 ) 3045 3046 if self._limit_slices_fetched or self._emit_connector_builder_messages: 3047 return SimpleRetrieverTestReadDecorator( 3048 name=name, 3049 paginator=paginator, 3050 primary_key=primary_key, 3051 requester=requester, 3052 record_selector=record_selector, 3053 stream_slicer=stream_slicer, 3054 request_option_provider=request_options_provider, 3055 cursor=cursor, 3056 config=config, 3057 maximum_number_of_slices=self._limit_slices_fetched or 5, 3058 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3059 parameters=model.parameters or {}, 3060 ) 3061 return SimpleRetriever( 3062 name=name, 3063 paginator=paginator, 3064 primary_key=primary_key, 3065 requester=requester, 3066 record_selector=record_selector, 3067 stream_slicer=stream_slicer, 3068 request_option_provider=request_options_provider, 3069 cursor=cursor, 3070 config=config, 3071 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3072 additional_query_properties=query_properties, 3073 parameters=model.parameters or {}, 3074 ) 3075 3076 @staticmethod 3077 def _remove_query_properties( 3078 request_parameters: Mapping[str, Union[Any, str]], 3079 ) -> Mapping[str, Union[Any, str]]: 3080 return { 3081 parameter_field: request_parameter 3082 for parameter_field, request_parameter in request_parameters.items() 3083 if not isinstance(request_parameter, Mapping) 3084 or not request_parameter.get("type") == "QueryProperties" 3085 } 3086 3087 def create_state_delegating_stream( 3088 self, 3089 model: StateDelegatingStreamModel, 3090 config: Config, 3091 has_parent_state: Optional[bool] = None, 3092 **kwargs: Any, 3093 ) -> DeclarativeStream: 3094 if ( 3095 model.full_refresh_stream.name != model.name 3096 or model.name != model.incremental_stream.name 3097 ): 3098 raise ValueError( 3099 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3100 ) 3101 3102 stream_model = ( 3103 model.incremental_stream 3104 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3105 else model.full_refresh_stream 3106 ) 3107 3108 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description 3109 3110 def _create_async_job_status_mapping( 3111 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3112 ) -> Mapping[str, AsyncJobStatus]: 3113 api_status_to_cdk_status = {} 3114 for cdk_status, api_statuses in model.dict().items(): 3115 if cdk_status == "type": 3116 # This is an element of the dict because of the typing of the CDK but it is not a CDK status 3117 continue 3118 3119 for status in api_statuses: 3120 if status in api_status_to_cdk_status: 3121 raise ValueError( 3122 f"API status {status} is already set for CDK status {cdk_status}. 
Please ensure API statuses are only provided once" 3123 ) 3124 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3125 return api_status_to_cdk_status 3126 3127 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3128 match status: 3129 case "running": 3130 return AsyncJobStatus.RUNNING 3131 case "completed": 3132 return AsyncJobStatus.COMPLETED 3133 case "failed": 3134 return AsyncJobStatus.FAILED 3135 case "timeout": 3136 return AsyncJobStatus.TIMED_OUT 3137 case _: 3138 raise ValueError(f"Unsupported CDK status {status}") 3139 3140 def create_async_retriever( 3141 self, 3142 model: AsyncRetrieverModel, 3143 config: Config, 3144 *, 3145 name: str, 3146 primary_key: Optional[ 3147 Union[str, List[str], List[List[str]]] 3148 ], # this seems to be needed to match create_simple_retriever 3149 stream_slicer: Optional[StreamSlicer], 3150 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3151 transformations: List[RecordTransformation], 3152 **kwargs: Any, 3153 ) -> AsyncRetriever: 3154 def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever: 3155 record_selector = RecordSelector( 3156 extractor=download_extractor, 3157 name=name, 3158 record_filter=None, 3159 transformations=transformations, 3160 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3161 config=config, 3162 parameters={}, 3163 ) 3164 paginator = ( 3165 self._create_component_from_model( 3166 model=model.download_paginator, 3167 decoder=decoder, 3168 config=config, 3169 url_base="", 3170 ) 3171 if model.download_paginator 3172 else NoPagination(parameters={}) 3173 ) 3174 maximum_number_of_slices = self._limit_slices_fetched or 5 3175 3176 if self._limit_slices_fetched or self._emit_connector_builder_messages: 3177 return SimpleRetrieverTestReadDecorator( 3178 requester=download_requester, 3179 record_selector=record_selector, 3180 primary_key=None, 3181 name=job_download_components_name, 3182 paginator=paginator, 3183 config=config, 3184 parameters={}, 3185 maximum_number_of_slices=maximum_number_of_slices, 3186 ) 3187 3188 return SimpleRetriever( 3189 requester=download_requester, 3190 record_selector=record_selector, 3191 primary_key=None, 3192 name=job_download_components_name, 3193 paginator=paginator, 3194 config=config, 3195 parameters={}, 3196 ) 3197 3198 def _get_job_timeout() -> datetime.timedelta: 3199 user_defined_timeout: Optional[int] = ( 3200 int( 3201 InterpolatedString.create( 3202 str(model.polling_job_timeout), 3203 parameters={}, 3204 ).eval(config) 3205 ) 3206 if model.polling_job_timeout 3207 else None 3208 ) 3209 3210 # check for user defined timeout during the test read or 15 minutes 3211 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3212 # default value for non-connector builder is 60 minutes. 
3213 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3214 3215 return ( 3216 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3217 ) 3218 3219 decoder = ( 3220 self._create_component_from_model(model=model.decoder, config=config) 3221 if model.decoder 3222 else JsonDecoder(parameters={}) 3223 ) 3224 record_selector = self._create_component_from_model( 3225 model=model.record_selector, 3226 config=config, 3227 decoder=decoder, 3228 name=name, 3229 transformations=transformations, 3230 client_side_incremental_sync=client_side_incremental_sync, 3231 ) 3232 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3233 creation_requester = self._create_component_from_model( 3234 model=model.creation_requester, 3235 decoder=decoder, 3236 config=config, 3237 name=f"job creation - {name}", 3238 ) 3239 polling_requester = self._create_component_from_model( 3240 model=model.polling_requester, 3241 decoder=decoder, 3242 config=config, 3243 name=f"job polling - {name}", 3244 ) 3245 job_download_components_name = f"job download - {name}" 3246 download_decoder = ( 3247 self._create_component_from_model(model=model.download_decoder, config=config) 3248 if model.download_decoder 3249 else JsonDecoder(parameters={}) 3250 ) 3251 download_extractor = ( 3252 self._create_component_from_model( 3253 model=model.download_extractor, 3254 config=config, 3255 decoder=download_decoder, 3256 parameters=model.parameters, 3257 ) 3258 if model.download_extractor 3259 else DpathExtractor( 3260 [], 3261 config=config, 3262 decoder=download_decoder, 3263 parameters=model.parameters or {}, 3264 ) 3265 ) 3266 download_requester = self._create_component_from_model( 3267 model=model.download_requester, 3268 decoder=download_decoder, 3269 config=config, 3270 name=job_download_components_name, 3271 ) 3272 download_retriever = _get_download_retriever() 3273 abort_requester = ( 3274 self._create_component_from_model( 3275 model=model.abort_requester, 3276 decoder=decoder, 3277 config=config, 3278 name=f"job abort - {name}", 3279 ) 3280 if model.abort_requester 3281 else None 3282 ) 3283 delete_requester = ( 3284 self._create_component_from_model( 3285 model=model.delete_requester, 3286 decoder=decoder, 3287 config=config, 3288 name=f"job delete - {name}", 3289 ) 3290 if model.delete_requester 3291 else None 3292 ) 3293 download_target_requester = ( 3294 self._create_component_from_model( 3295 model=model.download_target_requester, 3296 decoder=decoder, 3297 config=config, 3298 name=f"job extract_url - {name}", 3299 ) 3300 if model.download_target_requester 3301 else None 3302 ) 3303 status_extractor = self._create_component_from_model( 3304 model=model.status_extractor, decoder=decoder, config=config, name=name 3305 ) 3306 download_target_extractor = self._create_component_from_model( 3307 model=model.download_target_extractor, 3308 decoder=decoder, 3309 config=config, 3310 name=name, 3311 ) 3312 3313 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3314 creation_requester=creation_requester, 3315 polling_requester=polling_requester, 3316 download_retriever=download_retriever, 3317 download_target_requester=download_target_requester, 3318 abort_requester=abort_requester, 3319 delete_requester=delete_requester, 3320 status_extractor=status_extractor, 3321 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3322 download_target_extractor=download_target_extractor, 3323 job_timeout=_get_job_timeout(), 3324 ) 3325 
3326 async_job_partition_router = AsyncJobPartitionRouter( 3327 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3328 job_repository, 3329 stream_slices, 3330 self._job_tracker, 3331 self._message_repository, 3332 # FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk 3333 has_bulk_parent=False, 3334 # set the `job_max_retry` to 1 for the `Connector Builder` use-case. 3335 # `None` means the default of 3 retry attempts is used under the hood. 3336 job_max_retry=1 if self._emit_connector_builder_messages else None, 3337 ), 3338 stream_slicer=stream_slicer, 3339 config=config, 3340 parameters=model.parameters or {}, 3341 ) 3342 3343 return AsyncRetriever( 3344 record_selector=record_selector, 3345 stream_slicer=async_job_partition_router, 3346 config=config, 3347 parameters=model.parameters or {}, 3348 ) 3349 3350 @staticmethod 3351 def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3352 return Spec( 3353 connection_specification=model.connection_specification, 3354 documentation_url=model.documentation_url, 3355 advanced_auth=model.advanced_auth, 3356 parameters={}, 3357 ) 3358 3359 def create_substream_partition_router( 3360 self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any 3361 ) -> SubstreamPartitionRouter: 3362 parent_stream_configs = [] 3363 if model.parent_stream_configs: 3364 parent_stream_configs.extend( 3365 [ 3366 self._create_message_repository_substream_wrapper( 3367 model=parent_stream_config, config=config, **kwargs 3368 ) 3369 for parent_stream_config in model.parent_stream_configs 3370 ] 3371 ) 3372 3373 return SubstreamPartitionRouter( 3374 parent_stream_configs=parent_stream_configs, 3375 parameters=model.parameters or {}, 3376 config=config, 3377 ) 3378 3379 def _create_message_repository_substream_wrapper( 3380 self, model: ParentStreamConfigModel, config: Config, **kwargs: Any 3381 ) -> Any: 3382 substream_factory = ModelToComponentFactory( 3383 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3384 limit_slices_fetched=self._limit_slices_fetched, 3385 emit_connector_builder_messages=self._emit_connector_builder_messages, 3386 disable_retries=self._disable_retries, 3387 disable_cache=self._disable_cache, 3388 message_repository=LogAppenderMessageRepositoryDecorator( 3389 {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}}, 3390 self._message_repository, 3391 self._evaluate_log_level(self._emit_connector_builder_messages), 3392 ), 3393 ) 3394 3395 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3396 has_parent_state = bool( 3397 self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None) 3398 if model.incremental_dependency 3399 else False 3400 ) 3401 return substream_factory._create_component_from_model( 3402 model=model, config=config, has_parent_state=has_parent_state, **kwargs 3403 ) 3404 3405 @staticmethod 3406 def create_wait_time_from_header( 3407 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 3408 ) -> WaitTimeFromHeaderBackoffStrategy: 3409 return WaitTimeFromHeaderBackoffStrategy( 3410 header=model.header, 3411 parameters=model.parameters or {}, 3412 config=config, 3413 regex=model.regex, 3414 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 3415 if model.max_waiting_time_in_seconds is not None 3416 else None, 3417 ) 3418 3419 @staticmethod 3420 def create_wait_until_time_from_header( 3421 model:
WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3422 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3423 return WaitUntilTimeFromHeaderBackoffStrategy( 3424 header=model.header, 3425 parameters=model.parameters or {}, 3426 config=config, 3427 min_wait=model.min_wait, 3428 regex=model.regex, 3429 ) 3430 3431 def get_message_repository(self) -> MessageRepository: 3432 return self._message_repository 3433 3434 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 3435 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 3436 3437 @staticmethod 3438 def create_components_mapping_definition( 3439 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 3440 ) -> ComponentMappingDefinition: 3441 interpolated_value = InterpolatedString.create( 3442 model.value, parameters=model.parameters or {} 3443 ) 3444 field_path = [ 3445 InterpolatedString.create(path, parameters=model.parameters or {}) 3446 for path in model.field_path 3447 ] 3448 return ComponentMappingDefinition( 3449 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 3450 value=interpolated_value, 3451 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 3452 parameters=model.parameters or {}, 3453 ) 3454 3455 def create_http_components_resolver( 3456 self, model: HttpComponentsResolverModel, config: Config 3457 ) -> Any: 3458 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 3459 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 3460 3461 retriever = self._create_component_from_model( 3462 model=model.retriever, 3463 config=config, 3464 name="", 3465 primary_key=None, 3466 stream_slicer=stream_slicer if stream_slicer else combined_slicers, 3467 transformations=[], 3468 ) 3469 3470 components_mapping = [ 3471 self._create_component_from_model( 3472 model=components_mapping_definition_model, 3473 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3474 components_mapping_definition_model.value_type 3475 ), 3476 config=config, 3477 ) 3478 for components_mapping_definition_model in model.components_mapping 3479 ] 3480 3481 return HttpComponentsResolver( 3482 retriever=retriever, 3483 config=config, 3484 components_mapping=components_mapping, 3485 parameters=model.parameters or {}, 3486 ) 3487 3488 @staticmethod 3489 def create_stream_config( 3490 model: StreamConfigModel, config: Config, **kwargs: Any 3491 ) -> StreamConfig: 3492 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 3493 [x for x in model.configs_pointer] if model.configs_pointer else [] 3494 ) 3495 3496 return StreamConfig( 3497 configs_pointer=model_configs_pointer, 3498 parameters=model.parameters or {}, 3499 ) 3500 3501 def create_config_components_resolver( 3502 self, model: ConfigComponentsResolverModel, config: Config 3503 ) -> Any: 3504 stream_config = self._create_component_from_model( 3505 model.stream_config, config=config, parameters=model.parameters or {} 3506 ) 3507 3508 components_mapping = [ 3509 self._create_component_from_model( 3510 model=components_mapping_definition_model, 3511 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3512 components_mapping_definition_model.value_type 3513 ), 3514 config=config, 3515 ) 3516 for components_mapping_definition_model in model.components_mapping 3517 ] 3518 3519 return ConfigComponentsResolver( 3520 stream_config=stream_config, 3521 config=config, 3522 
components_mapping=components_mapping, 3523 parameters=model.parameters or {}, 3524 ) 3525 3526 _UNSUPPORTED_DECODER_ERROR = ( 3527 "Specified decoder of {decoder_type} is not supported for pagination. " 3528 "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. " 3529 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 3530 ) 3531 3532 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 3533 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 3534 return True 3535 elif isinstance(decoder, CompositeRawDecoder): 3536 return self._is_supported_parser_for_pagination(decoder.parser) 3537 else: 3538 return False 3539 3540 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 3541 if isinstance(parser, JsonParser): 3542 return True 3543 elif isinstance(parser, GzipParser): 3544 return isinstance(parser.inner_parser, JsonParser) 3545 else: 3546 return False 3547 3548 def create_http_api_budget( 3549 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 3550 ) -> HttpAPIBudget: 3551 policies = [ 3552 self._create_component_from_model(model=policy, config=config) 3553 for policy in model.policies 3554 ] 3555 3556 return HttpAPIBudget( 3557 policies=policies, 3558 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 3559 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 3560 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 3561 ) 3562 3563 def create_fixed_window_call_rate_policy( 3564 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 3565 ) -> FixedWindowCallRatePolicy: 3566 matchers = [ 3567 self._create_component_from_model(model=matcher, config=config) 3568 for matcher in model.matchers 3569 ] 3570 3571 # Set the initial reset timestamp to 10 days from now. 3572 # This value will be updated by the first request.
3573 return FixedWindowCallRatePolicy( 3574 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 3575 period=parse_duration(model.period), 3576 call_limit=model.call_limit, 3577 matchers=matchers, 3578 ) 3579 3580 def create_moving_window_call_rate_policy( 3581 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 3582 ) -> MovingWindowCallRatePolicy: 3583 rates = [ 3584 self._create_component_from_model(model=rate, config=config) for rate in model.rates 3585 ] 3586 matchers = [ 3587 self._create_component_from_model(model=matcher, config=config) 3588 for matcher in model.matchers 3589 ] 3590 return MovingWindowCallRatePolicy( 3591 rates=rates, 3592 matchers=matchers, 3593 ) 3594 3595 def create_unlimited_call_rate_policy( 3596 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 3597 ) -> UnlimitedCallRatePolicy: 3598 matchers = [ 3599 self._create_component_from_model(model=matcher, config=config) 3600 for matcher in model.matchers 3601 ] 3602 3603 return UnlimitedCallRatePolicy( 3604 matchers=matchers, 3605 ) 3606 3607 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 3608 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 3609 return Rate( 3610 limit=int(interpolated_limit.eval(config=config)), 3611 interval=parse_duration(model.interval), 3612 ) 3613 3614 def create_http_request_matcher( 3615 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 3616 ) -> HttpRequestRegexMatcher: 3617 return HttpRequestRegexMatcher( 3618 method=model.method, 3619 url_base=model.url_base, 3620 url_path_pattern=model.url_path_pattern, 3621 params=model.params, 3622 headers=model.headers, 3623 ) 3624 3625 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 3626 self._api_budget = self.create_component( 3627 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 3628 ) 3629 3630 def create_grouping_partition_router( 3631 self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any 3632 ) -> GroupingPartitionRouter: 3633 underlying_router = self._create_component_from_model( 3634 model=model.underlying_partition_router, config=config 3635 ) 3636 if model.group_size < 1: 3637 raise ValueError(f"Group size must be greater than 0, got {model.group_size}") 3638 3639 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 3640 # because they are specific to individual partitions and cannot be aggregated or handled 3641 # when grouping, potentially leading to incorrect API calls. Any request customization 3642 # should be managed at the stream level through the requester's configuration. 3643 if isinstance(underlying_router, SubstreamPartitionRouter): 3644 if any( 3645 parent_config.request_option 3646 for parent_config in underlying_router.parent_stream_configs 3647 ): 3648 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 3649 3650 if isinstance(underlying_router, ListPartitionRouter): 3651 if underlying_router.request_option: 3652 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 3653 3654 return GroupingPartitionRouter( 3655 group_size=model.group_size, 3656 underlying_partition_router=underlying_router, 3657 deduplicate=model.deduplicate if model.deduplicate is not None else True, 3658 config=config, 3659 )
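The constraints enforced by create_grouping_partition_router above can be exercised directly through the factory. A minimal, hypothetical sketch; the ListPartitionRouter definition and its values are illustrative, and the model import path is assumed to match the other model classes referenced in this file:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        GroupingPartitionRouter as GroupingPartitionRouterModel,
    )
    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    factory = ModelToComponentFactory()
    router = factory.create_component(
        model_type=GroupingPartitionRouterModel,
        component_definition={
            "type": "GroupingPartitionRouter",
            "group_size": 10,  # must be >= 1, enforced by create_grouping_partition_router
            "underlying_partition_router": {
                "type": "ListPartitionRouter",
                "values": ["a", "b", "c"],
                "cursor_field": "section",
                # no request_option here: request options on the underlying
                # router are rejected, per the validation above
            },
        },
        config={},
    )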
ModelToComponentFactory( limit_pages_fetched_per_slice: Optional[int] = None, limit_slices_fetched: Optional[int] = None, emit_connector_builder_messages: bool = False, disable_retries: bool = False, disable_cache: bool = False, disable_resumable_full_refresh: bool = False, message_repository: Optional[airbyte_cdk.MessageRepository] = None, connector_state_manager: Optional[airbyte_cdk.ConnectorStateManager] = None, max_concurrent_async_job_count: Optional[int] = None)
563 def __init__( 564 self, 565 limit_pages_fetched_per_slice: Optional[int] = None, 566 limit_slices_fetched: Optional[int] = None, 567 emit_connector_builder_messages: bool = False, 568 disable_retries: bool = False, 569 disable_cache: bool = False, 570 disable_resumable_full_refresh: bool = False, 571 message_repository: Optional[MessageRepository] = None, 572 connector_state_manager: Optional[ConnectorStateManager] = None, 573 max_concurrent_async_job_count: Optional[int] = None, 574 ): 575 self._init_mappings() 576 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 577 self._limit_slices_fetched = limit_slices_fetched 578 self._emit_connector_builder_messages = emit_connector_builder_messages 579 self._disable_retries = disable_retries 580 self._disable_cache = disable_cache 581 self._disable_resumable_full_refresh = disable_resumable_full_refresh 582 self._message_repository = message_repository or InMemoryMessageRepository( 583 self._evaluate_log_level(emit_connector_builder_messages) 584 ) 585 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 586 self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None 587 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
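Most of these constructor arguments exist to support the Connector Builder's test-read mode. A minimal sketch of both typical configurations, based only on the signature above (the specific flag values are illustrative):

    # Default factory, as used for a regular sync.
    factory = ModelToComponentFactory()

    # Builder-style factory: caps pages/slices, emits debug-level messages,
    # and disables retries/caching so test reads stay fast and fresh.
    builder_factory = ModelToComponentFactory(
        limit_pages_fetched_per_slice=5,
        limit_slices_fetched=5,
        emit_connector_builder_messages=True,
        disable_retries=True,
        disable_cache=True,
    )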
def
create_component( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], config: Mapping[str, Any], **kwargs: Any) -> Any:
691 def create_component( 692 self, 693 model_type: Type[BaseModel], 694 component_definition: ComponentDefinition, 695 config: Config, 696 **kwargs: Any, 697 ) -> Any: 698 """ 699 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 700 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating 701 declarative components from that model. 702 703 :param model_type: The type of declarative component that is being initialized 704 :param component_definition: The mapping that represents a declarative component 705 :param config: The connector config that is provided by the customer 706 :return: The declarative component to be used at runtime 707 """ 708 709 component_type = component_definition.get("type") 710 if component_definition.get("type") != model_type.__name__: 711 raise ValueError( 712 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 713 ) 714 715 declarative_component_model = model_type.parse_obj(component_definition) 716 717 if not isinstance(declarative_component_model, model_type): 718 raise ValueError( 719 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 720 ) 721 722 return self._create_component_from_model( 723 model=declarative_component_model, config=config, **kwargs 724 )
Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating declarative components from that model.
Parameters
- model_type: The type of declarative component that is being initialized
- component_definition: The mapping that represents a declarative component
- config: The connector config that is provided by the customer
Returns
The declarative component to be used at runtime
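A minimal usage sketch, reusing the `factory` built in the constructor example above. RecordFilter is chosen only for illustration, and its import path is assumed to match the other model classes in this file; the "type" key must equal `model_type.__name__` or a ValueError is raised:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        RecordFilter as RecordFilterModel,
    )

    record_filter = factory.create_component(
        model_type=RecordFilterModel,
        component_definition={
            "type": "RecordFilter",
            "condition": "{{ record['status'] == 'active' }}",
        },
        config={},
    )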
@staticmethod
def
create_added_field_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddedFieldDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddedFieldDefinition:
736 @staticmethod 737 def create_added_field_definition( 738 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 739 ) -> AddedFieldDefinition: 740 interpolated_value = InterpolatedString.create( 741 model.value, parameters=model.parameters or {} 742 ) 743 return AddedFieldDefinition( 744 path=model.path, 745 value=interpolated_value, 746 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 747 parameters=model.parameters or {}, 748 )
def
create_add_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddFields:
750 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 751 added_field_definitions = [ 752 self._create_component_from_model( 753 model=added_field_definition_model, 754 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 755 added_field_definition_model.value_type 756 ), 757 config=config, 758 ) 759 for added_field_definition_model in model.fields 760 ] 761 return AddFields( 762 fields=added_field_definitions, 763 condition=model.condition or "", 764 parameters=model.parameters or {}, 765 )
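A sketch of the manifest shape this method consumes; the field name and value are illustrative. Each entry in fields becomes an AddedFieldDefinition whose value is an interpolated string:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        AddFields as AddFieldsModel,
    )

    add_fields = factory.create_component(
        model_type=AddFieldsModel,
        component_definition={
            "type": "AddFields",
            "fields": [
                {
                    "type": "AddedFieldDefinition",
                    "path": ["ingested_at"],
                    "value": "{{ now_utc() }}",
                }
            ],
        },
        config={},
    )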
def
create_keys_to_lower_transformation( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.KeysToLower, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation.KeysToLowerTransformation:
def
create_keys_to_snake_transformation( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.KeysToSnakeCase, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation.KeysToSnakeCaseTransformation:
def
create_keys_replace_transformation( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.KeysReplace, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.keys_replace_transformation.KeysReplaceTransformation:
def
create_flatten_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FlattenFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.flatten_fields.FlattenFields:
def
create_dpath_flatten_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DpathFlattenFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields.DpathFlattenFields:
791 def create_dpath_flatten_fields( 792 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 793 ) -> DpathFlattenFields: 794 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 795 key_transformation = ( 796 KeyTransformation( 797 config=config, 798 prefix=model.key_transformation.prefix, 799 suffix=model.key_transformation.suffix, 800 parameters=model.parameters or {}, 801 ) 802 if model.key_transformation is not None 803 else None 804 ) 805 return DpathFlattenFields( 806 config=config, 807 field_path=model_field_path, 808 delete_origin_value=model.delete_origin_value 809 if model.delete_origin_value is not None 810 else False, 811 replace_record=model.replace_record if model.replace_record is not None else False, 812 key_transformation=key_transformation, 813 parameters=model.parameters or {}, 814 )
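A sketch of a definition this method accepts; the paths, flags, and prefix are illustrative. The nested object at field_path is flattened into the record root, optionally renaming keys via the key_transformation shown in the source above:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        DpathFlattenFields as DpathFlattenFieldsModel,
    )

    flatten = factory.create_component(
        model_type=DpathFlattenFieldsModel,
        component_definition={
            "type": "DpathFlattenFields",
            "field_path": ["metadata"],
            "delete_origin_value": True,
            "key_transformation": {
                "type": "KeyTransformation",
                "prefix": "metadata_",
            },
        },
        config={},
    )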
def
create_api_key_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ApiKeyAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.ApiKeyAuthenticator:
828 def create_api_key_authenticator( 829 self, 830 model: ApiKeyAuthenticatorModel, 831 config: Config, 832 token_provider: Optional[TokenProvider] = None, 833 **kwargs: Any, 834 ) -> ApiKeyAuthenticator: 835 if model.inject_into is None and model.header is None: 836 raise ValueError( 837 "Expected either inject_into or header to be set for ApiKeyAuthenticator" 838 ) 839 840 if model.inject_into is not None and model.header is not None: 841 raise ValueError( 842 "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option" 843 ) 844 845 if token_provider is not None and model.api_token != "": 846 raise ValueError( 847 "If token_provider is set, api_token is ignored and must be set to an empty string." 848 ) 849 850 request_option = ( 851 self._create_component_from_model( 852 model.inject_into, config, parameters=model.parameters or {} 853 ) 854 if model.inject_into 855 else RequestOption( 856 inject_into=RequestOptionType.header, 857 field_name=model.header or "", 858 parameters=model.parameters or {}, 859 ) 860 ) 861 862 return ApiKeyAuthenticator( 863 token_provider=( 864 token_provider 865 if token_provider is not None 866 else InterpolatedStringTokenProvider( 867 api_token=model.api_token or "", 868 config=config, 869 parameters=model.parameters or {}, 870 ) 871 ), 872 request_option=request_option, 873 config=config, 874 parameters=model.parameters or {}, 875 )
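A sketch of a valid definition; the header name and config key are illustrative. Exactly one of inject_into or the deprecated header option may be set, per the validation above:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
    )

    authenticator = factory.create_component(
        model_type=ApiKeyAuthenticatorModel,
        component_definition={
            "type": "ApiKeyAuthenticator",
            "api_token": "{{ config['api_key'] }}",
            "inject_into": {
                "type": "RequestOption",
                "inject_into": "header",
                "field_name": "X-API-Key",
            },
        },
        config={"api_key": "secret"},
    )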
def
create_legacy_to_per_partition_state_migration( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.LegacyToPerPartitionStateMigration, config: Mapping[str, Any], declarative_stream: airbyte_cdk.sources.declarative.models.declarative_component_schema.DeclarativeStream) -> airbyte_cdk.LegacyToPerPartitionStateMigration:
877 def create_legacy_to_per_partition_state_migration( 878 self, 879 model: LegacyToPerPartitionStateMigrationModel, 880 config: Mapping[str, Any], 881 declarative_stream: DeclarativeStreamModel, 882 ) -> LegacyToPerPartitionStateMigration: 883 retriever = declarative_stream.retriever 884 if not isinstance(retriever, SimpleRetrieverModel): 885 raise ValueError( 886 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}" 887 ) 888 partition_router = retriever.partition_router 889 if not isinstance( 890 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 891 ): 892 raise ValueError( 893 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 894 ) 895 if not hasattr(partition_router, "parent_stream_configs"): 896 raise ValueError( 897 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 898 ) 899 900 if not hasattr(declarative_stream, "incremental_sync"): 901 raise ValueError( 902 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 903 ) 904 905 return LegacyToPerPartitionStateMigration( 906 partition_router, # type: ignore # was already checked above 907 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 908 config, 909 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 910 )
def
create_session_token_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SessionTokenAuthenticator, config: Mapping[str, Any], name: str, **kwargs: Any) -> Union[airbyte_cdk.ApiKeyAuthenticator, airbyte_cdk.BearerAuthenticator]:
912 def create_session_token_authenticator( 913 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 914 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 915 decoder = ( 916 self._create_component_from_model(model=model.decoder, config=config) 917 if model.decoder 918 else JsonDecoder(parameters={}) 919 ) 920 login_requester = self._create_component_from_model( 921 model=model.login_requester, 922 config=config, 923 name=f"{name}_login_requester", 924 decoder=decoder, 925 ) 926 token_provider = SessionTokenProvider( 927 login_requester=login_requester, 928 session_token_path=model.session_token_path, 929 expiration_duration=parse_duration(model.expiration_duration) 930 if model.expiration_duration 931 else None, 932 parameters=model.parameters or {}, 933 message_repository=self._message_repository, 934 decoder=decoder, 935 ) 936 if model.request_authentication.type == "Bearer": 937 return ModelToComponentFactory.create_bearer_authenticator( 938 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 939 config, 940 token_provider=token_provider, 941 ) 942 else: 943 return self.create_api_key_authenticator( 944 ApiKeyAuthenticatorModel( 945 type="ApiKeyAuthenticator", 946 api_token="", 947 inject_into=model.request_authentication.inject_into, 948 ), # type: ignore # $parameters and headers default to None 949 config=config, 950 token_provider=token_provider, 951 )
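A sketch of a definition this method consumes; the URL, paths, and duration are illustrative, and the login_requester shape is an assumption about the HttpRequester manifest fields. The request_authentication type selects between the Bearer and ApiKey wrappers around the SessionTokenProvider:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
    )

    session_authenticator = factory.create_component(
        model_type=SessionTokenAuthenticatorModel,
        component_definition={
            "type": "SessionTokenAuthenticator",
            "login_requester": {
                "type": "HttpRequester",
                "url_base": "https://api.example.com",
                "path": "/login",
                "http_method": "POST",
            },
            "session_token_path": ["token"],
            "expiration_duration": "PT1H",  # ISO 8601, parsed with parse_duration
            "request_authentication": {"type": "Bearer"},
        },
        config={},
        name="users",  # forwarded to create_session_token_authenticator
    )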
@staticmethod
def
create_basic_http_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BasicHttpAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.BasicHttpAuthenticator:
953 @staticmethod 954 def create_basic_http_authenticator( 955 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 956 ) -> BasicHttpAuthenticator: 957 return BasicHttpAuthenticator( 958 password=model.password or "", 959 username=model.username, 960 config=config, 961 parameters=model.parameters or {}, 962 )
@staticmethod
def
create_bearer_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BearerAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.BearerAuthenticator:
964 @staticmethod 965 def create_bearer_authenticator( 966 model: BearerAuthenticatorModel, 967 config: Config, 968 token_provider: Optional[TokenProvider] = None, 969 **kwargs: Any, 970 ) -> BearerAuthenticator: 971 if token_provider is not None and model.api_token != "": 972 raise ValueError( 973 "If token_provider is set, api_token is ignored and must be set to an empty string." 974 ) 975 return BearerAuthenticator( 976 token_provider=( 977 token_provider 978 if token_provider is not None 979 else InterpolatedStringTokenProvider( 980 api_token=model.api_token or "", 981 config=config, 982 parameters=model.parameters or {}, 983 ) 984 ), 985 config=config, 986 parameters=model.parameters or {}, 987 )
@staticmethod
def
create_dynamic_stream_check_config( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DynamicStreamCheckConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.DynamicStreamCheckConfig:
989 @staticmethod 990 def create_dynamic_stream_check_config( 991 model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any 992 ) -> DynamicStreamCheckConfig: 993 return DynamicStreamCheckConfig( 994 dynamic_stream_name=model.dynamic_stream_name, 995 stream_count=model.stream_count or 0, 996 )
def
create_check_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckStream:
998 def create_check_stream( 999 self, model: CheckStreamModel, config: Config, **kwargs: Any 1000 ) -> CheckStream: 1001 if model.dynamic_streams_check_configs is None and model.stream_names is None: 1002 raise ValueError( 1003 "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream" 1004 ) 1005 1006 dynamic_streams_check_configs = ( 1007 [ 1008 self._create_component_from_model(model=dynamic_stream_check_config, config=config) 1009 for dynamic_stream_check_config in model.dynamic_streams_check_configs 1010 ] 1011 if model.dynamic_streams_check_configs 1012 else [] 1013 ) 1014 1015 return CheckStream( 1016 stream_names=model.stream_names or [], 1017 dynamic_streams_check_configs=dynamic_streams_check_configs, 1018 parameters={}, 1019 )
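A minimal sketch; the stream name is illustrative. At least one of stream_names or dynamic_streams_check_configs must be provided, per the validation above:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        CheckStream as CheckStreamModel,
    )

    check = factory.create_component(
        model_type=CheckStreamModel,
        component_definition={
            "type": "CheckStream",
            "stream_names": ["users"],
        },
        config={},
    )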
@staticmethod
def
create_check_dynamic_stream( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckDynamicStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckDynamicStream:
1021 @staticmethod 1022 def create_check_dynamic_stream( 1023 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 1024 ) -> CheckDynamicStream: 1025 assert model.use_check_availability is not None # for mypy 1026 1027 use_check_availability = model.use_check_availability 1028 1029 return CheckDynamicStream( 1030 stream_count=model.stream_count, 1031 use_check_availability=use_check_availability, 1032 parameters={}, 1033 )
def
create_composite_error_handler( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CompositeErrorHandler, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.CompositeErrorHandler:
1035 def create_composite_error_handler( 1036 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 1037 ) -> CompositeErrorHandler: 1038 error_handlers = [ 1039 self._create_component_from_model(model=error_handler_model, config=config) 1040 for error_handler_model in model.error_handlers 1041 ] 1042 return CompositeErrorHandler( 1043 error_handlers=error_handlers, parameters=model.parameters or {} 1044 )
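A sketch of a definition this method consumes; the status codes and actions are illustrative, and the DefaultErrorHandler/HttpResponseFilter shapes are assumptions about the wider manifest schema rather than something defined in this method:

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        CompositeErrorHandler as CompositeErrorHandlerModel,
    )

    error_handler = factory.create_component(
        model_type=CompositeErrorHandlerModel,
        component_definition={
            "type": "CompositeErrorHandler",
            "error_handlers": [
                {
                    "type": "DefaultErrorHandler",
                    "response_filters": [
                        {"type": "HttpResponseFilter", "action": "RETRY", "http_codes": [429]}
                    ],
                },
                {
                    "type": "DefaultErrorHandler",
                    "response_filters": [
                        {"type": "HttpResponseFilter", "action": "FAIL", "http_codes": [401]}
                    ],
                },
            ],
        },
        config={},
    )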
@staticmethod
def
create_concurrency_level( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConcurrencyLevel, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.concurrency_level.ConcurrencyLevel:
1046 @staticmethod 1047 def create_concurrency_level( 1048 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 1049 ) -> ConcurrencyLevel: 1050 return ConcurrencyLevel( 1051 default_concurrency=model.default_concurrency, 1052 max_concurrency=model.max_concurrency, 1053 config=config, 1054 parameters={}, 1055 )
@staticmethod
def
apply_stream_state_migrations( stream_state_migrations: Optional[List[Any]], stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
1057 @staticmethod 1058 def apply_stream_state_migrations( 1059 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 1060 ) -> MutableMapping[str, Any]: 1061 if stream_state_migrations: 1062 for state_migration in stream_state_migrations: 1063 if state_migration.should_migrate(stream_state): 1064 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 1065 stream_state = dict(state_migration.migrate(stream_state)) 1066 return stream_state
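A sketch of a migration object compatible with this helper; the class and key names are hypothetical. The helper only requires should_migrate and migrate methods, and as noted in the source, the (possibly immutable) mapping returned by migrate is copied into a mutable dict:

    from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
        ModelToComponentFactory,
    )

    class RenameCursorKeyMigration:
        """Hypothetical migration: copy a legacy cursor key to its new name."""

        def should_migrate(self, stream_state):
            return "updated" in stream_state and "updated_at" not in stream_state

        def migrate(self, stream_state):
            return {**stream_state, "updated_at": stream_state["updated"]}

    state = ModelToComponentFactory.apply_stream_state_migrations(
        [RenameCursorKeyMigration()], {"updated": "2024-01-01"}
    )
    # state == {"updated": "2024-01-01", "updated_at": "2024-01-01"}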
def
create_concurrent_cursor_from_datetime_based_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, runtime_lookback_window: Optional[datetime.timedelta] = None, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
def create_concurrent_cursor_from_datetime_based_cursor(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    message_repository: Optional[MessageRepository] = None,
    runtime_lookback_window: Optional[datetime.timedelta] = None,
    stream_state_migrations: Optional[List[Any]] = None,
    **kwargs: Any,
) -> ConcurrentCursor:
    # Per-partition incremental streams can dynamically create child cursors which will pass their current
    # state via the stream_state keyword argument. Incremental syncs without parent streams use the
    # incoming state and connector_state_manager that is initialized when the component factory is created
    stream_state = (
        self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
        if "stream_state" not in kwargs
        else kwargs["stream_state"]
    )
    stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

    component_type = component_definition.get("type")
    if component_definition.get("type") != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    datetime_based_cursor_model = model_type.parse_obj(component_definition)

    if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
        )

    interpolated_cursor_field = InterpolatedString.create(
        datetime_based_cursor_model.cursor_field,
        parameters=datetime_based_cursor_model.parameters or {},
    )
    cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

    interpolated_partition_field_start = InterpolatedString.create(
        datetime_based_cursor_model.partition_field_start or "start_time",
        parameters=datetime_based_cursor_model.parameters or {},
    )
    interpolated_partition_field_end = InterpolatedString.create(
        datetime_based_cursor_model.partition_field_end or "end_time",
        parameters=datetime_based_cursor_model.parameters or {},
    )

    slice_boundary_fields = (
        interpolated_partition_field_start.eval(config=config),
        interpolated_partition_field_end.eval(config=config),
    )

    datetime_format = datetime_based_cursor_model.datetime_format

    cursor_granularity = (
        parse_duration(datetime_based_cursor_model.cursor_granularity)
        if datetime_based_cursor_model.cursor_granularity
        else None
    )

    lookback_window = None
    interpolated_lookback_window = (
        InterpolatedString.create(
            datetime_based_cursor_model.lookback_window,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        if datetime_based_cursor_model.lookback_window
        else None
    )
    if interpolated_lookback_window:
        evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
        if evaluated_lookback_window:
            lookback_window = parse_duration(evaluated_lookback_window)

    connector_state_converter: DateTimeStreamStateConverter
    connector_state_converter = CustomFormatConcurrentStreamStateConverter(
        datetime_format=datetime_format,
        input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        cursor_granularity=cursor_granularity,
    )

    # Adjusts the stream state by applying the runtime lookback window.
    # This is used to ensure correct state handling in case of failed partitions.
    stream_state_value = stream_state.get(cursor_field.cursor_field_key)
    if runtime_lookback_window and stream_state_value:
        new_stream_state = (
            connector_state_converter.parse_timestamp(stream_state_value)
            - runtime_lookback_window
        )
        stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
            new_stream_state
        )

    start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
    if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
        start_date_runtime_value = self.create_min_max_datetime(
            model=datetime_based_cursor_model.start_datetime, config=config
        )
    else:
        start_date_runtime_value = datetime_based_cursor_model.start_datetime

    end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
    if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
        end_date_runtime_value = self.create_min_max_datetime(
            model=datetime_based_cursor_model.end_datetime, config=config
        )
    else:
        end_date_runtime_value = datetime_based_cursor_model.end_datetime

    interpolated_start_date = MinMaxDatetime.create(
        interpolated_string_or_min_max_datetime=start_date_runtime_value,
        parameters=datetime_based_cursor_model.parameters,
    )
    interpolated_end_date = (
        None
        if not end_date_runtime_value
        else MinMaxDatetime.create(
            end_date_runtime_value, datetime_based_cursor_model.parameters
        )
    )

    # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
    if not interpolated_start_date.datetime_format:
        interpolated_start_date.datetime_format = datetime_format
    if interpolated_end_date and not interpolated_end_date.datetime_format:
        interpolated_end_date.datetime_format = datetime_format

    start_date = interpolated_start_date.get_datetime(config=config)
    end_date_provider = (
        partial(interpolated_end_date.get_datetime, config)
        if interpolated_end_date
        else connector_state_converter.get_end_provider()
    )

    if (
        datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
    ) or (
        not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
    ):
        raise ValueError(
            f"If step is defined, cursor_granularity should be as well and vice-versa. "
            f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
        )

    # When step is not defined, default to a step size from the starting date to the present moment
    step_length = datetime.timedelta.max
    interpolated_step = (
        InterpolatedString.create(
            datetime_based_cursor_model.step,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        if datetime_based_cursor_model.step
        else None
    )
    if interpolated_step:
        evaluated_step = interpolated_step.eval(config)
        if evaluated_step:
            step_length = parse_duration(evaluated_step)

    clamping_strategy: ClampingStrategy = NoClamping()
    if datetime_based_cursor_model.clamping:
        # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
        # it is still better than shifting the interpolation low-code concept into the ConcurrentCursor
        # runtime object, which we want to keep agnostic of being low-code
        target = InterpolatedString(
            string=datetime_based_cursor_model.clamping.target,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        evaluated_target = target.eval(config=config)
        match evaluated_target:
            case "DAY":
                clamping_strategy = DayClampingStrategy()
                end_date_provider = ClampingEndProvider(
                    DayClampingStrategy(is_ceiling=False),
                    end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                    granularity=cursor_granularity or datetime.timedelta(seconds=1),
                )
            case "WEEK":
                if (
                    not datetime_based_cursor_model.clamping.target_details
                    or "weekday" not in datetime_based_cursor_model.clamping.target_details
                ):
                    raise ValueError(
                        "Given WEEK clamping, weekday needs to be provided as target_details"
                    )
                weekday = self._assemble_weekday(
                    datetime_based_cursor_model.clamping.target_details["weekday"]
                )
                clamping_strategy = WeekClampingStrategy(weekday)
                end_date_provider = ClampingEndProvider(
                    WeekClampingStrategy(weekday, is_ceiling=False),
                    end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                    granularity=cursor_granularity or datetime.timedelta(days=1),
                )
            case "MONTH":
                clamping_strategy = MonthClampingStrategy()
                end_date_provider = ClampingEndProvider(
                    MonthClampingStrategy(is_ceiling=False),
                    end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                    granularity=cursor_granularity or datetime.timedelta(days=1),
                )
            case _:
                raise ValueError(
                    f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
                )

    return ConcurrentCursor(
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=message_repository or self._message_repository,
        connector_state_manager=self._connector_state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
        slice_boundary_fields=slice_boundary_fields,
        start=start_date,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        end_provider=end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        lookback_window=lookback_window,
        slice_range=step_length,
        cursor_granularity=cursor_granularity,
        clamping_strategy=clamping_strategy,
    )
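To make the flow above concrete, here is a minimal sketch of calling this method directly. It assumes a default-constructed ModelToComponentFactory (no prior state, default message repository) is sufficient; the stream name, cursor field, date formats, and config values are invented for illustration.

from airbyte_cdk.sources.declarative.models import (
    DatetimeBasedCursor as DatetimeBasedCursorModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical manifest component; field values are illustrative.
component_definition = {
    "type": "DatetimeBasedCursor",
    "cursor_field": "updated_at",
    "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
    "cursor_datetime_formats": ["%Y-%m-%dT%H:%M:%SZ"],
    "start_datetime": "{{ config['start_date'] }}",
    "step": "P30D",  # step and cursor_granularity must be defined together
    "cursor_granularity": "PT1S",
}

cursor = factory.create_concurrent_cursor_from_datetime_based_cursor(
    model_type=DatetimeBasedCursorModel,
    component_definition=component_definition,
    stream_name="orders",
    stream_namespace=None,
    config={"start_date": "2024-01-01T00:00:00Z"},
)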
def
create_concurrent_cursor_from_incrementing_count_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
def create_concurrent_cursor_from_incrementing_count_cursor(
    self,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    message_repository: Optional[MessageRepository] = None,
    **kwargs: Any,
) -> ConcurrentCursor:
    # Per-partition incremental streams can dynamically create child cursors which will pass their current
    # state via the stream_state keyword argument. Incremental syncs without parent streams use the
    # incoming state and connector_state_manager that is initialized when the component factory is created
    stream_state = (
        self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
        if "stream_state" not in kwargs
        else kwargs["stream_state"]
    )

    component_type = component_definition.get("type")
    if component_definition.get("type") != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    incrementing_count_cursor_model = model_type.parse_obj(component_definition)

    if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
        )

    interpolated_start_value = (
        InterpolatedString.create(
            incrementing_count_cursor_model.start_value,  # type: ignore
            parameters=incrementing_count_cursor_model.parameters or {},
        )
        if incrementing_count_cursor_model.start_value
        else 0
    )

    interpolated_cursor_field = InterpolatedString.create(
        incrementing_count_cursor_model.cursor_field,
        parameters=incrementing_count_cursor_model.parameters or {},
    )
    cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

    connector_state_converter = IncrementingCountStreamStateConverter(
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
    )

    return ConcurrentCursor(
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=message_repository or self._message_repository,
        connector_state_manager=self._connector_state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
        slice_boundary_fields=None,
        start=interpolated_start_value,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        end_provider=connector_state_converter.get_end_provider(),  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
    )
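As above, a hedged sketch of driving this method by hand. The component definition is invented, and it assumes the installed CDK version ships the IncrementingCountCursor model and that a default-constructed factory falls back to an empty connector state.

from airbyte_cdk.sources.declarative.models import (
    IncrementingCountCursor as IncrementingCountCursorModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical component definition; "id" is an illustrative cursor field.
component_definition = {
    "type": "IncrementingCountCursor",
    "cursor_field": "id",
    "start_value": 0,
}

cursor = factory.create_concurrent_cursor_from_incrementing_count_cursor(
    model_type=IncrementingCountCursorModel,
    component_definition=component_definition,
    stream_name="events",
    stream_namespace=None,
    config={},
)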
def
create_concurrent_cursor_from_perpartition_cursor( self, state_manager: airbyte_cdk.ConnectorStateManager, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], stream_state: MutableMapping[str, Any], partition_router: airbyte_cdk.sources.declarative.partition_routers.PartitionRouter, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.sources.declarative.incremental.ConcurrentPerPartitionCursor:
def create_concurrent_cursor_from_perpartition_cursor(
    self,
    state_manager: ConnectorStateManager,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    stream_state: MutableMapping[str, Any],
    partition_router: PartitionRouter,
    stream_state_migrations: Optional[List[Any]] = None,
    **kwargs: Any,
) -> ConcurrentPerPartitionCursor:
    component_type = component_definition.get("type")
    if component_definition.get("type") != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    datetime_based_cursor_model = model_type.parse_obj(component_definition)

    if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
        )

    interpolated_cursor_field = InterpolatedString.create(
        datetime_based_cursor_model.cursor_field,
        parameters=datetime_based_cursor_model.parameters or {},
    )
    cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

    datetime_format = datetime_based_cursor_model.datetime_format

    cursor_granularity = (
        parse_duration(datetime_based_cursor_model.cursor_granularity)
        if datetime_based_cursor_model.cursor_granularity
        else None
    )

    connector_state_converter: DateTimeStreamStateConverter
    connector_state_converter = CustomFormatConcurrentStreamStateConverter(
        datetime_format=datetime_format,
        input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
        is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        cursor_granularity=cursor_granularity,
    )

    # Create the cursor factory
    cursor_factory = ConcurrentCursorFactory(
        partial(
            self.create_concurrent_cursor_from_datetime_based_cursor,
            state_manager=state_manager,
            model_type=model_type,
            component_definition=component_definition,
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            config=config,
            message_repository=NoopMessageRepository(),
            stream_state_migrations=stream_state_migrations,
        )
    )
    stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

    # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
    use_global_cursor = isinstance(
        partition_router, GroupingPartitionRouter
    ) or component_definition.get("global_substream_cursor", False)

    # Return the concurrent cursor and state converter
    return ConcurrentPerPartitionCursor(
        cursor_factory=cursor_factory,
        partition_router=partition_router,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=self._message_repository,  # type: ignore
        connector_state_manager=state_manager,
        connector_state_converter=connector_state_converter,
        cursor_field=cursor_field,
        use_global_cursor=use_global_cursor,
    )
@staticmethod
def
create_constant_backoff_strategy( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConstantBackoffStrategy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.ConstantBackoffStrategy:
@staticmethod
def create_constant_backoff_strategy(
    model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
) -> ConstantBackoffStrategy:
    return ConstantBackoffStrategy(
        backoff_time_in_seconds=model.backoff_time_in_seconds,
        config=config,
        parameters=model.parameters or {},
    )
def
create_cursor_pagination( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CursorPagination, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder, **kwargs: Any) -> airbyte_cdk.CursorPaginationStrategy:
def create_cursor_pagination(
    self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
) -> CursorPaginationStrategy:
    if isinstance(decoder, PaginationDecoderDecorator):
        inner_decoder = decoder.decoder
    else:
        inner_decoder = decoder
        decoder = PaginationDecoderDecorator(decoder=decoder)

    if self._is_supported_decoder_for_pagination(inner_decoder):
        decoder_to_use = decoder
    else:
        raise ValueError(
            self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
        )

    return CursorPaginationStrategy(
        cursor_value=model.cursor_value,
        decoder=decoder_to_use,
        page_size=model.page_size,
        stop_condition=model.stop_condition,
        config=config,
        parameters=model.parameters or {},
    )
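A short, illustrative sketch of the decoder handling: a bare JsonDecoder can be passed in, because the method wraps undecorated decoders in PaginationDecoderDecorator itself. The cursor_value expression and page size are invented.

from airbyte_cdk.sources.declarative.decoders import JsonDecoder
from airbyte_cdk.sources.declarative.models import (
    CursorPagination as CursorPaginationModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical pagination model; the interpolation expression is illustrative.
model = CursorPaginationModel(
    type="CursorPagination",
    cursor_value="{{ response.next_page_token }}",
    page_size=100,
)

strategy = factory.create_cursor_pagination(
    model=model, config={}, decoder=JsonDecoder(parameters={})
)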
def
create_custom_component(self, model: Any, config: Mapping[str, Any], **kwargs: Any) -> Any:
def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
    """
    Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
    instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor

    :param model: The Pydantic model of the custom component being created
    :param config: The custom defined connector config
    :return: The declarative component built from the Pydantic model to be used at runtime
    """
    custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
    component_fields = get_type_hints(custom_component_class)
    model_args = model.dict()
    model_args["config"] = config

    # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions
    # we defer to these arguments over the component's definition
    for key, arg in kwargs.items():
        model_args[key] = arg

    # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
    # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
    # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
    for model_field, model_value in model_args.items():
        # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
        if (
            isinstance(model_value, dict)
            and "type" not in model_value
            and model_field in component_fields
        ):
            derived_type = self._derive_component_type_from_type_hints(
                component_fields.get(model_field)
            )
            if derived_type:
                model_value["type"] = derived_type

        if self._is_component(model_value):
            model_args[model_field] = self._create_nested_component(
                model, model_field, model_value, config
            )
        elif isinstance(model_value, list):
            vals = []
            for v in model_value:
                if isinstance(v, dict) and "type" not in v and model_field in component_fields:
                    derived_type = self._derive_component_type_from_type_hints(
                        component_fields.get(model_field)
                    )
                    if derived_type:
                        v["type"] = derived_type
                if self._is_component(v):
                    vals.append(self._create_nested_component(model, model_field, v, config))
                else:
                    vals.append(v)
            model_args[model_field] = vals

    kwargs = {
        class_field: model_args[class_field]
        for class_field in component_fields.keys()
        if class_field in model_args
    }
    return custom_component_class(**kwargs)
Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor.
Parameters
- model: The Pydantic model of the custom component being created
- config: The custom defined connector config
Returns
The declarative component built from the Pydantic model to be used at runtime
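The class_name resolution can be pictured with a hypothetical manifest component. The module path, class, and field names below are invented for illustration; only fields declared in the custom class's type hints survive the final filtering step before the constructor is called.

# Hypothetical manifest component. "class_name" must point at an importable class;
# only fields declared on MyCustomRequester would be forwarded to its constructor.
custom_component = {
    "type": "CustomRequester",
    "class_name": "source_example.components.MyCustomRequester",  # hypothetical module/class
    "declared_field": "passed through",   # assumed present in MyCustomRequester's type hints
    "undeclared_field": "filtered out",   # dropped: not in the class's type hints
}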
def
create_datetime_based_cursor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DatetimeBasedCursor, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DatetimeBasedCursor:
def create_datetime_based_cursor(
    self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
) -> DatetimeBasedCursor:
    start_datetime: Union[str, MinMaxDatetime] = (
        model.start_datetime
        if isinstance(model.start_datetime, str)
        else self.create_min_max_datetime(model.start_datetime, config)
    )
    end_datetime: Union[str, MinMaxDatetime, None] = None
    if model.is_data_feed and model.end_datetime:
        raise ValueError("Data feed does not support end_datetime")
    if model.is_data_feed and model.is_client_side_incremental:
        raise ValueError(
            "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them."
        )
    if model.end_datetime:
        end_datetime = (
            model.end_datetime
            if isinstance(model.end_datetime, str)
            else self.create_min_max_datetime(model.end_datetime, config)
        )

    end_time_option = (
        self._create_component_from_model(
            model.end_time_option, config, parameters=model.parameters or {}
        )
        if model.end_time_option
        else None
    )
    start_time_option = (
        self._create_component_from_model(
            model.start_time_option, config, parameters=model.parameters or {}
        )
        if model.start_time_option
        else None
    )

    return DatetimeBasedCursor(
        cursor_field=model.cursor_field,
        cursor_datetime_formats=model.cursor_datetime_formats
        if model.cursor_datetime_formats
        else [],
        cursor_granularity=model.cursor_granularity,
        datetime_format=model.datetime_format,
        end_datetime=end_datetime,
        start_datetime=start_datetime,
        step=model.step,
        end_time_option=end_time_option,
        lookback_window=model.lookback_window,
        start_time_option=start_time_option,
        partition_field_end=model.partition_field_end,
        partition_field_start=model.partition_field_start,
        message_repository=self._message_repository,
        is_compare_strictly=model.is_compare_strictly,
        config=config,
        parameters=model.parameters or {},
    )
def
create_declarative_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DeclarativeStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
def create_declarative_stream(
    self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
) -> DeclarativeStream:
    # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
    # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
    # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
    # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one.
    combined_slicers = self._merge_stream_slicers(model=model, config=config)

    primary_key = model.primary_key.__root__ if model.primary_key else None
    stop_condition_on_cursor = (
        model.incremental_sync
        and hasattr(model.incremental_sync, "is_data_feed")
        and model.incremental_sync.is_data_feed
    )
    client_side_incremental_sync = None
    if (
        model.incremental_sync
        and hasattr(model.incremental_sync, "is_client_side_incremental")
        and model.incremental_sync.is_client_side_incremental
    ):
        supported_slicers = (
            DatetimeBasedCursor,
            GlobalSubstreamCursor,
            PerPartitionWithGlobalCursor,
        )
        if combined_slicers and not isinstance(combined_slicers, supported_slicers):
            raise ValueError(
                "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
            )
        cursor = (
            combined_slicers
            if isinstance(
                combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
            )
            else self._create_component_from_model(model=model.incremental_sync, config=config)
        )

        client_side_incremental_sync = {"cursor": cursor}

    if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
        cursor_model = model.incremental_sync

        end_time_option = (
            self._create_component_from_model(
                cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
            )
            if cursor_model.end_time_option
            else None
        )
        start_time_option = (
            self._create_component_from_model(
                cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
            )
            if cursor_model.start_time_option
            else None
        )

        request_options_provider = DatetimeBasedRequestOptionsProvider(
            start_time_option=start_time_option,
            end_time_option=end_time_option,
            partition_field_start=cursor_model.partition_field_start,
            partition_field_end=cursor_model.partition_field_end,
            config=config,
            parameters=model.parameters or {},
        )
    elif model.incremental_sync and isinstance(
        model.incremental_sync, IncrementingCountCursorModel
    ):
        cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore

        start_time_option = (
            self._create_component_from_model(
                cursor_model.start_value_option,  # type: ignore  # mypy still thinks cursor_model is of type DatetimeBasedCursor
                config,
                parameters=cursor_model.parameters or {},
            )
            if cursor_model.start_value_option  # type: ignore  # mypy still thinks cursor_model is of type DatetimeBasedCursor
            else None
        )

        # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
        # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
        partition_field_start = "start"

        request_options_provider = DatetimeBasedRequestOptionsProvider(
            start_time_option=start_time_option,
            partition_field_start=partition_field_start,
            config=config,
            parameters=model.parameters or {},
        )
    else:
        request_options_provider = None

    transformations = []
    if model.transformations:
        for transformation_model in model.transformations:
            transformations.append(
                self._create_component_from_model(model=transformation_model, config=config)
            )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name=model.name,
        primary_key=primary_key,
        stream_slicer=combined_slicers,
        request_options_provider=request_options_provider,
        stop_condition_on_cursor=stop_condition_on_cursor,
        client_side_incremental_sync=client_side_incremental_sync,
        transformations=transformations,
        incremental_sync=model.incremental_sync,
    )
    cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None

    if model.state_migrations:
        state_transformations = [
            self._create_component_from_model(state_migration, config, declarative_stream=model)
            for state_migration in model.state_migrations
        ]
    else:
        state_transformations = []

    if model.schema_loader:
        schema_loader = self._create_component_from_model(
            model=model.schema_loader, config=config
        )
    else:
        options = model.parameters or {}
        if "name" not in options:
            options["name"] = model.name
        schema_loader = DefaultSchemaLoader(config=config, parameters=options)

    return DeclarativeStream(
        name=model.name or "",
        primary_key=primary_key,
        retriever=retriever,
        schema_loader=schema_loader,
        stream_cursor_field=cursor_field or "",
        state_migrations=state_transformations,
        config=config,
        parameters=model.parameters or {},
    )
def
create_default_error_handler( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DefaultErrorHandler, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.DefaultErrorHandler:
def create_default_error_handler(
    self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
) -> DefaultErrorHandler:
    backoff_strategies = []
    if model.backoff_strategies:
        for backoff_strategy_model in model.backoff_strategies:
            backoff_strategies.append(
                self._create_component_from_model(model=backoff_strategy_model, config=config)
            )

    response_filters = []
    if model.response_filters:
        for response_filter_model in model.response_filters:
            response_filters.append(
                self._create_component_from_model(model=response_filter_model, config=config)
            )
    response_filters.append(
        HttpResponseFilter(config=config, parameters=model.parameters or {})
    )

    return DefaultErrorHandler(
        backoff_strategies=backoff_strategies,
        max_retries=model.max_retries,
        response_filters=response_filters,
        config=config,
        parameters=model.parameters or {},
    )
def
create_default_paginator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DefaultPaginator, config: Mapping[str, Any], *, url_base: str, extractor_model: Union[airbyte_cdk.sources.declarative.models.declarative_component_schema.CustomRecordExtractor, airbyte_cdk.sources.declarative.models.declarative_component_schema.DpathExtractor, NoneType] = None, decoder: Optional[airbyte_cdk.Decoder] = None, cursor_used_for_stop_condition: Optional[airbyte_cdk.sources.declarative.incremental.DeclarativeCursor] = None) -> Union[airbyte_cdk.DefaultPaginator, airbyte_cdk.sources.declarative.requesters.paginators.PaginatorTestReadDecorator]:
def create_default_paginator(
    self,
    model: DefaultPaginatorModel,
    config: Config,
    *,
    url_base: str,
    extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
    decoder: Optional[Decoder] = None,
    cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
    if decoder:
        if self._is_supported_decoder_for_pagination(decoder):
            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
        else:
            raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
    else:
        decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
    page_size_option = (
        self._create_component_from_model(model=model.page_size_option, config=config)
        if model.page_size_option
        else None
    )
    page_token_option = (
        self._create_component_from_model(model=model.page_token_option, config=config)
        if model.page_token_option
        else None
    )
    pagination_strategy = self._create_component_from_model(
        model=model.pagination_strategy,
        config=config,
        decoder=decoder_to_use,
        extractor_model=extractor_model,
    )
    if cursor_used_for_stop_condition:
        pagination_strategy = StopConditionPaginationStrategyDecorator(
            pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
        )
    paginator = DefaultPaginator(
        decoder=decoder_to_use,
        page_size_option=page_size_option,
        page_token_option=page_token_option,
        pagination_strategy=pagination_strategy,
        url_base=url_base,
        config=config,
        parameters=model.parameters or {},
    )
    if self._limit_pages_fetched_per_slice:
        return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
    return paginator
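A minimal, assumption-laden sketch: a DefaultPaginator model wrapping an invented PageIncrement strategy, with no decoder supplied so the JsonDecoder fallback applies. The URL and page size are illustrative.

from airbyte_cdk.sources.declarative.models import (
    DefaultPaginator as DefaultPaginatorModel,
    PageIncrement as PageIncrementModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical paginator model; the nested strategy is illustrative.
model = DefaultPaginatorModel(
    type="DefaultPaginator",
    pagination_strategy=PageIncrementModel(type="PageIncrement", page_size=50),
)

# With no page limit configured on the factory, a plain DefaultPaginator is returned.
paginator = factory.create_default_paginator(
    model, config={}, url_base="https://api.example.com"
)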
def
create_dpath_extractor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DpathExtractor, config: Mapping[str, Any], decoder: Optional[airbyte_cdk.Decoder] = None, **kwargs: Any) -> airbyte_cdk.DpathExtractor:
def create_dpath_extractor(
    self,
    model: DpathExtractorModel,
    config: Config,
    decoder: Optional[Decoder] = None,
    **kwargs: Any,
) -> DpathExtractor:
    if decoder:
        decoder_to_use = decoder
    else:
        decoder_to_use = JsonDecoder(parameters={})
    model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
    return DpathExtractor(
        decoder=decoder_to_use,
        field_path=model_field_path,
        config=config,
        parameters=model.parameters or {},
    )
@staticmethod
def
create_response_to_file_extractor( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ResponseToFileExtractor, **kwargs: Any) -> airbyte_cdk.sources.declarative.extractors.ResponseToFileExtractor:
@staticmethod
def
create_exponential_backoff_strategy( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ExponentialBackoffStrategy, config: Mapping[str, Any]) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.ExponentialBackoffStrategy:
@staticmethod
def
create_group_by_key( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.GroupByKeyMergeStrategy, config: Mapping[str, Any]) -> airbyte_cdk.sources.declarative.requesters.query_properties.strategies.GroupByKey:
def
create_http_requester( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpRequester, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder = JsonDecoder(), query_properties_key: Optional[str] = None, use_cache: Optional[bool] = None, *, name: str) -> airbyte_cdk.HttpRequester:
def create_http_requester(
    self,
    model: HttpRequesterModel,
    config: Config,
    decoder: Decoder = JsonDecoder(parameters={}),
    query_properties_key: Optional[str] = None,
    use_cache: Optional[bool] = None,
    *,
    name: str,
) -> HttpRequester:
    authenticator = (
        self._create_component_from_model(
            model=model.authenticator,
            config=config,
            url_base=model.url_base,
            name=name,
            decoder=decoder,
        )
        if model.authenticator
        else None
    )
    error_handler = (
        self._create_component_from_model(model=model.error_handler, config=config)
        if model.error_handler
        else DefaultErrorHandler(
            backoff_strategies=[],
            response_filters=[],
            config=config,
            parameters=model.parameters or {},
        )
    )

    api_budget = self._api_budget

    request_options_provider = InterpolatedRequestOptionsProvider(
        request_body_data=model.request_body_data,
        request_body_json=model.request_body_json,
        request_headers=model.request_headers,
        request_parameters=model.request_parameters,
        query_properties_key=query_properties_key,
        config=config,
        parameters=model.parameters or {},
    )

    assert model.use_cache is not None  # for mypy
    assert model.http_method is not None  # for mypy

    should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache

    return HttpRequester(
        name=name,
        url_base=model.url_base,
        path=model.path,
        authenticator=authenticator,
        error_handler=error_handler,
        api_budget=api_budget,
        http_method=HttpMethod[model.http_method.value],
        request_options_provider=request_options_provider,
        config=config,
        disable_retries=self._disable_retries,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        use_cache=should_use_cache,
        decoder=decoder,
        stream_response=decoder.is_stream_response() if decoder else False,
    )
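An illustrative sketch of building a requester from a hand-rolled model. The URL, path, and stream name are invented, and http_method and use_cache are set explicitly because this method asserts both are non-None; schema defaults may make that unnecessary in practice.

from airbyte_cdk.sources.declarative.models import HttpRequester as HttpRequesterModel
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical requester model; all values are illustrative.
model = HttpRequesterModel(
    type="HttpRequester",
    url_base="https://api.example.com/v1",
    path="/orders",
    http_method="GET",
    use_cache=False,
)

requester = factory.create_http_requester(model, config={}, name="orders")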
@staticmethod
def
create_http_response_filter( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpResponseFilter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.HttpResponseFilter:
@staticmethod
def create_http_response_filter(
    model: HttpResponseFilterModel, config: Config, **kwargs: Any
) -> HttpResponseFilter:
    if model.action:
        action = ResponseAction(model.action.value)
    else:
        action = None

    failure_type = FailureType(model.failure_type.value) if model.failure_type else None

    http_codes = (
        set(model.http_codes) if model.http_codes else set()
    )  # JSON schema notation has no set data type. The schema enforces an array of unique elements

    return HttpResponseFilter(
        action=action,
        failure_type=failure_type,
        error_message=model.error_message or "",
        error_message_contains=model.error_message_contains or "",
        http_codes=http_codes,
        predicate=model.predicate or "",
        config=config,
        parameters=model.parameters or {},
    )
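Since this is a staticmethod, it can be sketched without a factory instance; the action and status codes below are illustrative.

from airbyte_cdk.sources.declarative.models import (
    HttpResponseFilter as HttpResponseFilterModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

# Hypothetical filter: retry on an illustrative set of status codes.
model = HttpResponseFilterModel(
    type="HttpResponseFilter",
    action="RETRY",
    http_codes=[429, 500, 502],
)

response_filter = ModelToComponentFactory.create_http_response_filter(model, config={})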
@staticmethod
def
create_inline_schema_loader( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.InlineSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.InlineSchemaLoader:
def
create_complex_field_type( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ComplexFieldType, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.ComplexFieldType:
def create_complex_field_type(
    self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
) -> ComplexFieldType:
    items = (
        self._create_component_from_model(model=model.items, config=config)
        if isinstance(model.items, ComplexFieldTypeModel)
        else model.items
    )

    return ComplexFieldType(field_type=model.field_type, items=items)
def
create_types_map( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.TypesMap, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.TypesMap:
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
    target_type = (
        self._create_component_from_model(model=model.target_type, config=config)
        if isinstance(model.target_type, ComplexFieldTypeModel)
        else model.target_type
    )

    return TypesMap(
        target_type=target_type,
        current_type=model.current_type,
        condition=model.condition if model.condition is not None else "True",
    )
def
create_schema_type_identifier( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SchemaTypeIdentifier, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.SchemaTypeIdentifier:
def create_schema_type_identifier(
    self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
) -> SchemaTypeIdentifier:
    types_mapping = []
    if model.types_mapping:
        types_mapping.extend(
            [
                self._create_component_from_model(types_map, config=config)
                for types_map in model.types_mapping
            ]
        )
    model_schema_pointer: List[Union[InterpolatedString, str]] = (
        [x for x in model.schema_pointer] if model.schema_pointer else []
    )
    model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
    model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
        [x for x in model.type_pointer] if model.type_pointer else None
    )

    return SchemaTypeIdentifier(
        schema_pointer=model_schema_pointer,
        key_pointer=model_key_pointer,
        type_pointer=model_type_pointer,
        types_mapping=types_mapping,
        parameters=model.parameters or {},
    )
def
create_dynamic_schema_loader( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DynamicSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.DynamicSchemaLoader:
def create_dynamic_schema_loader(
    self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
) -> DynamicSchemaLoader:
    stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)

    schema_transformations = []
    if model.schema_transformations:
        for transformation_model in model.schema_transformations:
            schema_transformations.append(
                self._create_component_from_model(model=transformation_model, config=config)
            )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="dynamic_properties",
        primary_key=None,
        stream_slicer=combined_slicers,
        transformations=[],
        use_cache=True,
    )
    schema_type_identifier = self._create_component_from_model(
        model.schema_type_identifier, config=config, parameters=model.parameters or {}
    )
    return DynamicSchemaLoader(
        retriever=retriever,
        config=config,
        schema_transformations=schema_transformations,
        schema_type_identifier=schema_type_identifier,
        parameters=model.parameters or {},
    )
@staticmethod
def
create_json_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
def
create_csv_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CsvDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
def
create_jsonl_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonlDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
def
create_gzip_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.GzipDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
def create_gzip_decoder(
    self, model: GzipDecoderModel, config: Config, **kwargs: Any
) -> Decoder:
    _compressed_response_types = {
        "gzip",
        "x-gzip",
        "gzip, deflate",
        "x-gzip, deflate",
        "application/zip",
        "application/gzip",
        "application/x-gzip",
        "application/x-zip-compressed",
    }

    gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser

    if self._emit_connector_builder_messages:
        # This is very surprising, but if the response is not streamed,
        # CompositeRawDecoder calls response.content, and the requests library actually uncompresses the data,
        # as opposed to response.raw, which uses urllib3 directly and does not uncompress the data.
        return CompositeRawDecoder(gzip_parser.inner_parser, False)

    return CompositeRawDecoder.by_headers(
        [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
        stream_response=True,
        fallback_parser=gzip_parser.inner_parser,
    )
@staticmethod
def
create_incrementing_count_cursor( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.IncrementingCountCursor, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DatetimeBasedCursor:
@staticmethod
def create_incrementing_count_cursor(
    model: IncrementingCountCursorModel, config: Config, **kwargs: Any
) -> DatetimeBasedCursor:
    # This should not actually get used anywhere at runtime, but we need it to pass checks since
    # we still parse models into components. The issue is that there is no runtime implementation of an
    # IncrementingCountCursor.
    # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor,
    # because the check is run without a ConcurrentCursor.
    return DatetimeBasedCursor(
        cursor_field=model.cursor_field,
        datetime_format="%Y-%m-%d",
        start_datetime="2024-12-12",
        config=config,
        parameters={},
    )
@staticmethod
def
create_iterable_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.IterableDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.IterableDecoder:
@staticmethod
def
create_xml_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.XmlDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.XmlDecoder:
def
create_zipfile_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ZipfileDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.ZipfileDecoder:
@staticmethod
def
create_json_file_schema_loader( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonFileSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.JsonFileSchemaLoader:
@staticmethod
def
create_jwt_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JwtAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.auth.JwtAuthenticator:
@staticmethod
def create_jwt_authenticator(
    model: JwtAuthenticatorModel, config: Config, **kwargs: Any
) -> JwtAuthenticator:
    jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
    jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
    return JwtAuthenticator(
        config=config,
        parameters=model.parameters or {},
        algorithm=JwtAlgorithm(model.algorithm.value),
        secret_key=model.secret_key,
        base64_encode_secret_key=model.base64_encode_secret_key,
        token_duration=model.token_duration,
        header_prefix=model.header_prefix,
        kid=jwt_headers.kid,
        typ=jwt_headers.typ,
        cty=jwt_headers.cty,
        iss=jwt_payload.iss,
        sub=jwt_payload.sub,
        aud=jwt_payload.aud,
        additional_jwt_headers=model.additional_jwt_headers,
        additional_jwt_payload=model.additional_jwt_payload,
    )
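A hedged sketch of the staticmethod with an invented secret-key reference; the HS256 algorithm and the token duration are illustrative.

from airbyte_cdk.sources.declarative.models import JwtAuthenticator as JwtAuthenticatorModel
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

# Hypothetical authenticator model; all values are illustrative.
model = JwtAuthenticatorModel(
    type="JwtAuthenticator",
    secret_key="{{ config['secret_key'] }}",
    algorithm="HS256",
    token_duration=1200,
)

authenticator = ModelToComponentFactory.create_jwt_authenticator(
    model, config={"secret_key": "dummy-secret"}
)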
def
create_list_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ListPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.partition_routers.ListPartitionRouter:
def create_list_partition_router(
    self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
) -> ListPartitionRouter:
    request_option = (
        self._create_component_from_model(model.request_option, config)
        if model.request_option
        else None
    )
    return ListPartitionRouter(
        cursor_field=model.cursor_field,
        request_option=request_option,
        values=model.values,
        config=config,
        parameters=model.parameters or {},
    )
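A small illustrative sketch; the cursor field and region values are invented.

from airbyte_cdk.sources.declarative.models import (
    ListPartitionRouter as ListPartitionRouterModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical router over a fixed, illustrative list of regions.
model = ListPartitionRouterModel(
    type="ListPartitionRouter",
    cursor_field="region",
    values=["us", "eu", "apac"],
)

router = factory.create_list_partition_router(model, config={})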
@staticmethod
def
create_min_max_datetime( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.MinMaxDatetime, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.MinMaxDatetime:
@staticmethod
def create_min_max_datetime(
    model: MinMaxDatetimeModel, config: Config, **kwargs: Any
) -> MinMaxDatetime:
    return MinMaxDatetime(
        datetime=model.datetime,
        datetime_format=model.datetime_format or "",
        max_datetime=model.max_datetime or "",
        min_datetime=model.min_datetime or "",
        parameters=model.parameters or {},
    )
@staticmethod
def
create_no_auth( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.NoAuth, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.NoAuth:
@staticmethod
def
create_no_pagination( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.NoPagination, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.paginators.NoPagination:
def
create_oauth_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.OAuthAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeOauth2Authenticator:
def create_oauth_authenticator(
    self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeOauth2Authenticator:
    profile_assertion = (
        self._create_component_from_model(model.profile_assertion, config=config)
        if model.profile_assertion
        else None
    )

    if model.refresh_token_updater:
        # ignore type error because fixing it would have a lot of dependencies, revisit later
        return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
            config,
            InterpolatedString.create(
                model.token_refresh_endpoint,  # type: ignore
                parameters=model.parameters or {},
            ).eval(config),
            access_token_name=InterpolatedString.create(
                model.access_token_name or "access_token", parameters=model.parameters or {}
            ).eval(config),
            refresh_token_name=model.refresh_token_updater.refresh_token_name,
            expires_in_name=InterpolatedString.create(
                model.expires_in_name or "expires_in", parameters=model.parameters or {}
            ).eval(config),
            client_id_name=InterpolatedString.create(
                model.client_id_name or "client_id", parameters=model.parameters or {}
            ).eval(config),
            client_id=InterpolatedString.create(
                model.client_id, parameters=model.parameters or {}
            ).eval(config)
            if model.client_id
            else model.client_id,
            client_secret_name=InterpolatedString.create(
                model.client_secret_name or "client_secret", parameters=model.parameters or {}
            ).eval(config),
            client_secret=InterpolatedString.create(
                model.client_secret, parameters=model.parameters or {}
            ).eval(config)
            if model.client_secret
            else model.client_secret,
            access_token_config_path=model.refresh_token_updater.access_token_config_path,
            refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
            token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
            grant_type_name=InterpolatedString.create(
                model.grant_type_name or "grant_type", parameters=model.parameters or {}
            ).eval(config),
            grant_type=InterpolatedString.create(
                model.grant_type or "refresh_token", parameters=model.parameters or {}
            ).eval(config),
            refresh_request_body=InterpolatedMapping(
                model.refresh_request_body or {}, parameters=model.parameters or {}
            ).eval(config),
            refresh_request_headers=InterpolatedMapping(
                model.refresh_request_headers or {}, parameters=model.parameters or {}
            ).eval(config),
            scopes=model.scopes,
            token_expiry_date_format=model.token_expiry_date_format,
            message_repository=self._message_repository,
            refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
            refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
            refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
        )
    # ignore type error because fixing it would have a lot of dependencies, revisit later
    return DeclarativeOauth2Authenticator(  # type: ignore
        access_token_name=model.access_token_name or "access_token",
        access_token_value=model.access_token_value,
        client_id_name=model.client_id_name or "client_id",
        client_id=model.client_id,
        client_secret_name=model.client_secret_name or "client_secret",
        client_secret=model.client_secret,
        expires_in_name=model.expires_in_name or "expires_in",
        grant_type_name=model.grant_type_name or "grant_type",
        grant_type=model.grant_type or "refresh_token",
        refresh_request_body=model.refresh_request_body,
        refresh_request_headers=model.refresh_request_headers,
        refresh_token_name=model.refresh_token_name or "refresh_token",
        refresh_token=model.refresh_token,
        scopes=model.scopes,
        token_expiry_date=model.token_expiry_date,
        token_expiry_date_format=model.token_expiry_date_format,
        token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
        token_refresh_endpoint=model.token_refresh_endpoint,
        config=config,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        profile_assertion=profile_assertion,
        use_profile_assertion=model.use_profile_assertion,
    )
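A sketch of the second branch (no refresh_token_updater, so a plain DeclarativeOauth2Authenticator is returned); every endpoint and credential below is invented.

from airbyte_cdk.sources.declarative.models import (
    OAuthAuthenticator as OAuthAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical model; all values are illustrative placeholders.
model = OAuthAuthenticatorModel(
    type="OAuthAuthenticator",
    token_refresh_endpoint="https://api.example.com/oauth/token",
    client_id="{{ config['client_id'] }}",
    client_secret="{{ config['client_secret'] }}",
    refresh_token="{{ config['refresh_token'] }}",
)

config = {
    "client_id": "dummy-id",
    "client_secret": "dummy-secret",
    "refresh_token": "dummy-token",
}
authenticator = factory.create_oauth_authenticator(model, config=config)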
def
create_offset_increment( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.OffsetIncrement, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder, extractor_model: Union[airbyte_cdk.sources.declarative.models.declarative_component_schema.CustomRecordExtractor, airbyte_cdk.sources.declarative.models.declarative_component_schema.DpathExtractor, NoneType] = None, **kwargs: Any) -> airbyte_cdk.OffsetIncrement:
def create_offset_increment(
    self,
    model: OffsetIncrementModel,
    config: Config,
    decoder: Decoder,
    extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
    **kwargs: Any,
) -> OffsetIncrement:
    if isinstance(decoder, PaginationDecoderDecorator):
        inner_decoder = decoder.decoder
    else:
        inner_decoder = decoder
        decoder = PaginationDecoderDecorator(decoder=decoder)

    if self._is_supported_decoder_for_pagination(inner_decoder):
        decoder_to_use = decoder
    else:
        raise ValueError(
            self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
        )

    # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever)
    # so that it can be shared by OffsetIncrement and RecordSelector. However, because we instantiate the
    # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing
    # behavior by having two separate extractors with identical behavior, since they use the same extractor model.
    # When we have more time to investigate, we can look into reusing the same component.
    extractor = (
        self._create_component_from_model(
            model=extractor_model, config=config, decoder=decoder_to_use
        )
        if extractor_model
        else None
    )

    return OffsetIncrement(
        page_size=model.page_size,
        config=config,
        decoder=decoder_to_use,
        extractor=extractor,
        inject_on_first_request=model.inject_on_first_request or False,
        parameters=model.parameters or {},
    )
@staticmethod
def
create_page_increment( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.PageIncrement, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.PageIncrement:
@staticmethod
def create_page_increment(
    model: PageIncrementModel, config: Config, **kwargs: Any
) -> PageIncrement:
    return PageIncrement(
        page_size=model.page_size,
        config=config,
        start_from_page=model.start_from_page or 0,
        inject_on_first_request=model.inject_on_first_request or False,
        parameters=model.parameters or {},
    )
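The simplest strategy to sketch; page size and starting page are illustrative.

from airbyte_cdk.sources.declarative.models import PageIncrement as PageIncrementModel
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

# Hypothetical strategy: 50 records per page, first page numbered 1.
model = PageIncrementModel(type="PageIncrement", page_size=50, start_from_page=1)
strategy = ModelToComponentFactory.create_page_increment(model, config={})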
def
create_parent_stream_config( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ParentStreamConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.ParentStreamConfig:
def create_parent_stream_config(
    self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
) -> ParentStreamConfig:
    declarative_stream = self._create_component_from_model(
        model.stream, config=config, **kwargs
    )
    request_option = (
        self._create_component_from_model(model.request_option, config=config)
        if model.request_option
        else None
    )

    if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
        raise ValueError(
            "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
        )

    model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
        [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
    )

    return ParentStreamConfig(
        parent_key=model.parent_key,
        request_option=request_option,
        stream=declarative_stream,
        partition_field=model.partition_field,
        config=config,
        incremental_dependency=model.incremental_dependency or False,
        parameters=model.parameters or {},
        extra_fields=model.extra_fields,
        lazy_read_pointer=model_lazy_read_pointer,
    )
def
create_properties_from_endpoint( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.PropertiesFromEndpoint, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.query_properties.PropertiesFromEndpoint:
def create_properties_from_endpoint(
    self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
) -> PropertiesFromEndpoint:
    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="dynamic_properties",
        primary_key=None,
        stream_slicer=None,
        transformations=[],
        use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ
    )
    return PropertiesFromEndpoint(
        property_field_path=model.property_field_path,
        retriever=retriever,
        config=config,
        parameters=model.parameters or {},
    )
def
create_property_chunking( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.PropertyChunking, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.query_properties.PropertyChunking:
def create_property_chunking(
    self, model: PropertyChunkingModel, config: Config, **kwargs: Any
) -> PropertyChunking:
    record_merge_strategy = (
        self._create_component_from_model(
            model=model.record_merge_strategy, config=config, **kwargs
        )
        if model.record_merge_strategy
        else None
    )

    property_limit_type: PropertyLimitType
    match model.property_limit_type:
        case PropertyLimitTypeModel.property_count:
            property_limit_type = PropertyLimitType.property_count
        case PropertyLimitTypeModel.characters:
            property_limit_type = PropertyLimitType.characters
        case _:
            # Report the model's value: the local property_limit_type is still unbound in this branch
            raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")

    return PropertyChunking(
        property_limit_type=property_limit_type,
        property_limit=model.property_limit,
        record_merge_strategy=record_merge_strategy,
        config=config,
        parameters=model.parameters or {},
    )
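An illustrative sketch of the property_count branch; the limit is invented.

from airbyte_cdk.sources.declarative.models import (
    PropertyChunking as PropertyChunkingModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Hypothetical chunking model: at most 100 property names per request.
model = PropertyChunkingModel(
    type="PropertyChunking",
    property_limit_type="property_count",
    property_limit=100,
)

property_chunking = factory.create_property_chunking(model, config={})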
def create_query_properties( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.QueryProperties, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.query_properties.QueryProperties:
def create_query_properties(
    self, model: QueryPropertiesModel, config: Config, **kwargs: Any
) -> QueryProperties:
    if isinstance(model.property_list, list):
        property_list = model.property_list
    else:
        property_list = self._create_component_from_model(
            model=model.property_list, config=config, **kwargs
        )

    property_chunking = (
        self._create_component_from_model(
            model=model.property_chunking, config=config, **kwargs
        )
        if model.property_chunking
        else None
    )

    return QueryProperties(
        property_list=property_list,
        always_include_properties=model.always_include_properties,
        property_chunking=property_chunking,
        config=config,
        parameters=model.parameters or {},
    )
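property_list accepts either a literal list of property names or a nested PropertiesFromEndpoint definition; the isinstance check above routes between the two. Two hypothetical manifest fragments of those shapes (all field values are illustrative only):

static_definition = {
    "type": "QueryProperties",
    "property_list": ["id", "name", "updated_at"],
}

dynamic_definition = {
    "type": "QueryProperties",
    "property_list": {
        "type": "PropertiesFromEndpoint",
        "property_field_path": ["name"],
        "retriever": {"type": "SimpleRetriever"},  # retriever definition elided
    },
}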
@staticmethod
def create_record_filter( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RecordFilter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.RecordFilter:
@staticmethod
def create_request_path( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RequestPath, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.request_path.RequestPath:
@staticmethod
def create_request_option( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RequestOption, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.RequestOption:
@staticmethod
def create_request_option(
    model: RequestOptionModel, config: Config, **kwargs: Any
) -> RequestOption:
    inject_into = RequestOptionType(model.inject_into.value)
    field_path: Optional[List[Union[InterpolatedString, str]]] = (
        [
            InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
            for segment in model.field_path
        ]
        if model.field_path
        else None
    )
    field_name = (
        InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
        if model.field_name
        else None
    )
    return RequestOption(
        field_name=field_name,
        field_path=field_path,
        inject_into=inject_into,
        parameters=kwargs.get("parameters", {}),
    )
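field_path lets a request option target a nested location, and each segment is interpolated independently, as in the list comprehension above. A small sketch using InterpolatedString (imported by this module); the segments and parameters are made up:

from airbyte_cdk.sources.declarative.interpolation import InterpolatedString

parameters = {"nested_key": "metrics"}
field_path = ["data", "{{ parameters['nested_key'] }}", "values"]
interpolated = [
    InterpolatedString.create(segment, parameters=parameters) for segment in field_path
]
resolved = [segment.eval(config={}) for segment in interpolated]
# resolved == ["data", "metrics", "values"]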
def create_record_selector( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RecordSelector, config: Mapping[str, Any], *, name: str, transformations: Optional[List[airbyte_cdk.RecordTransformation]] = None, decoder: airbyte_cdk.Decoder | None = None, client_side_incremental_sync: Optional[Dict[str, Any]] = None, **kwargs: Any) -> airbyte_cdk.RecordSelector:
def create_record_selector(
    self,
    model: RecordSelectorModel,
    config: Config,
    *,
    name: str,
    transformations: List[RecordTransformation] | None = None,
    decoder: Decoder | None = None,
    client_side_incremental_sync: Dict[str, Any] | None = None,
    **kwargs: Any,
) -> RecordSelector:
    extractor = self._create_component_from_model(
        model=model.extractor, decoder=decoder, config=config
    )
    record_filter = (
        self._create_component_from_model(model.record_filter, config=config)
        if model.record_filter
        else None
    )

    assert model.transform_before_filtering is not None  # for mypy

    transform_before_filtering = model.transform_before_filtering
    if client_side_incremental_sync:
        record_filter = ClientSideIncrementalRecordFilterDecorator(
            config=config,
            parameters=model.parameters,
            condition=model.record_filter.condition
            if (model.record_filter and hasattr(model.record_filter, "condition"))
            else None,
            **client_side_incremental_sync,
        )
        transform_before_filtering = True

    schema_normalization = (
        TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
        if isinstance(model.schema_normalization, SchemaNormalizationModel)
        else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type]  # custom normalization model expected here
    )

    return RecordSelector(
        extractor=extractor,
        name=name,
        config=config,
        record_filter=record_filter,
        transformations=transformations or [],
        schema_normalization=schema_normalization,
        parameters=model.parameters or {},
        transform_before_filtering=transform_before_filtering,
    )
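When client_side_incremental_sync is set, the plain record filter is swapped for a decorator that also enforces the cursor client-side. A simplified stand-in for that decorator pattern (these classes are illustrative, not the CDK implementations):

class PassthroughFilter:
    def filter(self, records):
        return records

class ClientSideIncrementalFilter:
    """Applies the wrapped filter, then drops records at or before the cursor."""

    def __init__(self, delegate, cursor_field, cursor_value):
        self._delegate = delegate
        self._cursor_field = cursor_field
        self._cursor_value = cursor_value

    def filter(self, records):
        for record in self._delegate.filter(records):
            if record[self._cursor_field] > self._cursor_value:
                yield record

records = [{"updated_at": "2025-01-01"}, {"updated_at": "2025-06-01"}]
decorated = ClientSideIncrementalFilter(PassthroughFilter(), "updated_at", "2025-03-01")
assert list(decorated.filter(records)) == [{"updated_at": "2025-06-01"}]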
@staticmethod
def create_remove_fields( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RemoveFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.RemoveFields:
def create_selective_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SelectiveAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeAuthenticator:
def create_selective_authenticator(
    self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeAuthenticator:
    authenticators = {
        name: self._create_component_from_model(model=auth, config=config)
        for name, auth in model.authenticators.items()
    }
    # SelectiveAuthenticator returns an instance of DeclarativeAuthenticator or raises a ValueError
    return SelectiveAuthenticator(  # type: ignore[abstract]
        config=config,
        authenticators=authenticators,
        authenticator_selection_path=model.authenticator_selection_path,
        **kwargs,
    )
@staticmethod
def create_legacy_session_token_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.LegacySessionTokenAuthenticator, config: Mapping[str, Any], *, url_base: str, **kwargs: Any) -> airbyte_cdk.sources.declarative.auth.token.LegacySessionTokenAuthenticator:
@staticmethod
def create_legacy_session_token_authenticator(
    model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
) -> LegacySessionTokenAuthenticator:
    return LegacySessionTokenAuthenticator(
        api_url=url_base,
        header=model.header,
        login_url=model.login_url,
        password=model.password or "",
        session_token=model.session_token or "",
        session_token_response_key=model.session_token_response_key or "",
        username=model.username or "",
        validate_session_url=model.validate_session_url,
        config=config,
        parameters=model.parameters or {},
    )
def create_simple_retriever( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SimpleRetriever, config: Mapping[str, Any], *, name: str, primary_key: Union[str, List[str], List[List[str]], NoneType], stream_slicer: Optional[airbyte_cdk.sources.declarative.stream_slicers.StreamSlicer], request_options_provider: Optional[airbyte_cdk.sources.declarative.requesters.request_options.RequestOptionsProvider] = None, stop_condition_on_cursor: bool = False, client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[airbyte_cdk.RecordTransformation], incremental_sync: Union[airbyte_cdk.sources.declarative.models.declarative_component_schema.CustomIncrementalSync, airbyte_cdk.sources.declarative.models.declarative_component_schema.DatetimeBasedCursor, airbyte_cdk.sources.declarative.models.declarative_component_schema.IncrementingCountCursor, NoneType] = None, use_cache: Optional[bool] = None, **kwargs: Any) -> airbyte_cdk.SimpleRetriever:
def create_simple_retriever(
    self,
    model: SimpleRetrieverModel,
    config: Config,
    *,
    name: str,
    primary_key: Optional[Union[str, List[str], List[List[str]]]],
    stream_slicer: Optional[StreamSlicer],
    request_options_provider: Optional[RequestOptionsProvider] = None,
    stop_condition_on_cursor: bool = False,
    client_side_incremental_sync: Optional[Dict[str, Any]] = None,
    transformations: List[RecordTransformation],
    incremental_sync: Optional[
        Union[
            IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
        ]
    ] = None,
    use_cache: Optional[bool] = None,
    **kwargs: Any,
) -> SimpleRetriever:
    decoder = (
        self._create_component_from_model(model=model.decoder, config=config)
        if model.decoder
        else JsonDecoder(parameters={})
    )
    record_selector = self._create_component_from_model(
        model=model.record_selector,
        name=name,
        config=config,
        decoder=decoder,
        transformations=transformations,
        client_side_incremental_sync=client_side_incremental_sync,
    )

    query_properties: Optional[QueryProperties] = None
    query_properties_key: Optional[str] = None
    if (
        hasattr(model.requester, "request_parameters")
        and model.requester.request_parameters
        and isinstance(model.requester.request_parameters, Mapping)
    ):
        query_properties_definitions = []
        for key, request_parameter in model.requester.request_parameters.items():
            # When translating JSON schema into Pydantic models, enforcing types for arrays containing
            # both concrete strings and complex object definitions like QueryProperties would get
            # resolved to Union[str, Any]. This adds the extra validation that we couldn't get for
            # free in Pydantic model generation.
            if (
                isinstance(request_parameter, Mapping)
                and request_parameter.get("type") == "QueryProperties"
            ):
                query_properties_key = key
                query_properties_definitions.append(request_parameter)
            elif not isinstance(request_parameter, str):
                raise ValueError(
                    f"Each element of request_parameters should be of type str or QueryProperties, but received {request_parameter.get('type')}"
                )

        if len(query_properties_definitions) > 1:
            raise ValueError(
                f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages"
            )

        if len(query_properties_definitions) == 1:
            query_properties = self.create_component(
                model_type=QueryPropertiesModel,
                component_definition=query_properties_definitions[0],
                config=config,
            )

        # Remove QueryProperties components from the interpolated mappings because they will be
        # resolved in the provider from the slice directly instead of through jinja interpolation
        if isinstance(model.requester.request_parameters, Mapping):
            model.requester.request_parameters = self._remove_query_properties(
                model.requester.request_parameters
            )

    requester = self._create_component_from_model(
        model=model.requester,
        decoder=decoder,
        name=name,
        query_properties_key=query_properties_key,
        use_cache=use_cache,
        config=config,
    )
    url_base = (
        model.requester.url_base
        if hasattr(model.requester, "url_base")
        else requester.get_url_base()
    )

    # Define cursor only if per partition or common incremental support is needed
    cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None

    if (
        not isinstance(stream_slicer, DatetimeBasedCursor)
        or type(stream_slicer) is not DatetimeBasedCursor
    ):
        # Many of the custom component implementations of DatetimeBasedCursor override get_request_params()
        # (or other methods). Because we're decoupling RequestOptionsProvider from the Cursor, custom
        # components will eventually need to reimplement their own RequestOptionsProvider. However, right
        # now the existing StreamSlicer/Cursor can still act as the SimpleRetriever's request_options_provider.
        request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
    elif not request_options_provider:
        request_options_provider = DefaultRequestOptionsProvider(parameters={})

    stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})

    cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
    paginator = (
        self._create_component_from_model(
            model=model.paginator,
            config=config,
            url_base=url_base,
            extractor_model=model.record_selector.extractor,
            decoder=decoder,
            cursor_used_for_stop_condition=cursor_used_for_stop_condition,
        )
        if model.paginator
        else NoPagination(parameters={})
    )

    ignore_stream_slicer_parameters_on_paginated_requests = (
        model.ignore_stream_slicer_parameters_on_paginated_requests or False
    )

    if (
        model.partition_router
        and isinstance(model.partition_router, SubstreamPartitionRouterModel)
        and not bool(self._connector_state_manager.get_stream_state(name, None))
        and any(
            parent_stream_config.lazy_read_pointer
            for parent_stream_config in model.partition_router.parent_stream_configs
        )
    ):
        if incremental_sync:
            if incremental_sync.type != "DatetimeBasedCursor":
                raise ValueError(
                    f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
                )

            elif incremental_sync.step or incremental_sync.cursor_granularity:
                raise ValueError(
                    f"Found more than one slice per parent. LazySimpleRetriever only supports a single-slice read for stream - {name}."
                )

        if model.decoder and model.decoder.type != "JsonDecoder":
            raise ValueError(
                f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
            )

        return LazySimpleRetriever(
            name=name,
            paginator=paginator,
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
            stream_slicer=stream_slicer,
            request_option_provider=request_options_provider,
            cursor=cursor,
            config=config,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            parameters=model.parameters or {},
        )

    if self._limit_slices_fetched or self._emit_connector_builder_messages:
        return SimpleRetrieverTestReadDecorator(
            name=name,
            paginator=paginator,
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
            stream_slicer=stream_slicer,
            request_option_provider=request_options_provider,
            cursor=cursor,
            config=config,
            maximum_number_of_slices=self._limit_slices_fetched or 5,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            parameters=model.parameters or {},
        )
    return SimpleRetriever(
        name=name,
        paginator=paginator,
        primary_key=primary_key,
        requester=requester,
        record_selector=record_selector,
        stream_slicer=stream_slicer,
        request_option_provider=request_options_provider,
        cursor=cursor,
        config=config,
        ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
        additional_query_properties=query_properties,
        parameters=model.parameters or {},
    )
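The request_parameters scan near the top of create_simple_retriever enforces two rules: values must be plain strings or QueryProperties objects, and at most one QueryProperties entry may appear. A condensed, standalone version of that validation (find_query_properties is a hypothetical helper):

def find_query_properties(request_parameters: dict) -> tuple[str, dict] | None:
    definitions = []
    for key, value in request_parameters.items():
        if isinstance(value, dict) and value.get("type") == "QueryProperties":
            definitions.append((key, value))
        elif not isinstance(value, str):
            raise ValueError("request_parameters values must be str or QueryProperties")
    if len(definitions) > 1:
        raise ValueError("only one QueryProperties field is supported")
    return definitions[0] if definitions else None

assert find_query_properties({"fields": "id,name"}) is None
key, definition = find_query_properties(
    {"properties": {"type": "QueryProperties", "property_list": ["id"]}}
)
assert key == "properties"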
def create_state_delegating_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StateDelegatingStream, config: Mapping[str, Any], has_parent_state: Optional[bool] = None, **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
def create_state_delegating_stream(
    self,
    model: StateDelegatingStreamModel,
    config: Config,
    has_parent_state: Optional[bool] = None,
    **kwargs: Any,
) -> DeclarativeStream:
    if (
        model.full_refresh_stream.name != model.name
        or model.name != model.incremental_stream.name
    ):
        raise ValueError(
            f"state_delegating_stream, full_refresh_stream and incremental_stream must all have the same name. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
        )

    stream_model = (
        model.incremental_stream
        if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
        else model.full_refresh_stream
    )

    return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # a DeclarativeStream is created since stream_model is a stream description
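The delegation rule is: pick the incremental variant as soon as any state exists (the stream's own state or parent state), otherwise fall back to the full refresh variant. A minimal sketch of that decision, with illustrative names:

def pick_stream(stream_state: dict, has_parent_state: bool | None) -> str:
    return "incremental_stream" if stream_state or has_parent_state else "full_refresh_stream"

assert pick_stream({}, None) == "full_refresh_stream"
assert pick_stream({"cursor": "2025-01-01"}, None) == "incremental_stream"
assert pick_stream({}, True) == "incremental_stream"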
def create_async_retriever( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AsyncRetriever, config: Mapping[str, Any], *, name: str, primary_key: Union[str, List[str], List[List[str]], NoneType], stream_slicer: Optional[airbyte_cdk.sources.declarative.stream_slicers.StreamSlicer], client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[airbyte_cdk.RecordTransformation], **kwargs: Any) -> airbyte_cdk.sources.declarative.retrievers.AsyncRetriever:
def create_async_retriever(
    self,
    model: AsyncRetrieverModel,
    config: Config,
    *,
    name: str,
    primary_key: Optional[
        Union[str, List[str], List[List[str]]]
    ],  # this seems to be needed to match create_simple_retriever
    stream_slicer: Optional[StreamSlicer],
    client_side_incremental_sync: Optional[Dict[str, Any]] = None,
    transformations: List[RecordTransformation],
    **kwargs: Any,
) -> AsyncRetriever:
    def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
        record_selector = RecordSelector(
            extractor=download_extractor,
            name=name,
            record_filter=None,
            transformations=transformations,
            schema_normalization=TypeTransformer(TransformConfig.NoTransform),
            config=config,
            parameters={},
        )
        paginator = (
            self._create_component_from_model(
                model=model.download_paginator,
                decoder=decoder,
                config=config,
                url_base="",
            )
            if model.download_paginator
            else NoPagination(parameters={})
        )
        maximum_number_of_slices = self._limit_slices_fetched or 5

        if self._limit_slices_fetched or self._emit_connector_builder_messages:
            return SimpleRetrieverTestReadDecorator(
                requester=download_requester,
                record_selector=record_selector,
                primary_key=None,
                name=job_download_components_name,
                paginator=paginator,
                config=config,
                parameters={},
                maximum_number_of_slices=maximum_number_of_slices,
            )

        return SimpleRetriever(
            requester=download_requester,
            record_selector=record_selector,
            primary_key=None,
            name=job_download_components_name,
            paginator=paginator,
            config=config,
            parameters={},
        )

    def _get_job_timeout() -> datetime.timedelta:
        user_defined_timeout: Optional[int] = (
            int(
                InterpolatedString.create(
                    str(model.polling_job_timeout),
                    parameters={},
                ).eval(config)
            )
            if model.polling_job_timeout
            else None
        )

        # use the user-defined timeout for test reads, defaulting to 15 minutes
        test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
        # outside the Connector Builder, the default is 60 minutes
        default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)

        return (
            test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
        )

    decoder = (
        self._create_component_from_model(model=model.decoder, config=config)
        if model.decoder
        else JsonDecoder(parameters={})
    )
    record_selector = self._create_component_from_model(
        model=model.record_selector,
        config=config,
        decoder=decoder,
        name=name,
        transformations=transformations,
        client_side_incremental_sync=client_side_incremental_sync,
    )
    stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
    creation_requester = self._create_component_from_model(
        model=model.creation_requester,
        decoder=decoder,
        config=config,
        name=f"job creation - {name}",
    )
    polling_requester = self._create_component_from_model(
        model=model.polling_requester,
        decoder=decoder,
        config=config,
        name=f"job polling - {name}",
    )
    job_download_components_name = f"job download - {name}"
    download_decoder = (
        self._create_component_from_model(model=model.download_decoder, config=config)
        if model.download_decoder
        else JsonDecoder(parameters={})
    )
    download_extractor = (
        self._create_component_from_model(
            model=model.download_extractor,
            config=config,
            decoder=download_decoder,
            parameters=model.parameters,
        )
        if model.download_extractor
        else DpathExtractor(
            [],
            config=config,
            decoder=download_decoder,
            parameters=model.parameters or {},
        )
    )
    download_requester = self._create_component_from_model(
        model=model.download_requester,
        decoder=download_decoder,
        config=config,
        name=job_download_components_name,
    )
    download_retriever = _get_download_retriever()
    abort_requester = (
        self._create_component_from_model(
            model=model.abort_requester,
            decoder=decoder,
            config=config,
            name=f"job abort - {name}",
        )
        if model.abort_requester
        else None
    )
    delete_requester = (
        self._create_component_from_model(
            model=model.delete_requester,
            decoder=decoder,
            config=config,
            name=f"job delete - {name}",
        )
        if model.delete_requester
        else None
    )
    download_target_requester = (
        self._create_component_from_model(
            model=model.download_target_requester,
            decoder=decoder,
            config=config,
            name=f"job extract_url - {name}",
        )
        if model.download_target_requester
        else None
    )
    status_extractor = self._create_component_from_model(
        model=model.status_extractor, decoder=decoder, config=config, name=name
    )
    download_target_extractor = self._create_component_from_model(
        model=model.download_target_extractor,
        decoder=decoder,
        config=config,
        name=name,
    )

    job_repository: AsyncJobRepository = AsyncHttpJobRepository(
        creation_requester=creation_requester,
        polling_requester=polling_requester,
        download_retriever=download_retriever,
        download_target_requester=download_target_requester,
        abort_requester=abort_requester,
        delete_requester=delete_requester,
        status_extractor=status_extractor,
        status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
        download_target_extractor=download_target_extractor,
        job_timeout=_get_job_timeout(),
    )

    async_job_partition_router = AsyncJobPartitionRouter(
        job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
            job_repository,
            stream_slices,
            self._job_tracker,
            self._message_repository,
            # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk
            has_bulk_parent=False,
            # set `job_max_retry` to 1 for the Connector Builder use-case.
            # `None` means the default of 3 retry attempts is used under the hood.
            job_max_retry=1 if self._emit_connector_builder_messages else None,
        ),
        stream_slicer=stream_slicer,
        config=config,
        parameters=model.parameters or {},
    )

    return AsyncRetriever(
        record_selector=record_selector,
        stream_slicer=async_job_partition_router,
        config=config,
        parameters=model.parameters or {},
    )
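_get_job_timeout above resolves the polling timeout as follows: an interpolated user-defined value always wins; without one, test reads in the Connector Builder default to 15 minutes and regular syncs to 60. The same rule in isolation (job_timeout is a hypothetical helper):

import datetime

def job_timeout(user_minutes: int | None, is_test_read: bool) -> datetime.timedelta:
    default = 15 if is_test_read else 60
    return datetime.timedelta(minutes=user_minutes or default)

assert job_timeout(None, is_test_read=True) == datetime.timedelta(minutes=15)
assert job_timeout(None, is_test_read=False) == datetime.timedelta(minutes=60)
assert job_timeout(30, is_test_read=False) == datetime.timedelta(minutes=30)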
@staticmethod
def create_spec( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Spec, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.spec.Spec:
def create_substream_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SubstreamPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.SubstreamPartitionRouter:
def create_substream_partition_router(
    self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
) -> SubstreamPartitionRouter:
    parent_stream_configs = []
    if model.parent_stream_configs:
        parent_stream_configs.extend(
            [
                self._create_message_repository_substream_wrapper(
                    model=parent_stream_config, config=config, **kwargs
                )
                for parent_stream_config in model.parent_stream_configs
            ]
        )

    return SubstreamPartitionRouter(
        parent_stream_configs=parent_stream_configs,
        parameters=model.parameters or {},
        config=config,
    )
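For reference, a hypothetical manifest fragment of the shape this method consumes; every value below is illustrative, and the parent stream definition is elided:

definition = {
    "type": "SubstreamPartitionRouter",
    "parent_stream_configs": [
        {
            "type": "ParentStreamConfig",
            "stream": {"type": "DeclarativeStream"},  # parent stream definition elided
            "parent_key": "id",
            "partition_field": "parent_id",
        }
    ],
}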
@staticmethod
def create_wait_time_from_header( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.WaitTimeFromHeader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.WaitTimeFromHeaderBackoffStrategy:
@staticmethod
def create_wait_time_from_header(
    model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitTimeFromHeaderBackoffStrategy:
    return WaitTimeFromHeaderBackoffStrategy(
        header=model.header,
        parameters=model.parameters or {},
        config=config,
        regex=model.regex,
        max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
    )
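The optional regex is applied to the header value before it is interpreted as a wait time in seconds. A rough sketch of that extraction, with a made-up header format:

import re

header_value = "wait=42"
match = re.search(r"wait=(\d+)", header_value)
backoff_seconds = float(match.group(1)) if match else None  # 42.0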
@staticmethod
def create_wait_until_time_from_header( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.WaitUntilTimeFromHeader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.WaitUntilTimeFromHeaderBackoffStrategy:
@staticmethod
def create_wait_until_time_from_header(
    model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitUntilTimeFromHeaderBackoffStrategy:
    return WaitUntilTimeFromHeaderBackoffStrategy(
        header=model.header,
        parameters=model.parameters or {},
        config=config,
        min_wait=model.min_wait,
        regex=model.regex,
    )
@staticmethod
def create_components_mapping_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ComponentMappingDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.ComponentMappingDefinition:
@staticmethod
def create_components_mapping_definition(
    model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
) -> ComponentMappingDefinition:
    interpolated_value = InterpolatedString.create(
        model.value, parameters=model.parameters or {}
    )
    field_path = [
        InterpolatedString.create(path, parameters=model.parameters or {})
        for path in model.field_path
    ]
    return ComponentMappingDefinition(
        field_path=field_path,  # type: ignore[arg-type]  # field_path can be str and InterpolatedString
        value=interpolated_value,
        value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
        parameters=model.parameters or {},
    )
def create_http_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpComponentsResolver, config: Mapping[str, Any]) -> Any:
def create_http_components_resolver(
    self, model: HttpComponentsResolverModel, config: Config
) -> Any:
    stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=stream_slicer if stream_slicer else combined_slicers,
        transformations=[],
    )

    components_mapping = [
        self._create_component_from_model(
            model=components_mapping_definition_model,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                components_mapping_definition_model.value_type
            ),
            config=config,
        )
        for components_mapping_definition_model in model.components_mapping
    ]

    return HttpComponentsResolver(
        retriever=retriever,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
@staticmethod
def create_stream_config( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StreamConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.StreamConfig:
@staticmethod
def create_stream_config(
    model: StreamConfigModel, config: Config, **kwargs: Any
) -> StreamConfig:
    model_configs_pointer: List[Union[InterpolatedString, str]] = (
        [x for x in model.configs_pointer] if model.configs_pointer else []
    )

    return StreamConfig(
        configs_pointer=model_configs_pointer,
        parameters=model.parameters or {},
    )
def create_config_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConfigComponentsResolver, config: Mapping[str, Any]) -> Any:
def create_config_components_resolver(
    self, model: ConfigComponentsResolverModel, config: Config
) -> Any:
    stream_config = self._create_component_from_model(
        model.stream_config, config=config, parameters=model.parameters or {}
    )

    components_mapping = [
        self._create_component_from_model(
            model=components_mapping_definition_model,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                components_mapping_definition_model.value_type
            ),
            config=config,
        )
        for components_mapping_definition_model in model.components_mapping
    ]

    return ConfigComponentsResolver(
        stream_config=stream_config,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
def create_http_api_budget( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HTTPAPIBudget, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.HttpAPIBudget:
def create_http_api_budget(
    self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
) -> HttpAPIBudget:
    policies = [
        self._create_component_from_model(model=policy, config=config)
        for policy in model.policies
    ]

    return HttpAPIBudget(
        policies=policies,
        ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
        ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
        status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
    )
def create_fixed_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FixedWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.FixedWindowCallRatePolicy:
def create_fixed_window_call_rate_policy(
    self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> FixedWindowCallRatePolicy:
    matchers = [
        self._create_component_from_model(model=matcher, config=config)
        for matcher in model.matchers
    ]

    # Set the initial reset timestamp to 10 days from now.
    # This value will be updated by the first request.
    return FixedWindowCallRatePolicy(
        next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
        period=parse_duration(model.period),
        call_limit=model.call_limit,
        matchers=matchers,
    )
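The policy is bootstrapped with a reset timestamp far in the future (10 days) that the first rate-limited response is expected to overwrite, and the window length comes from an ISO 8601 duration parsed with isodate.parse_duration, which this module imports. For example:

import datetime
from isodate import parse_duration

period = parse_duration("PT1H")  # one-hour window, expressed as an ISO 8601 duration
next_reset_ts = datetime.datetime.now() + datetime.timedelta(days=10)  # placeholder until the first response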
def create_moving_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.MovingWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.MovingWindowCallRatePolicy:
def create_moving_window_call_rate_policy(
    self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> MovingWindowCallRatePolicy:
    rates = [
        self._create_component_from_model(model=rate, config=config) for rate in model.rates
    ]
    matchers = [
        self._create_component_from_model(model=matcher, config=config)
        for matcher in model.matchers
    ]
    return MovingWindowCallRatePolicy(
        rates=rates,
        matchers=matchers,
    )
def create_unlimited_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.UnlimitedCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.UnlimitedCallRatePolicy:
def create_unlimited_call_rate_policy(
    self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
) -> UnlimitedCallRatePolicy:
    matchers = [
        self._create_component_from_model(model=matcher, config=config)
        for matcher in model.matchers
    ]

    return UnlimitedCallRatePolicy(
        matchers=matchers,
    )
def create_rate( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Rate, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Rate:
def create_http_request_matcher( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpRequestRegexMatcher, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.HttpRequestRegexMatcher:
def create_http_request_matcher(
    self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
) -> HttpRequestRegexMatcher:
    return HttpRequestRegexMatcher(
        method=model.method,
        url_base=model.url_base,
        url_path_pattern=model.url_path_pattern,
        params=model.params,
        headers=model.headers,
    )
def set_api_budget( self, component_definition: Mapping[str, Any], config: Mapping[str, Any]) -> None:
def create_grouping_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.GroupingPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.partition_routers.GroupingPartitionRouter:
def create_grouping_partition_router(
    self, model: GroupingPartitionRouterModel, config: Config, **kwargs: Any
) -> GroupingPartitionRouter:
    underlying_router = self._create_component_from_model(
        model=model.underlying_partition_router, config=config
    )
    if model.group_size < 1:
        raise ValueError(f"Group size must be greater than 0, got {model.group_size}")

    # Request options in underlying partition routers are not supported for GroupingPartitionRouter
    # because they are specific to individual partitions and cannot be aggregated or handled
    # when grouping, potentially leading to incorrect API calls. Any request customization
    # should be managed at the stream level through the requester's configuration.
    if isinstance(underlying_router, SubstreamPartitionRouter):
        if any(
            parent_config.request_option
            for parent_config in underlying_router.parent_stream_configs
        ):
            raise ValueError("Request options are not supported for GroupingPartitionRouter.")

    if isinstance(underlying_router, ListPartitionRouter):
        if underlying_router.request_option:
            raise ValueError("Request options are not supported for GroupingPartitionRouter.")

    return GroupingPartitionRouter(
        group_size=model.group_size,
        underlying_partition_router=underlying_router,
        deduplicate=model.deduplicate if model.deduplicate is not None else True,
        config=config,
    )
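Condensing the two validations above into one standalone check (validate_grouping is a hypothetical helper): group_size must be positive, and the underlying router must not inject per-partition request options.

def validate_grouping(group_size: int, has_request_options: bool) -> None:
    if group_size < 1:
        raise ValueError(f"Group size must be greater than 0, got {group_size}")
    if has_request_options:
        raise ValueError("Request options are not supported for GroupingPartitionRouter.")

validate_grouping(group_size=10, has_request_options=False)  # passes
# validate_grouping(group_size=0, has_request_options=False)  # would raise ValueError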