airbyte_cdk.sources.declarative.parsers.model_to_component_factory
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

from __future__ import annotations

import datetime
import importlib
import inspect
import re
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Mapping,
    MutableMapping,
    Optional,
    Type,
    Union,
    get_args,
    get_origin,
    get_type_hints,
)

from isodate import parse_duration
from pydantic.v1 import BaseModel

from airbyte_cdk.models import FailureType, Level
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
    DeclarativeAuthenticator,
    NoAuth,
)
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
from airbyte_cdk.sources.declarative.auth.oauth import (
    DeclarativeSingleUseRefreshTokenOauth2Authenticator,
)
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
from airbyte_cdk.sources.declarative.auth.token import (
    ApiKeyAuthenticator,
    BasicHttpAuthenticator,
    BearerAuthenticator,
    LegacySessionTokenAuthenticator,
)
from airbyte_cdk.sources.declarative.auth.token_provider import (
    InterpolatedStringTokenProvider,
    SessionTokenProvider,
    TokenProvider,
)
from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
from airbyte_cdk.sources.declarative.decoders import (
    Decoder,
    IterableDecoder,
    JsonDecoder,
    PaginationDecoderDecorator,
    XmlDecoder,
    ZipfileDecoder,
)
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
    CompositeRawDecoder,
    CsvParser,
    GzipParser,
    JsonLineParser,
    JsonParser,
    Parser,
)
from airbyte_cdk.sources.declarative.extractors import (
    DpathExtractor,
    RecordFilter,
    RecordSelector,
    ResponseToFileExtractor,
)
from airbyte_cdk.sources.declarative.extractors.record_filter import (
    ClientSideIncrementalRecordFilterDecorator,
)
from airbyte_cdk.sources.declarative.incremental import (
    ChildPartitionResumableFullRefreshCursor,
    ConcurrentCursorFactory,
    ConcurrentPerPartitionCursor,
    CursorFactory,
    DatetimeBasedCursor,
    DeclarativeCursor,
    GlobalSubstreamCursor,
    PerPartitionCursor,
    PerPartitionWithGlobalCursor,
    ResumableFullRefreshCursor,
)
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
    LegacyToPerPartitionStateMigration,
)
from airbyte_cdk.sources.declarative.models import (
    CustomStateMigration,
    GzipDecoder,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AddedFieldDefinition as AddedFieldDefinitionModel,
    AddFields as AddFieldsModel,
    ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
    AsyncJobStatusMap as AsyncJobStatusMapModel,
    AsyncRetriever as AsyncRetrieverModel,
    BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
    BearerAuthenticator as BearerAuthenticatorModel,
    CheckDynamicStream as CheckDynamicStreamModel,
    CheckStream as CheckStreamModel,
    ComplexFieldType as ComplexFieldTypeModel,
    ComponentMappingDefinition as ComponentMappingDefinitionModel,
    CompositeErrorHandler as CompositeErrorHandlerModel,
    ConcurrencyLevel as ConcurrencyLevelModel,
    ConfigComponentsResolver as ConfigComponentsResolverModel,
    ConstantBackoffStrategy as ConstantBackoffStrategyModel,
    CsvDecoder as CsvDecoderModel,
    CursorPagination as CursorPaginationModel,
    CustomAuthenticator as CustomAuthenticatorModel,
    CustomBackoffStrategy as CustomBackoffStrategyModel,
    CustomDecoder as CustomDecoderModel,
    CustomErrorHandler as CustomErrorHandlerModel,
    CustomIncrementalSync as CustomIncrementalSyncModel,
    CustomPaginationStrategy as CustomPaginationStrategyModel,
    CustomPartitionRouter as CustomPartitionRouterModel,
    CustomRecordExtractor as CustomRecordExtractorModel,
    CustomRecordFilter as CustomRecordFilterModel,
    CustomRequester as CustomRequesterModel,
    CustomRetriever as CustomRetrieverModel,
    CustomSchemaLoader as CustomSchemaLoader,
    CustomSchemaNormalization as CustomSchemaNormalizationModel,
    CustomTransformation as CustomTransformationModel,
    DatetimeBasedCursor as DatetimeBasedCursorModel,
    DeclarativeStream as DeclarativeStreamModel,
    DefaultErrorHandler as DefaultErrorHandlerModel,
    DefaultPaginator as DefaultPaginatorModel,
    DpathExtractor as DpathExtractorModel,
    DpathFlattenFields as DpathFlattenFieldsModel,
    DynamicSchemaLoader as DynamicSchemaLoaderModel,
    ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
    FlattenFields as FlattenFieldsModel,
    GzipDecoder as GzipDecoderModel,
    HTTPAPIBudget as HTTPAPIBudgetModel,
    HttpComponentsResolver as HttpComponentsResolverModel,
    HttpRequester as HttpRequesterModel,
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
    HttpResponseFilter as HttpResponseFilterModel,
    IncrementingCountCursor as IncrementingCountCursorModel,
    InlineSchemaLoader as InlineSchemaLoaderModel,
    IterableDecoder as IterableDecoderModel,
    JsonDecoder as JsonDecoderModel,
    JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
    JsonlDecoder as JsonlDecoderModel,
    JwtAuthenticator as JwtAuthenticatorModel,
    JwtHeaders as JwtHeadersModel,
    JwtPayload as JwtPayloadModel,
    KeysReplace as KeysReplaceModel,
    KeysToLower as KeysToLowerModel,
    KeysToSnakeCase as KeysToSnakeCaseModel,
    LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
    LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
    ListPartitionRouter as ListPartitionRouterModel,
    MinMaxDatetime as MinMaxDatetimeModel,
    MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
    NoAuth as NoAuthModel,
    NoPagination as NoPaginationModel,
    OAuthAuthenticator as OAuthAuthenticatorModel,
    OffsetIncrement as OffsetIncrementModel,
    PageIncrement as PageIncrementModel,
    ParentStreamConfig as ParentStreamConfigModel,
    Rate as RateModel,
    RecordFilter as RecordFilterModel,
    RecordSelector as RecordSelectorModel,
    RemoveFields as RemoveFieldsModel,
    RequestOption as RequestOptionModel,
    RequestPath as RequestPathModel,
    ResponseToFileExtractor as ResponseToFileExtractorModel,
    SchemaNormalization as SchemaNormalizationModel,
    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
    SelectiveAuthenticator as SelectiveAuthenticatorModel,
    SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
    SimpleRetriever as SimpleRetrieverModel,
    Spec as SpecModel,
    StateDelegatingStream as StateDelegatingStreamModel,
    StreamConfig as StreamConfigModel,
    SubstreamPartitionRouter as SubstreamPartitionRouterModel,
    TypesMap as TypesMapModel,
    UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
    ValueType,
    WaitTimeFromHeader as WaitTimeFromHeaderModel,
    WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
    XmlDecoder as XmlDecoderModel,
    ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    COMPONENTS_MODULE_NAME,
    SDM_COMPONENTS_MODULE_NAME,
)
from airbyte_cdk.sources.declarative.partition_routers import (
    CartesianProductStreamSlicer,
    ListPartitionRouter,
    PartitionRouter,
    SinglePartitionRouter,
    SubstreamPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
    AsyncJobPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
    ParentStreamConfig,
)
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
    CompositeErrorHandler,
    DefaultErrorHandler,
    HttpResponseFilter,
)
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
    ConstantBackoffStrategy,
    ExponentialBackoffStrategy,
    WaitTimeFromHeaderBackoffStrategy,
    WaitUntilTimeFromHeaderBackoffStrategy,
)
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
from airbyte_cdk.sources.declarative.requesters.paginators import (
    DefaultPaginator,
    NoPagination,
    PaginatorTestReadDecorator,
)
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
    CursorPaginationStrategy,
    CursorStopCondition,
    OffsetIncrement,
    PageIncrement,
    StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
    DatetimeBasedRequestOptionsProvider,
    DefaultRequestOptionsProvider,
    InterpolatedRequestOptionsProvider,
    RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
from airbyte_cdk.sources.declarative.resolvers import (
    ComponentMappingDefinition,
    ConfigComponentsResolver,
    HttpComponentsResolver,
    StreamConfig,
)
from airbyte_cdk.sources.declarative.retrievers import (
    AsyncRetriever,
    LazySimpleRetriever,
    SimpleRetriever,
    SimpleRetrieverTestReadDecorator,
)
from airbyte_cdk.sources.declarative.schema import (
    ComplexFieldType,
    DefaultSchemaLoader,
    DynamicSchemaLoader,
    InlineSchemaLoader,
    JsonFileSchemaLoader,
    SchemaTypeIdentifier,
    TypesMap,
)
from airbyte_cdk.sources.declarative.spec import Spec
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
from airbyte_cdk.sources.declarative.transformations import (
    AddFields,
    RecordTransformation,
    RemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
    DpathFlattenFields,
)
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
    FlattenFields,
)
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
    KeysReplaceTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
    KeysToLowerTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)
from airbyte_cdk.sources.message import (
    InMemoryMessageRepository,
    LogAppenderMessageRepositoryDecorator,
    MessageRepository,
    NoopMessageRepository,
)
from airbyte_cdk.sources.streams.call_rate import (
    APIBudget,
    FixedWindowCallRatePolicy,
    HttpAPIBudget,
    HttpRequestRegexMatcher,
    MovingWindowCallRatePolicy,
    Rate,
    UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.concurrent.clamping import (
    ClampingEndProvider,
    ClampingStrategy,
    DayClampingStrategy,
    MonthClampingStrategy,
    NoClamping,
    WeekClampingStrategy,
    Weekday,
)
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
    CustomFormatConcurrentStreamStateConverter,
    DateTimeStreamStateConverter,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

ComponentDefinition = Mapping[str, Any]

SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}


class ModelToComponentFactory:
    EPOCH_DATETIME_FORMAT = "%s"

    def __init__(
        self,
        limit_pages_fetched_per_slice: Optional[int] = None,
        limit_slices_fetched: Optional[int] = None,
        emit_connector_builder_messages: bool = False,
        disable_retries: bool = False,
        disable_cache: bool = False,
        disable_resumable_full_refresh: bool = False,
        message_repository: Optional[MessageRepository] = None,
        connector_state_manager: Optional[ConnectorStateManager] = None,
        max_concurrent_async_job_count: Optional[int] = None,
    ):
        self._init_mappings()
        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
        self._limit_slices_fetched = limit_slices_fetched
        self._emit_connector_builder_messages = emit_connector_builder_messages
        self._disable_retries = disable_retries
        self._disable_cache = disable_cache
        self._disable_resumable_full_refresh = disable_resumable_full_refresh
        self._message_repository = message_repository or InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)

    def _init_mappings(self) -> None:
        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
            AddedFieldDefinitionModel: self.create_added_field_definition,
            AddFieldsModel: self.create_add_fields,
            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
            BearerAuthenticatorModel: self.create_bearer_authenticator,
            CheckStreamModel: self.create_check_stream,
            CheckDynamicStreamModel: self.create_check_dynamic_stream,
            CompositeErrorHandlerModel: self.create_composite_error_handler,
            ConcurrencyLevelModel: self.create_concurrency_level,
            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
            CsvDecoderModel: self.create_csv_decoder,
            CursorPaginationModel: self.create_cursor_pagination,
            CustomAuthenticatorModel: self.create_custom_component,
            CustomBackoffStrategyModel: self.create_custom_component,
            CustomDecoderModel: self.create_custom_component,
            CustomErrorHandlerModel: self.create_custom_component,
            CustomIncrementalSyncModel: self.create_custom_component,
            CustomRecordExtractorModel: self.create_custom_component,
            CustomRecordFilterModel: self.create_custom_component,
            CustomRequesterModel: self.create_custom_component,
            CustomRetrieverModel: self.create_custom_component,
            CustomSchemaLoader: self.create_custom_component,
            CustomSchemaNormalizationModel: self.create_custom_component,
            CustomStateMigration: self.create_custom_component,
            CustomPaginationStrategyModel: self.create_custom_component,
            CustomPartitionRouterModel: self.create_custom_component,
            CustomTransformationModel: self.create_custom_component,
            DatetimeBasedCursorModel: self.create_datetime_based_cursor,
            DeclarativeStreamModel: self.create_declarative_stream,
            DefaultErrorHandlerModel: self.create_default_error_handler,
            DefaultPaginatorModel: self.create_default_paginator,
            DpathExtractorModel: self.create_dpath_extractor,
            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
            HttpRequesterModel: self.create_http_requester,
            HttpResponseFilterModel: self.create_http_response_filter,
            InlineSchemaLoaderModel: self.create_inline_schema_loader,
            JsonDecoderModel: self.create_json_decoder,
            JsonlDecoderModel: self.create_jsonl_decoder,
            GzipDecoderModel: self.create_gzip_decoder,
            KeysToLowerModel: self.create_keys_to_lower_transformation,
            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
            KeysReplaceModel: self.create_keys_replace_transformation,
            FlattenFieldsModel: self.create_flatten_fields,
            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
            IterableDecoderModel: self.create_iterable_decoder,
            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
            XmlDecoderModel: self.create_xml_decoder,
            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
            TypesMapModel: self.create_types_map,
            ComplexFieldTypeModel: self.create_complex_field_type,
            JwtAuthenticatorModel: self.create_jwt_authenticator,
            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
            ListPartitionRouterModel: self.create_list_partition_router,
            MinMaxDatetimeModel: self.create_min_max_datetime,
            NoAuthModel: self.create_no_auth,
            NoPaginationModel: self.create_no_pagination,
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
            ParentStreamConfigModel: self.create_parent_stream_config,
            RecordFilterModel: self.create_record_filter,
            RecordSelectorModel: self.create_record_selector,
            RemoveFieldsModel: self.create_remove_fields,
            RequestPathModel: self.create_request_path,
            RequestOptionModel: self.create_request_option,
            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
            SelectiveAuthenticatorModel: self.create_selective_authenticator,
            SimpleRetrieverModel: self.create_simple_retriever,
            StateDelegatingStreamModel: self.create_state_delegating_stream,
            SpecModel: self.create_spec,
            SubstreamPartitionRouterModel: self.create_substream_partition_router,
            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
            AsyncRetrieverModel: self.create_async_retriever,
            HttpComponentsResolverModel: self.create_http_components_resolver,
            ConfigComponentsResolverModel: self.create_config_components_resolver,
            StreamConfigModel: self.create_stream_config,
            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
            ZipfileDecoderModel: self.create_zipfile_decoder,
            HTTPAPIBudgetModel: self.create_http_api_budget,
            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}

    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
        creating declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )

    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")
        return component_constructor(model=model, config=config, **kwargs)

    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )
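    # Illustrative usage sketch (not part of the original source; names taken from the
    # imports above): the factory is normally driven with component definitions parsed out
    # of a YAML manifest, and create_component requires the definition's "type" to match
    # model_type.__name__:
    #
    #   factory = ModelToComponentFactory()
    #   check = factory.create_component(
    #       model_type=CheckStreamModel,
    #       component_definition={"type": "CheckStream", "stream_names": ["users"]},
    #       config={},
    #   )
    #   assert isinstance(check, CheckStream)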
    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]
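    # Illustrative sketch (not part of the original source): _json_schema_type_name_to_type
    # maps the JSON-schema-style value_type of an AddedFieldDefinition to a Python type, so
    # a definition such as
    #
    #   {
    #       "type": "AddFields",
    #       "fields": [
    #           {"path": ["row_count"], "value": "{{ record['count'] }}", "value_type": "integer"}
    #       ],
    #   }
    #
    # would have its interpolated value coerced with int (ValueType.integer -> int).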
    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )
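    # Illustrative manifest sketch (assumed shape, not part of the original source):
    # inject_into supersedes the deprecated header field, so an ApiKeyAuthenticator is
    # typically declared as
    #
    #   {
    #       "type": "ApiKeyAuthenticator",
    #       "api_token": "{{ config['api_key'] }}",
    #       "inject_into": {"type": "RequestOption", "inject_into": "header", "field_name": "X-API-Key"},
    #   }
    #
    # Declaring both inject_into and header raises the ValueError above.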
    def create_legacy_to_per_partition_state_migration(
        self,
        model: LegacyToPerPartitionStateMigrationModel,
        config: Mapping[str, Any],
        declarative_stream: DeclarativeStreamModel,
    ) -> LegacyToPerPartitionStateMigration:
        retriever = declarative_stream.retriever
        if not isinstance(retriever, SimpleRetrieverModel):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}"
            )
        partition_router = retriever.partition_router
        if not isinstance(
            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
        ):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
            )
        if not hasattr(partition_router, "parent_stream_configs"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
            )

        if not hasattr(declarative_stream, "incremental_sync"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
            )

        return LegacyToPerPartitionStateMigration(
            partition_router,  # type: ignore # was already checked above
            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
            config,
            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=token_provider,
            )

    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )
    @staticmethod
    def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
        return CheckStream(stream_names=model.stream_names, parameters={})

    @staticmethod
    def create_check_dynamic_stream(
        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
    ) -> CheckDynamicStream:
        assert model.use_check_availability is not None  # for mypy

        use_check_availability = model.use_check_availability

        return CheckDynamicStream(
            stream_count=model.stream_count,
            use_check_availability=use_check_availability,
            parameters={},
        )

    def create_composite_error_handler(
        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
    ) -> CompositeErrorHandler:
        error_handlers = [
            self._create_component_from_model(model=error_handler_model, config=config)
            for error_handler_model in model.error_handlers
        ]
        return CompositeErrorHandler(
            error_handlers=error_handlers, parameters=model.parameters or {}
        )

    @staticmethod
    def create_concurrency_level(
        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
    ) -> ConcurrencyLevel:
        return ConcurrencyLevel(
            default_concurrency=model.default_concurrency,
            max_concurrency=model.max_concurrency,
            config=config,
            parameters={},
        )

    @staticmethod
    def apply_stream_state_migrations(
        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        if stream_state_migrations:
            for state_migration in stream_state_migrations:
                if state_migration.should_migrate(stream_state):
                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
                    stream_state = dict(state_migration.migrate(stream_state))
        return stream_state
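    # Illustrative sketch (not part of the original source): a state migration only needs
    # should_migrate and migrate, so apply_stream_state_migrations can be exercised with a
    # stub such as
    #
    #   class _CursorRenameMigration:
    #       def should_migrate(self, stream_state):
    #           return "cursor" in stream_state
    #
    #       def migrate(self, stream_state):
    #           return {"updated_at": stream_state["cursor"]}
    #
    #   ModelToComponentFactory.apply_stream_state_migrations(
    #       [_CursorRenameMigration()], {"cursor": "2023-01-01"}
    #   )  # -> {"updated_at": "2023-01-01"}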
    def create_concurrent_cursor_from_datetime_based_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        runtime_lookback_window: Optional[datetime.timedelta] = None,
        stream_state_migrations: Optional[List[Any]] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        # Per-partition incremental streams can dynamically create child cursors which will pass their current
        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
        # incoming state and connector_state_manager that is initialized when the component factory is created
        stream_state = (
            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
            if "stream_state" not in kwargs
            else kwargs["stream_state"]
        )
        stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        datetime_based_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
            )

        interpolated_cursor_field = InterpolatedString.create(
            datetime_based_cursor_model.cursor_field,
            parameters=datetime_based_cursor_model.parameters or {},
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        interpolated_partition_field_start = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_start or "start_time",
            parameters=datetime_based_cursor_model.parameters or {},
        )
        interpolated_partition_field_end = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_end or "end_time",
            parameters=datetime_based_cursor_model.parameters or {},
        )

        slice_boundary_fields = (
            interpolated_partition_field_start.eval(config=config),
            interpolated_partition_field_end.eval(config=config),
        )

        datetime_format = datetime_based_cursor_model.datetime_format

        cursor_granularity = (
            parse_duration(datetime_based_cursor_model.cursor_granularity)
            if datetime_based_cursor_model.cursor_granularity
            else None
        )

        lookback_window = None
        interpolated_lookback_window = (
            InterpolatedString.create(
                datetime_based_cursor_model.lookback_window,
                parameters=datetime_based_cursor_model.parameters or {},
            )
            if datetime_based_cursor_model.lookback_window
            else None
        )
        if interpolated_lookback_window:
            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
            if evaluated_lookback_window:
                lookback_window = parse_duration(evaluated_lookback_window)

        connector_state_converter: DateTimeStreamStateConverter
        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )

        # Adjusts the stream state by applying the runtime lookback window.
        # This is used to ensure correct state handling in case of failed partitions.
        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
        if runtime_lookback_window and stream_state_value:
            new_stream_state = (
                connector_state_converter.parse_timestamp(stream_state_value)
                - runtime_lookback_window
            )
            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
                new_stream_state
            )

        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
            start_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.start_datetime, config=config
            )
        else:
            start_date_runtime_value = datetime_based_cursor_model.start_datetime

        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
            end_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.end_datetime, config=config
            )
        else:
            end_date_runtime_value = datetime_based_cursor_model.end_datetime

        interpolated_start_date = MinMaxDatetime.create(
            interpolated_string_or_min_max_datetime=start_date_runtime_value,
            parameters=datetime_based_cursor_model.parameters,
        )
        interpolated_end_date = (
            None
            if not end_date_runtime_value
            else MinMaxDatetime.create(
                end_date_runtime_value, datetime_based_cursor_model.parameters
            )
        )

        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
        if not interpolated_start_date.datetime_format:
            interpolated_start_date.datetime_format = datetime_format
        if interpolated_end_date and not interpolated_end_date.datetime_format:
            interpolated_end_date.datetime_format = datetime_format

        start_date = interpolated_start_date.get_datetime(config=config)
        end_date_provider = (
            partial(interpolated_end_date.get_datetime, config)
            if interpolated_end_date
            else connector_state_converter.get_end_provider()
        )

        if (
            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
        ) or (
            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
        ):
            raise ValueError(
                f"If step is defined, cursor_granularity should be as well and vice-versa. "
                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
            )

        # When step is not defined, default to a step size from the starting date to the present moment
        step_length = datetime.timedelta.max
        interpolated_step = (
            InterpolatedString.create(
                datetime_based_cursor_model.step,
                parameters=datetime_based_cursor_model.parameters or {},
            )
            if datetime_based_cursor_model.step
            else None
        )
        if interpolated_step:
            evaluated_step = interpolated_step.eval(config)
            if evaluated_step:
                step_length = parse_duration(evaluated_step)

        clamping_strategy: ClampingStrategy = NoClamping()
        if datetime_based_cursor_model.clamping:
            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
            # it is still better than shifting the interpolation low-code concept into the ConcurrentCursor
            # runtime object, which we want to keep agnostic of being low-code
            target = InterpolatedString(
                string=datetime_based_cursor_model.clamping.target,
                parameters=datetime_based_cursor_model.parameters or {},
            )
            evaluated_target = target.eval(config=config)
            match evaluated_target:
                case "DAY":
                    clamping_strategy = DayClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        DayClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
                    )
                case "WEEK":
                    if (
                        not datetime_based_cursor_model.clamping.target_details
                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
                    ):
                        raise ValueError(
                            "Given WEEK clamping, weekday needs to be provided as target_details"
                        )
                    weekday = self._assemble_weekday(
                        datetime_based_cursor_model.clamping.target_details["weekday"]
                    )
                    clamping_strategy = WeekClampingStrategy(weekday)
                    end_date_provider = ClampingEndProvider(
                        WeekClampingStrategy(weekday, is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case "MONTH":
                    clamping_strategy = MonthClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        MonthClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case _:
                    raise ValueError(
                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
                    )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=slice_boundary_fields,
            start=start_date,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            lookback_window=lookback_window,
            slice_range=step_length,
            cursor_granularity=cursor_granularity,
            clamping_strategy=clamping_strategy,
        )
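    # Illustrative manifest sketch (assumed values, not part of the original source): step
    # and cursor_granularity must be declared together, as enforced by the ValueError above.
    # A DatetimeBasedCursor slicing one day at a time might look like
    #
    #   {
    #       "type": "DatetimeBasedCursor",
    #       "cursor_field": "updated_at",
    #       "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
    #       "start_datetime": "{{ config['start_date'] }}",
    #       "step": "P1D",
    #       "cursor_granularity": "PT1S",
    #   }
    #
    # Omitting both yields a single slice, since step_length defaults to datetime.timedelta.max.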
    def create_concurrent_cursor_from_incrementing_count_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        # Per-partition incremental streams can dynamically create child cursors which will pass their current
        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
        # incoming state and connector_state_manager that is initialized when the component factory is created
        stream_state = (
            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
            if "stream_state" not in kwargs
            else kwargs["stream_state"]
        )

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        incrementing_count_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
            )

        interpolated_start_value = (
            InterpolatedString.create(
                incrementing_count_cursor_model.start_value,  # type: ignore
                parameters=incrementing_count_cursor_model.parameters or {},
            )
            if incrementing_count_cursor_model.start_value
            else 0
        )

        interpolated_cursor_field = InterpolatedString.create(
            incrementing_count_cursor_model.cursor_field,
            parameters=incrementing_count_cursor_model.parameters or {},
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        connector_state_converter = IncrementingCountStreamStateConverter(
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=None,
            start=interpolated_start_value,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=connector_state_converter.get_end_provider(),  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        )

    def _assemble_weekday(self, weekday: str) -> Weekday:
        match weekday:
            case "MONDAY":
                return Weekday.MONDAY
            case "TUESDAY":
                return Weekday.TUESDAY
            case "WEDNESDAY":
                return Weekday.WEDNESDAY
            case "THURSDAY":
                return Weekday.THURSDAY
            case "FRIDAY":
                return Weekday.FRIDAY
            case "SATURDAY":
                return Weekday.SATURDAY
            case "SUNDAY":
                return Weekday.SUNDAY
            case _:
                raise ValueError(f"Unknown weekday {weekday}")
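    # Illustrative sketch (assumed shape, not part of the original source): WEEK clamping
    # requires a weekday under target_details, which _assemble_weekday translates into the
    # Weekday enum:
    #
    #   {"clamping": {"target": "WEEK", "target_details": {"weekday": "MONDAY"}}}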
= self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1357 1358 # Return the concurrent cursor and state converter 1359 return ConcurrentPerPartitionCursor( 1360 cursor_factory=cursor_factory, 1361 partition_router=partition_router, 1362 stream_name=stream_name, 1363 stream_namespace=stream_namespace, 1364 stream_state=stream_state, 1365 message_repository=self._message_repository, # type: ignore 1366 connector_state_manager=state_manager, 1367 connector_state_converter=connector_state_converter, 1368 cursor_field=cursor_field, 1369 ) 1370 1371 @staticmethod 1372 def create_constant_backoff_strategy( 1373 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1374 ) -> ConstantBackoffStrategy: 1375 return ConstantBackoffStrategy( 1376 backoff_time_in_seconds=model.backoff_time_in_seconds, 1377 config=config, 1378 parameters=model.parameters or {}, 1379 ) 1380 1381 def create_cursor_pagination( 1382 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1383 ) -> CursorPaginationStrategy: 1384 if isinstance(decoder, PaginationDecoderDecorator): 1385 inner_decoder = decoder.decoder 1386 else: 1387 inner_decoder = decoder 1388 decoder = PaginationDecoderDecorator(decoder=decoder) 1389 1390 if self._is_supported_decoder_for_pagination(inner_decoder): 1391 decoder_to_use = decoder 1392 else: 1393 raise ValueError( 1394 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1395 ) 1396 1397 return CursorPaginationStrategy( 1398 cursor_value=model.cursor_value, 1399 decoder=decoder_to_use, 1400 page_size=model.page_size, 1401 stop_condition=model.stop_condition, 1402 config=config, 1403 parameters=model.parameters or {}, 1404 ) 1405 1406 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1407 """ 1408 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1409 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1410 :param model: The Pydantic model of the custom component being created 1411 :param config: The custom defined connector config 1412 :return: The declarative component built from the Pydantic model to be used at runtime 1413 """ 1414 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1415 component_fields = get_type_hints(custom_component_class) 1416 model_args = model.dict() 1417 model_args["config"] = config 1418 1419 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1420 # we defer to these arguments over the component's definition 1421 for key, arg in kwargs.items(): 1422 model_args[key] = arg 1423 1424 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1425 # defined in the schema. The fields and types are defined within the Python class implementation. 
Pydantic can only parse down to 1426 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1427 for model_field, model_value in model_args.items(): 1428 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1429 if ( 1430 isinstance(model_value, dict) 1431 and "type" not in model_value 1432 and model_field in component_fields 1433 ): 1434 derived_type = self._derive_component_type_from_type_hints( 1435 component_fields.get(model_field) 1436 ) 1437 if derived_type: 1438 model_value["type"] = derived_type 1439 1440 if self._is_component(model_value): 1441 model_args[model_field] = self._create_nested_component( 1442 model, model_field, model_value, config 1443 ) 1444 elif isinstance(model_value, list): 1445 vals = [] 1446 for v in model_value: 1447 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1448 derived_type = self._derive_component_type_from_type_hints( 1449 component_fields.get(model_field) 1450 ) 1451 if derived_type: 1452 v["type"] = derived_type 1453 if self._is_component(v): 1454 vals.append(self._create_nested_component(model, model_field, v, config)) 1455 else: 1456 vals.append(v) 1457 model_args[model_field] = vals 1458 1459 kwargs = { 1460 class_field: model_args[class_field] 1461 for class_field in component_fields.keys() 1462 if class_field in model_args 1463 } 1464 return custom_component_class(**kwargs) 1465 1466 @staticmethod 1467 def _get_class_from_fully_qualified_class_name( 1468 full_qualified_class_name: str, 1469 ) -> Any: 1470 """Get a class from its fully qualified name. 1471 1472 If a custom components module is needed, we assume it is already registered - probably 1473 as `source_declarative_manifest.components` or `components`. 1474 1475 Args: 1476 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1477 1478 Returns: 1479 Any: The class object. 1480 1481 Raises: 1482 ValueError: If the class cannot be loaded. 1483 """ 1484 split = full_qualified_class_name.split(".") 1485 module_name_full = ".".join(split[:-1]) 1486 class_name = split[-1] 1487 1488 try: 1489 module_ref = importlib.import_module(module_name_full) 1490 except ModuleNotFoundError as e: 1491 if split[0] == "source_declarative_manifest": 1492 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. 
In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append 1493 try: 1494 1495 1496 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1497 module_ref = importlib.import_module( 1498 module_name_with_source_declarative_manifest 1499 ) 1500 except ModuleNotFoundError: 1501 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1502 else: 1503 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1504 1505 try: 1506 return getattr(module_ref, class_name) 1507 except AttributeError as e: 1508 raise ValueError( 1509 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1510 ) from e 1511 1512 @staticmethod 1513 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1514 interface = field_type 1515 while True: 1516 origin = get_origin(interface) 1517 if origin: 1518 # Unnest types until we reach the raw type 1519 # List[T] -> T 1520 # Optional[List[T]] -> T 1521 args = get_args(interface) 1522 interface = args[0] 1523 else: 1524 break 1525 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1526 return interface.__name__ 1527 return None 1528 1529 @staticmethod 1530 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1531 if not cls: 1532 return False 1533 return cls.__module__ == "builtins" 1534 1535 @staticmethod 1536 def _extract_missing_parameters(error: TypeError) -> List[str]: 1537 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1538 if parameter_search: 1539 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1540 else: 1541 return [] 1542 1543 def _create_nested_component( 1544 self, model: Any, model_field: str, model_value: Any, config: Config 1545 ) -> Any: 1546 type_name = model_value.get("type", None) 1547 if not type_name: 1548 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1549 return model_value 1550 1551 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1552 if model_type: 1553 parsed_model = model_type.parse_obj(model_value) 1554 try: 1555 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1556 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1557 # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1558 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1559 # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that 1560 # are needed by a component and could not be shared.
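# As an illustrative sketch, a manifest could provide such an extra argument via $parameters
# (the class path and URL below are hypothetical, not taken from a real manifest):
#
#   {
#       "type": "CustomRetriever",
#       "class_name": "source_example.components.MyRetriever",
#       "paginator": {
#           "type": "DefaultPaginator",
#           "$parameters": {"url_base": "https://api.example.com/v1"},
#       },
#   }
#
# The lookup below inspects the create method's keyword-only arguments (url_base is one such
# argument of create_default_paginator) and forwards only the $parameters entries whose names match.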
1561 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1562 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1563 model_parameters = model_value.get("$parameters", {}) 1564 matching_parameters = { 1565 kwarg: model_parameters[kwarg] 1566 for kwarg in constructor_kwargs 1567 if kwarg in model_parameters 1568 } 1569 return self._create_component_from_model( 1570 model=parsed_model, config=config, **matching_parameters 1571 ) 1572 except TypeError as error: 1573 missing_parameters = self._extract_missing_parameters(error) 1574 if missing_parameters: 1575 raise ValueError( 1576 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1577 + ", ".join( 1578 ( 1579 f"{type_name}.$parameters.{parameter}" 1580 for parameter in missing_parameters 1581 ) 1582 ) 1583 ) 1584 raise TypeError( 1585 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1586 ) 1587 else: 1588 raise ValueError( 1589 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1590 ) 1591 1592 @staticmethod 1593 def _is_component(model_value: Any) -> bool: 1594 return isinstance(model_value, dict) and model_value.get("type") is not None 1595 1596 def create_datetime_based_cursor( 1597 self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any 1598 ) -> DatetimeBasedCursor: 1599 start_datetime: Union[str, MinMaxDatetime] = ( 1600 model.start_datetime 1601 if isinstance(model.start_datetime, str) 1602 else self.create_min_max_datetime(model.start_datetime, config) 1603 ) 1604 end_datetime: Union[str, MinMaxDatetime, None] = None 1605 if model.is_data_feed and model.end_datetime: 1606 raise ValueError("Data feed does not support end_datetime") 1607 if model.is_data_feed and model.is_client_side_incremental: 1608 raise ValueError( 1609 "`Client side incremental` cannot be applied with `data feed`. Choose only one of them."
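# A data feed pages from the most recent records to the least recent and relies on the
# cursor-based stop condition, so an explicit end_datetime (checked above) or client-side
# incremental filtering would conflict with that pagination model.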
1610 ) 1611 if model.end_datetime: 1612 end_datetime = ( 1613 model.end_datetime 1614 if isinstance(model.end_datetime, str) 1615 else self.create_min_max_datetime(model.end_datetime, config) 1616 ) 1617 1618 end_time_option = ( 1619 self._create_component_from_model( 1620 model.end_time_option, config, parameters=model.parameters or {} 1621 ) 1622 if model.end_time_option 1623 else None 1624 ) 1625 start_time_option = ( 1626 self._create_component_from_model( 1627 model.start_time_option, config, parameters=model.parameters or {} 1628 ) 1629 if model.start_time_option 1630 else None 1631 ) 1632 1633 return DatetimeBasedCursor( 1634 cursor_field=model.cursor_field, 1635 cursor_datetime_formats=model.cursor_datetime_formats 1636 if model.cursor_datetime_formats 1637 else [], 1638 cursor_granularity=model.cursor_granularity, 1639 datetime_format=model.datetime_format, 1640 end_datetime=end_datetime, 1641 start_datetime=start_datetime, 1642 step=model.step, 1643 end_time_option=end_time_option, 1644 lookback_window=model.lookback_window, 1645 start_time_option=start_time_option, 1646 partition_field_end=model.partition_field_end, 1647 partition_field_start=model.partition_field_start, 1648 message_repository=self._message_repository, 1649 is_compare_strictly=model.is_compare_strictly, 1650 config=config, 1651 parameters=model.parameters or {}, 1652 ) 1653 1654 def create_declarative_stream( 1655 self, model: DeclarativeStreamModel, config: Config, **kwargs: Any 1656 ) -> DeclarativeStream: 1657 # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field 1658 # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the 1659 # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in 1660 # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one. 1661 combined_slicers = self._merge_stream_slicers(model=model, config=config) 1662 1663 primary_key = model.primary_key.__root__ if model.primary_key else None 1664 stop_condition_on_cursor = ( 1665 model.incremental_sync 1666 and hasattr(model.incremental_sync, "is_data_feed") 1667 and model.incremental_sync.is_data_feed 1668 ) 1669 client_side_incremental_sync = None 1670 if ( 1671 model.incremental_sync 1672 and hasattr(model.incremental_sync, "is_client_side_incremental") 1673 and model.incremental_sync.is_client_side_incremental 1674 ): 1675 supported_slicers = ( 1676 DatetimeBasedCursor, 1677 GlobalSubstreamCursor, 1678 PerPartitionWithGlobalCursor, 1679 ) 1680 if combined_slicers and not isinstance(combined_slicers, supported_slicers): 1681 raise ValueError( 1682 "Unsupported Slicer is used. 
PerPartitionWithGlobalCursor should be used here instead" 1683 ) 1684 cursor = ( 1685 combined_slicers 1686 if isinstance( 1687 combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) 1688 ) 1689 else self._create_component_from_model(model=model.incremental_sync, config=config) 1690 ) 1691 1692 client_side_incremental_sync = {"cursor": cursor} 1693 1694 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1695 cursor_model = model.incremental_sync 1696 1697 end_time_option = ( 1698 self._create_component_from_model( 1699 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1700 ) 1701 if cursor_model.end_time_option 1702 else None 1703 ) 1704 start_time_option = ( 1705 self._create_component_from_model( 1706 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1707 ) 1708 if cursor_model.start_time_option 1709 else None 1710 ) 1711 1712 request_options_provider = DatetimeBasedRequestOptionsProvider( 1713 start_time_option=start_time_option, 1714 end_time_option=end_time_option, 1715 partition_field_start=cursor_model.partition_field_start, 1716 partition_field_end=cursor_model.partition_field_end, 1717 config=config, 1718 parameters=model.parameters or {}, 1719 ) 1720 elif model.incremental_sync and isinstance( 1721 model.incremental_sync, IncrementingCountCursorModel 1722 ): 1723 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 1724 1725 start_time_option = ( 1726 self._create_component_from_model( 1727 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 1728 config, 1729 parameters=cursor_model.parameters or {}, 1730 ) 1731 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 1732 else None 1733 ) 1734 1735 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 1736 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 1737 partition_field_start = "start" 1738 1739 request_options_provider = DatetimeBasedRequestOptionsProvider( 1740 start_time_option=start_time_option, 1741 partition_field_start=partition_field_start, 1742 config=config, 1743 parameters=model.parameters or {}, 1744 ) 1745 else: 1746 request_options_provider = None 1747 1748 transformations = [] 1749 if model.transformations: 1750 for transformation_model in model.transformations: 1751 transformations.append( 1752 self._create_component_from_model(model=transformation_model, config=config) 1753 ) 1754 1755 retriever = self._create_component_from_model( 1756 model=model.retriever, 1757 config=config, 1758 name=model.name, 1759 primary_key=primary_key, 1760 stream_slicer=combined_slicers, 1761 request_options_provider=request_options_provider, 1762 stop_condition_on_cursor=stop_condition_on_cursor, 1763 client_side_incremental_sync=client_side_incremental_sync, 1764 transformations=transformations, 1765 incremental_sync=model.incremental_sync, 1766 ) 1767 cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None 1768 1769 if model.state_migrations: 1770 state_transformations = [ 1771 self._create_component_from_model(state_migration, config, declarative_stream=model) 1772 for state_migration in model.state_migrations 1773 ] 1774 else: 1775 state_transformations = [] 1776 1777 if model.schema_loader: 1778 schema_loader = self._create_component_from_model( 1779
model=model.schema_loader, config=config 1780 ) 1781 else: 1782 options = model.parameters or {} 1783 if "name" not in options: 1784 options["name"] = model.name 1785 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 1786 1787 return DeclarativeStream( 1788 name=model.name or "", 1789 primary_key=primary_key, 1790 retriever=retriever, 1791 schema_loader=schema_loader, 1792 stream_cursor_field=cursor_field or "", 1793 state_migrations=state_transformations, 1794 config=config, 1795 parameters=model.parameters or {}, 1796 ) 1797 1798 def _build_stream_slicer_from_partition_router( 1799 self, 1800 model: Union[ 1801 AsyncRetrieverModel, 1802 CustomRetrieverModel, 1803 SimpleRetrieverModel, 1804 ], 1805 config: Config, 1806 stream_name: Optional[str] = None, 1807 ) -> Optional[PartitionRouter]: 1808 if ( 1809 hasattr(model, "partition_router") 1810 and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel) 1811 and model.partition_router 1812 ): 1813 stream_slicer_model = model.partition_router 1814 if isinstance(stream_slicer_model, list): 1815 return CartesianProductStreamSlicer( 1816 [ 1817 self._create_component_from_model( 1818 model=slicer, config=config, stream_name=stream_name or "" 1819 ) 1820 for slicer in stream_slicer_model 1821 ], 1822 parameters={}, 1823 ) 1824 else: 1825 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 1826 model=stream_slicer_model, config=config, stream_name=stream_name or "" 1827 ) 1828 return None 1829 1830 def _build_incremental_cursor( 1831 self, 1832 model: DeclarativeStreamModel, 1833 stream_slicer: Optional[PartitionRouter], 1834 config: Config, 1835 ) -> Optional[StreamSlicer]: 1836 if model.incremental_sync and stream_slicer: 1837 if model.retriever.type == "AsyncRetriever": 1838 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1839 state_manager=self._connector_state_manager, 1840 model_type=DatetimeBasedCursorModel, 1841 component_definition=model.incremental_sync.__dict__, 1842 stream_name=model.name or "", 1843 stream_namespace=None, 1844 config=config or {}, 1845 stream_state={}, 1846 partition_router=stream_slicer, 1847 ) 1848 1849 incremental_sync_model = model.incremental_sync 1850 cursor_component = self._create_component_from_model( 1851 model=incremental_sync_model, config=config 1852 ) 1853 is_global_cursor = ( 1854 hasattr(incremental_sync_model, "global_substream_cursor") 1855 and incremental_sync_model.global_substream_cursor 1856 ) 1857 1858 if is_global_cursor: 1859 return GlobalSubstreamCursor( 1860 stream_cursor=cursor_component, partition_router=stream_slicer 1861 ) 1862 return PerPartitionWithGlobalCursor( 1863 cursor_factory=CursorFactory( 1864 lambda: self._create_component_from_model( 1865 model=incremental_sync_model, config=config 1866 ), 1867 ), 1868 partition_router=stream_slicer, 1869 stream_cursor=cursor_component, 1870 ) 1871 elif model.incremental_sync: 1872 if model.retriever.type == "AsyncRetriever": 1873 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1874 model_type=DatetimeBasedCursorModel, 1875 component_definition=model.incremental_sync.__dict__, 1876 stream_name=model.name or "", 1877 stream_namespace=None, 1878 config=config or {}, 1879 stream_state_migrations=model.state_migrations, 1880 ) 1881 return self._create_component_from_model(model=model.incremental_sync, config=config) # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync 1882 return None 1883 1884 def _build_resumable_cursor( 1885 self, 1886 model: Union[ 1887 AsyncRetrieverModel, 1888 CustomRetrieverModel, 1889 SimpleRetrieverModel, 1890 ], 1891 stream_slicer: Optional[PartitionRouter], 1892 ) -> Optional[StreamSlicer]: 1893 if hasattr(model, "paginator") and model.paginator and not stream_slicer: 1894 # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor` 1895 return ResumableFullRefreshCursor(parameters={}) 1896 elif stream_slicer: 1897 # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor` 1898 return PerPartitionCursor( 1899 cursor_factory=CursorFactory( 1900 create_function=partial(ChildPartitionResumableFullRefreshCursor, {}) 1901 ), 1902 partition_router=stream_slicer, 1903 ) 1904 return None 1905 1906 def _merge_stream_slicers( 1907 self, model: DeclarativeStreamModel, config: Config 1908 ) -> Optional[StreamSlicer]: 1909 retriever_model = model.retriever 1910 1911 stream_slicer = self._build_stream_slicer_from_partition_router( 1912 retriever_model, config, stream_name=model.name 1913 ) 1914 1915 if retriever_model.type == "AsyncRetriever": 1916 is_not_datetime_cursor = ( 1917 model.incremental_sync.type != "DatetimeBasedCursor" 1918 if model.incremental_sync 1919 else None 1920 ) 1921 is_partition_router = ( 1922 bool(retriever_model.partition_router) if model.incremental_sync else None 1923 ) 1924 1925 if 
is_not_datetime_cursor: 1926 # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the 1927 # support of unordered slices (for example, when we trigger reports for January and February, the report 1928 # in February can be completed first). Once we have support for custom concurrent cursors or a new 1929 # implementation is available in the CDK, we can enable more cursors here. 1930 raise ValueError( 1931 "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet." 1932 ) 1933 1934 if is_partition_router and not stream_slicer: 1935 # Note that per-partition support is being developed in parallel; once that work is merged, 1936 # we could support it here by calling create_concurrent_cursor_from_perpartition_cursor 1937 raise ValueError("Per partition state is not supported yet for AsyncRetriever.") 1938 1939 if model.incremental_sync: 1940 return self._build_incremental_cursor(model, stream_slicer, config) 1941 1942 return ( 1943 stream_slicer 1944 if self._disable_resumable_full_refresh 1945 else self._build_resumable_cursor(retriever_model, stream_slicer) 1946 ) 1947 1948 def create_default_error_handler( 1949 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 1950 ) -> DefaultErrorHandler: 1951 backoff_strategies = [] 1952 if model.backoff_strategies: 1953 for backoff_strategy_model in model.backoff_strategies: 1954 backoff_strategies.append( 1955 self._create_component_from_model(model=backoff_strategy_model, config=config) 1956 ) 1957 1958 response_filters = [] 1959 if model.response_filters: 1960 for response_filter_model in model.response_filters: 1961 response_filters.append( 1962 self._create_component_from_model(model=response_filter_model, config=config) 1963 ) 1964 response_filters.append( 1965 HttpResponseFilter(config=config, parameters=model.parameters or {}) 1966 ) 1967 1968 return DefaultErrorHandler( 1969 backoff_strategies=backoff_strategies, 1970 max_retries=model.max_retries, 1971 response_filters=response_filters, 1972 config=config, 1973 parameters=model.parameters or {}, 1974 ) 1975 1976 def create_default_paginator( 1977 self, 1978 model: DefaultPaginatorModel, 1979 config: Config, 1980 *, 1981 url_base: str, 1982 decoder: Optional[Decoder] = None, 1983 cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None, 1984 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 1985 if decoder: 1986 if self._is_supported_decoder_for_pagination(decoder): 1987 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 1988 else: 1989 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 1990 else: 1991 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 1992 page_size_option = ( 1993 self._create_component_from_model(model=model.page_size_option, config=config) 1994 if model.page_size_option 1995 else None 1996 ) 1997 page_token_option = ( 1998 self._create_component_from_model(model=model.page_token_option, config=config) 1999 if model.page_token_option 2000 else None 2001 ) 2002 pagination_strategy = self._create_component_from_model( 2003 model=model.pagination_strategy, config=config, decoder=decoder_to_use 2004 ) 2005 if cursor_used_for_stop_condition: 2006 pagination_strategy = StopConditionPaginationStrategyDecorator( 2007 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2008 ) 2009 paginator = DefaultPaginator( 2010 decoder=decoder_to_use, 2011 page_size_option=page_size_option, 2012
page_token_option=page_token_option, 2013 pagination_strategy=pagination_strategy, 2014 url_base=url_base, 2015 config=config, 2016 parameters=model.parameters or {}, 2017 ) 2018 if self._limit_pages_fetched_per_slice: 2019 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2020 return paginator 2021 2022 def create_dpath_extractor( 2023 self, 2024 model: DpathExtractorModel, 2025 config: Config, 2026 decoder: Optional[Decoder] = None, 2027 **kwargs: Any, 2028 ) -> DpathExtractor: 2029 if decoder: 2030 decoder_to_use = decoder 2031 else: 2032 decoder_to_use = JsonDecoder(parameters={}) 2033 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2034 return DpathExtractor( 2035 decoder=decoder_to_use, 2036 field_path=model_field_path, 2037 config=config, 2038 parameters=model.parameters or {}, 2039 ) 2040 2041 def create_response_to_file_extractor( 2042 self, 2043 model: ResponseToFileExtractorModel, 2044 **kwargs: Any, 2045 ) -> ResponseToFileExtractor: 2046 return ResponseToFileExtractor(parameters=model.parameters or {}) 2047 2048 @staticmethod 2049 def create_exponential_backoff_strategy( 2050 model: ExponentialBackoffStrategyModel, config: Config 2051 ) -> ExponentialBackoffStrategy: 2052 return ExponentialBackoffStrategy( 2053 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2054 ) 2055 2056 def create_http_requester( 2057 self, 2058 model: HttpRequesterModel, 2059 config: Config, 2060 decoder: Decoder = JsonDecoder(parameters={}), 2061 *, 2062 name: str, 2063 ) -> HttpRequester: 2064 authenticator = ( 2065 self._create_component_from_model( 2066 model=model.authenticator, 2067 config=config, 2068 url_base=model.url_base, 2069 name=name, 2070 decoder=decoder, 2071 ) 2072 if model.authenticator 2073 else None 2074 ) 2075 error_handler = ( 2076 self._create_component_from_model(model=model.error_handler, config=config) 2077 if model.error_handler 2078 else DefaultErrorHandler( 2079 backoff_strategies=[], 2080 response_filters=[], 2081 config=config, 2082 parameters=model.parameters or {}, 2083 ) 2084 ) 2085 2086 api_budget = self._api_budget 2087 2088 request_options_provider = InterpolatedRequestOptionsProvider( 2089 request_body_data=model.request_body_data, 2090 request_body_json=model.request_body_json, 2091 request_headers=model.request_headers, 2092 request_parameters=model.request_parameters, 2093 config=config, 2094 parameters=model.parameters or {}, 2095 ) 2096 2097 assert model.use_cache is not None # for mypy 2098 assert model.http_method is not None # for mypy 2099 2100 use_cache = model.use_cache and not self._disable_cache 2101 2102 return HttpRequester( 2103 name=name, 2104 url_base=model.url_base, 2105 path=model.path, 2106 authenticator=authenticator, 2107 error_handler=error_handler, 2108 api_budget=api_budget, 2109 http_method=HttpMethod[model.http_method.value], 2110 request_options_provider=request_options_provider, 2111 config=config, 2112 disable_retries=self._disable_retries, 2113 parameters=model.parameters or {}, 2114 message_repository=self._message_repository, 2115 use_cache=use_cache, 2116 decoder=decoder, 2117 stream_response=decoder.is_stream_response() if decoder else False, 2118 ) 2119 2120 @staticmethod 2121 def create_http_response_filter( 2122 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2123 ) -> HttpResponseFilter: 2124 if model.action: 2125 action = ResponseAction(model.action.value) 2126 else: 2127 action = None 2128 2129 failure_type = 
FailureType(model.failure_type.value) if model.failure_type else None 2130 2131 http_codes = ( 2132 set(model.http_codes) if model.http_codes else set() 2133 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2134 2135 return HttpResponseFilter( 2136 action=action, 2137 failure_type=failure_type, 2138 error_message=model.error_message or "", 2139 error_message_contains=model.error_message_contains or "", 2140 http_codes=http_codes, 2141 predicate=model.predicate or "", 2142 config=config, 2143 parameters=model.parameters or {}, 2144 ) 2145 2146 @staticmethod 2147 def create_inline_schema_loader( 2148 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2149 ) -> InlineSchemaLoader: 2150 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2151 2152 def create_complex_field_type( 2153 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2154 ) -> ComplexFieldType: 2155 items = ( 2156 self._create_component_from_model(model=model.items, config=config) 2157 if isinstance(model.items, ComplexFieldTypeModel) 2158 else model.items 2159 ) 2160 2161 return ComplexFieldType(field_type=model.field_type, items=items) 2162 2163 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2164 target_type = ( 2165 self._create_component_from_model(model=model.target_type, config=config) 2166 if isinstance(model.target_type, ComplexFieldTypeModel) 2167 else model.target_type 2168 ) 2169 2170 return TypesMap( 2171 target_type=target_type, 2172 current_type=model.current_type, 2173 condition=model.condition if model.condition is not None else "True", 2174 ) 2175 2176 def create_schema_type_identifier( 2177 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2178 ) -> SchemaTypeIdentifier: 2179 types_mapping = [] 2180 if model.types_mapping: 2181 types_mapping.extend( 2182 [ 2183 self._create_component_from_model(types_map, config=config) 2184 for types_map in model.types_mapping 2185 ] 2186 ) 2187 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2188 [x for x in model.schema_pointer] if model.schema_pointer else [] 2189 ) 2190 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2191 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2192 [x for x in model.type_pointer] if model.type_pointer else None 2193 ) 2194 2195 return SchemaTypeIdentifier( 2196 schema_pointer=model_schema_pointer, 2197 key_pointer=model_key_pointer, 2198 type_pointer=model_type_pointer, 2199 types_mapping=types_mapping, 2200 parameters=model.parameters or {}, 2201 ) 2202 2203 def create_dynamic_schema_loader( 2204 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2205 ) -> DynamicSchemaLoader: 2206 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 2207 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 2208 2209 schema_transformations = [] 2210 if model.schema_transformations: 2211 for transformation_model in model.schema_transformations: 2212 schema_transformations.append( 2213 self._create_component_from_model(model=transformation_model, config=config) 2214 ) 2215 2216 retriever = self._create_component_from_model( 2217 model=model.retriever, 2218 config=config, 2219 name="", 2220 primary_key=None, 2221 stream_slicer=combined_slicers, 2222 transformations=[], 2223 ) 2224 schema_type_identifier = self._create_component_from_model( 2225 
model.schema_type_identifier, config=config, parameters=model.parameters or {} 2226 ) 2227 return DynamicSchemaLoader( 2228 retriever=retriever, 2229 config=config, 2230 schema_transformations=schema_transformations, 2231 schema_type_identifier=schema_type_identifier, 2232 parameters=model.parameters or {}, 2233 ) 2234 2235 @staticmethod 2236 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2237 return JsonDecoder(parameters={}) 2238 2239 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2240 return CompositeRawDecoder( 2241 parser=ModelToComponentFactory._get_parser(model, config), 2242 stream_response=not self._emit_connector_builder_messages, 2243 ) 2244 2245 def create_jsonl_decoder( 2246 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2247 ) -> Decoder: 2248 return CompositeRawDecoder( 2249 parser=ModelToComponentFactory._get_parser(model, config), 2250 stream_response=not self._emit_connector_builder_messages, 2251 ) 2252 2253 def create_gzip_decoder( 2254 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2255 ) -> Decoder: 2256 _compressed_response_types = { 2257 "gzip", 2258 "x-gzip", 2259 "gzip, deflate", 2260 "x-gzip, deflate", 2261 "application/zip", 2262 "application/gzip", 2263 "application/x-gzip", 2264 "application/x-zip-compressed", 2265 } 2266 2267 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2268 2269 if self._emit_connector_builder_messages: 2270 # This is very surprising, but if the response is not streamed, 2271 # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data, as opposed to response.raw, 2272 # which uses urllib3 directly and does not uncompress the data. 2273 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2274 2275 return CompositeRawDecoder.by_headers( 2276 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2277 stream_response=True, 2278 fallback_parser=gzip_parser.inner_parser, 2279 ) 2280 2281 @staticmethod 2282 def create_incrementing_count_cursor( 2283 model: IncrementingCountCursorModel, config: Config, **kwargs: Any 2284 ) -> DatetimeBasedCursor: 2285 # This should not actually get used at runtime, but it is needed to pass checks since 2286 # we still parse models into components. The issue is that there is no runtime implementation of an 2287 # IncrementingCountCursor. 2288 # A known and expected limitation of this stub is running a connection check with a declared IncrementingCountCursor, because the check runs without the ConcurrentCursor.
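# The datetime_format and start_datetime below are placeholders; they are not expected to
# drive a real sync, since the concurrent code path builds the actual cursor elsewhere.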
2289 return DatetimeBasedCursor( 2290 cursor_field=model.cursor_field, 2291 datetime_format="%Y-%m-%d", 2292 start_datetime="2024-12-12", 2293 config=config, 2294 parameters={}, 2295 ) 2296 2297 @staticmethod 2298 def create_iterable_decoder( 2299 model: IterableDecoderModel, config: Config, **kwargs: Any 2300 ) -> IterableDecoder: 2301 return IterableDecoder(parameters={}) 2302 2303 @staticmethod 2304 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2305 return XmlDecoder(parameters={}) 2306 2307 def create_zipfile_decoder( 2308 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2309 ) -> ZipfileDecoder: 2310 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2311 2312 @staticmethod 2313 def _get_parser(model: BaseModel, config: Config) -> Parser: 2314 if isinstance(model, JsonDecoderModel): 2315 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2316 return JsonParser() 2317 elif isinstance(model, JsonlDecoderModel): 2318 return JsonLineParser() 2319 elif isinstance(model, CsvDecoderModel): 2320 return CsvParser(encoding=model.encoding, delimiter=model.delimiter) 2321 elif isinstance(model, GzipDecoderModel): 2322 return GzipParser( 2323 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2324 ) 2325 elif isinstance( 2326 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2327 ): 2328 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2329 2330 raise ValueError(f"Unknown decoder type {model}") 2331 2332 @staticmethod 2333 def create_json_file_schema_loader( 2334 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2335 ) -> JsonFileSchemaLoader: 2336 return JsonFileSchemaLoader( 2337 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2338 ) 2339 2340 @staticmethod 2341 def create_jwt_authenticator( 2342 model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2343 ) -> JwtAuthenticator: 2344 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2345 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2346 return JwtAuthenticator( 2347 config=config, 2348 parameters=model.parameters or {}, 2349 algorithm=JwtAlgorithm(model.algorithm.value), 2350 secret_key=model.secret_key, 2351 base64_encode_secret_key=model.base64_encode_secret_key, 2352 token_duration=model.token_duration, 2353 header_prefix=model.header_prefix, 2354 kid=jwt_headers.kid, 2355 typ=jwt_headers.typ, 2356 cty=jwt_headers.cty, 2357 iss=jwt_payload.iss, 2358 sub=jwt_payload.sub, 2359 aud=jwt_payload.aud, 2360 additional_jwt_headers=model.additional_jwt_headers, 2361 additional_jwt_payload=model.additional_jwt_payload, 2362 ) 2363 2364 def create_list_partition_router( 2365 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2366 ) -> ListPartitionRouter: 2367 request_option = ( 2368 self._create_component_from_model(model.request_option, config) 2369 if model.request_option 2370 else None 2371 ) 2372 return ListPartitionRouter( 2373 cursor_field=model.cursor_field, 2374 request_option=request_option, 2375 values=model.values, 2376 config=config, 2377 parameters=model.parameters or {}, 2378 ) 2379 2380 @staticmethod 2381 def create_min_max_datetime( 2382 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2383 ) -> MinMaxDatetime: 2384 return 
MinMaxDatetime( 2385 datetime=model.datetime, 2386 datetime_format=model.datetime_format or "", 2387 max_datetime=model.max_datetime or "", 2388 min_datetime=model.min_datetime or "", 2389 parameters=model.parameters or {}, 2390 ) 2391 2392 @staticmethod 2393 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2394 return NoAuth(parameters=model.parameters or {}) 2395 2396 @staticmethod 2397 def create_no_pagination( 2398 model: NoPaginationModel, config: Config, **kwargs: Any 2399 ) -> NoPagination: 2400 return NoPagination(parameters={}) 2401 2402 def create_oauth_authenticator( 2403 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2404 ) -> DeclarativeOauth2Authenticator: 2405 profile_assertion = ( 2406 self._create_component_from_model(model.profile_assertion, config=config) 2407 if model.profile_assertion 2408 else None 2409 ) 2410 2411 if model.refresh_token_updater: 2412 # ignore type error because fixing it would have a lot of dependencies, revisit later 2413 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2414 config, 2415 InterpolatedString.create( 2416 model.token_refresh_endpoint, # type: ignore 2417 parameters=model.parameters or {}, 2418 ).eval(config), 2419 access_token_name=InterpolatedString.create( 2420 model.access_token_name or "access_token", parameters=model.parameters or {} 2421 ).eval(config), 2422 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2423 expires_in_name=InterpolatedString.create( 2424 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2425 ).eval(config), 2426 client_id_name=InterpolatedString.create( 2427 model.client_id_name or "client_id", parameters=model.parameters or {} 2428 ).eval(config), 2429 client_id=InterpolatedString.create( 2430 model.client_id, parameters=model.parameters or {} 2431 ).eval(config) 2432 if model.client_id 2433 else model.client_id, 2434 client_secret_name=InterpolatedString.create( 2435 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2436 ).eval(config), 2437 client_secret=InterpolatedString.create( 2438 model.client_secret, parameters=model.parameters or {} 2439 ).eval(config) 2440 if model.client_secret 2441 else model.client_secret, 2442 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2443 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2444 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2445 grant_type_name=InterpolatedString.create( 2446 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2447 ).eval(config), 2448 grant_type=InterpolatedString.create( 2449 model.grant_type or "refresh_token", parameters=model.parameters or {} 2450 ).eval(config), 2451 refresh_request_body=InterpolatedMapping( 2452 model.refresh_request_body or {}, parameters=model.parameters or {} 2453 ).eval(config), 2454 refresh_request_headers=InterpolatedMapping( 2455 model.refresh_request_headers or {}, parameters=model.parameters or {} 2456 ).eval(config), 2457 scopes=model.scopes, 2458 token_expiry_date_format=model.token_expiry_date_format, 2459 message_repository=self._message_repository, 2460 refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes, 2461 refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key, 2462 refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values, 2463 ) 2464 # ignore 
type error because fixing it would have a lot of dependencies, revisit later 2465 return DeclarativeOauth2Authenticator( # type: ignore 2466 access_token_name=model.access_token_name or "access_token", 2467 access_token_value=model.access_token_value, 2468 client_id_name=model.client_id_name or "client_id", 2469 client_id=model.client_id, 2470 client_secret_name=model.client_secret_name or "client_secret", 2471 client_secret=model.client_secret, 2472 expires_in_name=model.expires_in_name or "expires_in", 2473 grant_type_name=model.grant_type_name or "grant_type", 2474 grant_type=model.grant_type or "refresh_token", 2475 refresh_request_body=model.refresh_request_body, 2476 refresh_request_headers=model.refresh_request_headers, 2477 refresh_token_name=model.refresh_token_name or "refresh_token", 2478 refresh_token=model.refresh_token, 2479 scopes=model.scopes, 2480 token_expiry_date=model.token_expiry_date, 2481 token_expiry_date_format=model.token_expiry_date_format, 2482 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2483 token_refresh_endpoint=model.token_refresh_endpoint, 2484 config=config, 2485 parameters=model.parameters or {}, 2486 message_repository=self._message_repository, 2487 profile_assertion=profile_assertion, 2488 use_profile_assertion=model.use_profile_assertion, 2489 ) 2490 2491 def create_offset_increment( 2492 self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any 2493 ) -> OffsetIncrement: 2494 if isinstance(decoder, PaginationDecoderDecorator): 2495 inner_decoder = decoder.decoder 2496 else: 2497 inner_decoder = decoder 2498 decoder = PaginationDecoderDecorator(decoder=decoder) 2499 2500 if self._is_supported_decoder_for_pagination(inner_decoder): 2501 decoder_to_use = decoder 2502 else: 2503 raise ValueError( 2504 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2505 ) 2506 2507 return OffsetIncrement( 2508 page_size=model.page_size, 2509 config=config, 2510 decoder=decoder_to_use, 2511 inject_on_first_request=model.inject_on_first_request or False, 2512 parameters=model.parameters or {}, 2513 ) 2514 2515 @staticmethod 2516 def create_page_increment( 2517 model: PageIncrementModel, config: Config, **kwargs: Any 2518 ) -> PageIncrement: 2519 return PageIncrement( 2520 page_size=model.page_size, 2521 config=config, 2522 start_from_page=model.start_from_page or 0, 2523 inject_on_first_request=model.inject_on_first_request or False, 2524 parameters=model.parameters or {}, 2525 ) 2526 2527 def create_parent_stream_config( 2528 self, model: ParentStreamConfigModel, config: Config, **kwargs: Any 2529 ) -> ParentStreamConfig: 2530 declarative_stream = self._create_component_from_model( 2531 model.stream, config=config, **kwargs 2532 ) 2533 request_option = ( 2534 self._create_component_from_model(model.request_option, config=config) 2535 if model.request_option 2536 else None 2537 ) 2538 2539 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2540 raise ValueError( 2541 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 
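# For example, ["data", "items"] would be accepted, while ["data", "*", "id"] is rejected
# by the wildcard check above.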
2542 ) 2543 2544 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2545 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2546 ) 2547 2548 return ParentStreamConfig( 2549 parent_key=model.parent_key, 2550 request_option=request_option, 2551 stream=declarative_stream, 2552 partition_field=model.partition_field, 2553 config=config, 2554 incremental_dependency=model.incremental_dependency or False, 2555 parameters=model.parameters or {}, 2556 extra_fields=model.extra_fields, 2557 lazy_read_pointer=model_lazy_read_pointer, 2558 ) 2559 2560 @staticmethod 2561 def create_record_filter( 2562 model: RecordFilterModel, config: Config, **kwargs: Any 2563 ) -> RecordFilter: 2564 return RecordFilter( 2565 condition=model.condition or "", config=config, parameters=model.parameters or {} 2566 ) 2567 2568 @staticmethod 2569 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 2570 return RequestPath(parameters={}) 2571 2572 @staticmethod 2573 def create_request_option( 2574 model: RequestOptionModel, config: Config, **kwargs: Any 2575 ) -> RequestOption: 2576 inject_into = RequestOptionType(model.inject_into.value) 2577 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 2578 [ 2579 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 2580 for segment in model.field_path 2581 ] 2582 if model.field_path 2583 else None 2584 ) 2585 field_name = ( 2586 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 2587 if model.field_name 2588 else None 2589 ) 2590 return RequestOption( 2591 field_name=field_name, 2592 field_path=field_path, 2593 inject_into=inject_into, 2594 parameters=kwargs.get("parameters", {}), 2595 ) 2596 2597 def create_record_selector( 2598 self, 2599 model: RecordSelectorModel, 2600 config: Config, 2601 *, 2602 name: str, 2603 transformations: List[RecordTransformation] | None = None, 2604 decoder: Decoder | None = None, 2605 client_side_incremental_sync: Dict[str, Any] | None = None, 2606 **kwargs: Any, 2607 ) -> RecordSelector: 2608 extractor = self._create_component_from_model( 2609 model=model.extractor, decoder=decoder, config=config 2610 ) 2611 record_filter = ( 2612 self._create_component_from_model(model.record_filter, config=config) 2613 if model.record_filter 2614 else None 2615 ) 2616 2617 assert model.transform_before_filtering is not None # for mypy 2618 2619 transform_before_filtering = model.transform_before_filtering 2620 if client_side_incremental_sync: 2621 record_filter = ClientSideIncrementalRecordFilterDecorator( 2622 config=config, 2623 parameters=model.parameters, 2624 condition=model.record_filter.condition 2625 if (model.record_filter and hasattr(model.record_filter, "condition")) 2626 else None, 2627 **client_side_incremental_sync, 2628 ) 2629 transform_before_filtering = True 2630 2631 schema_normalization = ( 2632 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 2633 if isinstance(model.schema_normalization, SchemaNormalizationModel) 2634 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 2635 ) 2636 2637 return RecordSelector( 2638 extractor=extractor, 2639 name=name, 2640 config=config, 2641 record_filter=record_filter, 2642 transformations=transformations or [], 2643 schema_normalization=schema_normalization, 2644 parameters=model.parameters or {}, 2645 
transform_before_filtering=transform_before_filtering, 2646 ) 2647 2648 @staticmethod 2649 def create_remove_fields( 2650 model: RemoveFieldsModel, config: Config, **kwargs: Any 2651 ) -> RemoveFields: 2652 return RemoveFields( 2653 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 2654 ) 2655 2656 def create_selective_authenticator( 2657 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 2658 ) -> DeclarativeAuthenticator: 2659 authenticators = { 2660 name: self._create_component_from_model(model=auth, config=config) 2661 for name, auth in model.authenticators.items() 2662 } 2663 # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error 2664 return SelectiveAuthenticator( # type: ignore[abstract] 2665 config=config, 2666 authenticators=authenticators, 2667 authenticator_selection_path=model.authenticator_selection_path, 2668 **kwargs, 2669 ) 2670 2671 @staticmethod 2672 def create_legacy_session_token_authenticator( 2673 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 2674 ) -> LegacySessionTokenAuthenticator: 2675 return LegacySessionTokenAuthenticator( 2676 api_url=url_base, 2677 header=model.header, 2678 login_url=model.login_url, 2679 password=model.password or "", 2680 session_token=model.session_token or "", 2681 session_token_response_key=model.session_token_response_key or "", 2682 username=model.username or "", 2683 validate_session_url=model.validate_session_url, 2684 config=config, 2685 parameters=model.parameters or {}, 2686 ) 2687 2688 def create_simple_retriever( 2689 self, 2690 model: SimpleRetrieverModel, 2691 config: Config, 2692 *, 2693 name: str, 2694 primary_key: Optional[Union[str, List[str], List[List[str]]]], 2695 stream_slicer: Optional[StreamSlicer], 2696 request_options_provider: Optional[RequestOptionsProvider] = None, 2697 stop_condition_on_cursor: bool = False, 2698 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 2699 transformations: List[RecordTransformation], 2700 incremental_sync: Optional[ 2701 Union[ 2702 IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel 2703 ] 2704 ] = None, 2705 **kwargs: Any, 2706 ) -> SimpleRetriever: 2707 decoder = ( 2708 self._create_component_from_model(model=model.decoder, config=config) 2709 if model.decoder 2710 else JsonDecoder(parameters={}) 2711 ) 2712 requester = self._create_component_from_model( 2713 model=model.requester, decoder=decoder, config=config, name=name 2714 ) 2715 record_selector = self._create_component_from_model( 2716 model=model.record_selector, 2717 name=name, 2718 config=config, 2719 decoder=decoder, 2720 transformations=transformations, 2721 client_side_incremental_sync=client_side_incremental_sync, 2722 ) 2723 url_base = ( 2724 model.requester.url_base 2725 if hasattr(model.requester, "url_base") 2726 else requester.get_url_base() 2727 ) 2728 2729 # Define cursor only if per partition or common incremental support is needed 2730 cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None 2731 2732 if ( 2733 not isinstance(stream_slicer, DatetimeBasedCursor) 2734 or type(stream_slicer) is not DatetimeBasedCursor 2735 ): 2736 # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods). 
2737 # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement 2738 # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor can still act as the SimpleRetriever's 2739 # request_options_provider 2740 request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={}) 2741 elif not request_options_provider: 2742 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 2743 2744 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 2745 2746 cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None 2747 paginator = ( 2748 self._create_component_from_model( 2749 model=model.paginator, 2750 config=config, 2751 url_base=url_base, 2752 decoder=decoder, 2753 cursor_used_for_stop_condition=cursor_used_for_stop_condition, 2754 ) 2755 if model.paginator 2756 else NoPagination(parameters={}) 2757 ) 2758 2759 ignore_stream_slicer_parameters_on_paginated_requests = ( 2760 model.ignore_stream_slicer_parameters_on_paginated_requests or False 2761 ) 2762 2763 if ( 2764 model.partition_router 2765 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 2766 and not bool(self._connector_state_manager.get_stream_state(name, None)) 2767 and any( 2768 parent_stream_config.lazy_read_pointer 2769 for parent_stream_config in model.partition_router.parent_stream_configs 2770 ) 2771 ): 2772 if incremental_sync: 2773 if incremental_sync.type != "DatetimeBasedCursor": 2774 raise ValueError( 2775 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 2776 ) 2777 2778 elif incremental_sync.step or incremental_sync.cursor_granularity: 2779 raise ValueError( 2780 f"Found more than one slice per parent. LazySimpleRetriever only supports a single-slice read for stream - {name}." 2781 ) 2782 2783 if model.decoder and model.decoder.type != "JsonDecoder": 2784 raise ValueError( 2785 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
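# Taken together, these guards restrict the lazy-read path to the simplest setup: a
# DatetimeBasedCursor with no step or cursor_granularity (a single slice per parent) and
# default JSON decoding.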
2786 ) 2787 2788 return LazySimpleRetriever( 2789 name=name, 2790 paginator=paginator, 2791 primary_key=primary_key, 2792 requester=requester, 2793 record_selector=record_selector, 2794 stream_slicer=stream_slicer, 2795 request_option_provider=request_options_provider, 2796 cursor=cursor, 2797 config=config, 2798 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 2799 parameters=model.parameters or {}, 2800 ) 2801 2802 if self._limit_slices_fetched or self._emit_connector_builder_messages: 2803 return SimpleRetrieverTestReadDecorator( 2804 name=name, 2805 paginator=paginator, 2806 primary_key=primary_key, 2807 requester=requester, 2808 record_selector=record_selector, 2809 stream_slicer=stream_slicer, 2810 request_option_provider=request_options_provider, 2811 cursor=cursor, 2812 config=config, 2813 maximum_number_of_slices=self._limit_slices_fetched or 5, 2814 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 2815 parameters=model.parameters or {}, 2816 ) 2817 return SimpleRetriever( 2818 name=name, 2819 paginator=paginator, 2820 primary_key=primary_key, 2821 requester=requester, 2822 record_selector=record_selector, 2823 stream_slicer=stream_slicer, 2824 request_option_provider=request_options_provider, 2825 cursor=cursor, 2826 config=config, 2827 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 2828 parameters=model.parameters or {}, 2829 ) 2830 2831 def create_state_delegating_stream( 2832 self, 2833 model: StateDelegatingStreamModel, 2834 config: Config, 2835 has_parent_state: Optional[bool] = None, 2836 **kwargs: Any, 2837 ) -> DeclarativeStream: 2838 if ( 2839 model.full_refresh_stream.name != model.name 2840 or model.name != model.incremental_stream.name 2841 ): 2842 raise ValueError( 2843 f"The state_delegating_stream, full_refresh_stream and incremental_stream must have the same name. Got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 2844 ) 2845 2846 stream_model = ( 2847 model.incremental_stream 2848 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 2849 else model.full_refresh_stream 2850 ) 2851 2852 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # A DeclarativeStream will be created since stream_model is a stream description 2853 2854 def _create_async_job_status_mapping( 2855 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 2856 ) -> Mapping[str, AsyncJobStatus]: 2857 api_status_to_cdk_status = {} 2858 for cdk_status, api_statuses in model.dict().items(): 2859 if cdk_status == "type": 2860 # This is an element of the dict because of the typing of the CDK, but it is not a CDK status 2861 continue 2862 2863 for status in api_statuses: 2864 if status in api_status_to_cdk_status: 2865 raise ValueError( 2866 f"API status {status} is already set for CDK status {cdk_status}.
Please ensure API statuses are only provided once" 2867 ) 2868 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 2869 return api_status_to_cdk_status 2870 2871 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 2872 match status: 2873 case "running": 2874 return AsyncJobStatus.RUNNING 2875 case "completed": 2876 return AsyncJobStatus.COMPLETED 2877 case "failed": 2878 return AsyncJobStatus.FAILED 2879 case "timeout": 2880 return AsyncJobStatus.TIMED_OUT 2881 case _: 2882 raise ValueError(f"Unsupported CDK status {status}") 2883 2884 def create_async_retriever( 2885 self, 2886 model: AsyncRetrieverModel, 2887 config: Config, 2888 *, 2889 name: str, 2890 primary_key: Optional[ 2891 Union[str, List[str], List[List[str]]] 2892 ], # this seems to be needed to match create_simple_retriever 2893 stream_slicer: Optional[StreamSlicer], 2894 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 2895 transformations: List[RecordTransformation], 2896 **kwargs: Any, 2897 ) -> AsyncRetriever: 2898 def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever: 2899 record_selector = RecordSelector( 2900 extractor=download_extractor, 2901 name=name, 2902 record_filter=None, 2903 transformations=transformations, 2904 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 2905 config=config, 2906 parameters={}, 2907 ) 2908 paginator = ( 2909 self._create_component_from_model( 2910 model=model.download_paginator, 2911 decoder=decoder, 2912 config=config, 2913 url_base="", 2914 ) 2915 if model.download_paginator 2916 else NoPagination(parameters={}) 2917 ) 2918 maximum_number_of_slices = self._limit_slices_fetched or 5 2919 2920 if self._limit_slices_fetched or self._emit_connector_builder_messages: 2921 return SimpleRetrieverTestReadDecorator( 2922 requester=download_requester, 2923 record_selector=record_selector, 2924 primary_key=None, 2925 name=job_download_components_name, 2926 paginator=paginator, 2927 config=config, 2928 parameters={}, 2929 maximum_number_of_slices=maximum_number_of_slices, 2930 ) 2931 2932 return SimpleRetriever( 2933 requester=download_requester, 2934 record_selector=record_selector, 2935 primary_key=None, 2936 name=job_download_components_name, 2937 paginator=paginator, 2938 config=config, 2939 parameters={}, 2940 ) 2941 2942 def _get_job_timeout() -> datetime.timedelta: 2943 user_defined_timeout: Optional[int] = ( 2944 int( 2945 InterpolatedString.create( 2946 str(model.polling_job_timeout), 2947 parameters={}, 2948 ).eval(config) 2949 ) 2950 if model.polling_job_timeout 2951 else None 2952 ) 2953 2954 # check for user defined timeout during the test read or 15 minutes 2955 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 2956 # default value for non-connector builder is 60 minutes. 
2957 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 2958 2959 return ( 2960 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 2961 ) 2962 2963 decoder = ( 2964 self._create_component_from_model(model=model.decoder, config=config) 2965 if model.decoder 2966 else JsonDecoder(parameters={}) 2967 ) 2968 record_selector = self._create_component_from_model( 2969 model=model.record_selector, 2970 config=config, 2971 decoder=decoder, 2972 name=name, 2973 transformations=transformations, 2974 client_side_incremental_sync=client_side_incremental_sync, 2975 ) 2976 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 2977 creation_requester = self._create_component_from_model( 2978 model=model.creation_requester, 2979 decoder=decoder, 2980 config=config, 2981 name=f"job creation - {name}", 2982 ) 2983 polling_requester = self._create_component_from_model( 2984 model=model.polling_requester, 2985 decoder=decoder, 2986 config=config, 2987 name=f"job polling - {name}", 2988 ) 2989 job_download_components_name = f"job download - {name}" 2990 download_decoder = ( 2991 self._create_component_from_model(model=model.download_decoder, config=config) 2992 if model.download_decoder 2993 else JsonDecoder(parameters={}) 2994 ) 2995 download_extractor = ( 2996 self._create_component_from_model( 2997 model=model.download_extractor, 2998 config=config, 2999 decoder=download_decoder, 3000 parameters=model.parameters, 3001 ) 3002 if model.download_extractor 3003 else DpathExtractor( 3004 [], 3005 config=config, 3006 decoder=download_decoder, 3007 parameters=model.parameters or {}, 3008 ) 3009 ) 3010 download_requester = self._create_component_from_model( 3011 model=model.download_requester, 3012 decoder=download_decoder, 3013 config=config, 3014 name=job_download_components_name, 3015 ) 3016 download_retriever = _get_download_retriever() 3017 abort_requester = ( 3018 self._create_component_from_model( 3019 model=model.abort_requester, 3020 decoder=decoder, 3021 config=config, 3022 name=f"job abort - {name}", 3023 ) 3024 if model.abort_requester 3025 else None 3026 ) 3027 delete_requester = ( 3028 self._create_component_from_model( 3029 model=model.delete_requester, 3030 decoder=decoder, 3031 config=config, 3032 name=f"job delete - {name}", 3033 ) 3034 if model.delete_requester 3035 else None 3036 ) 3037 download_target_requester = ( 3038 self._create_component_from_model( 3039 model=model.download_target_requester, 3040 decoder=decoder, 3041 config=config, 3042 name=f"job extract_url - {name}", 3043 ) 3044 if model.download_target_requester 3045 else None 3046 ) 3047 status_extractor = self._create_component_from_model( 3048 model=model.status_extractor, decoder=decoder, config=config, name=name 3049 ) 3050 download_target_extractor = self._create_component_from_model( 3051 model=model.download_target_extractor, 3052 decoder=decoder, 3053 config=config, 3054 name=name, 3055 ) 3056 3057 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3058 creation_requester=creation_requester, 3059 polling_requester=polling_requester, 3060 download_retriever=download_retriever, 3061 download_target_requester=download_target_requester, 3062 abort_requester=abort_requester, 3063 delete_requester=delete_requester, 3064 status_extractor=status_extractor, 3065 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3066 download_target_extractor=download_target_extractor, 3067 job_timeout=_get_job_timeout(), 3068 ) 3069 
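# The repository above wires the async-job lifecycle end to end: the creation requester
# starts a job, the polling requester plus the status mapping track it to a terminal
# AsyncJobStatus, the download retriever streams the finished artifact, and the optional
# abort/delete requesters handle cleanup.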
3070 async_job_partition_router = AsyncJobPartitionRouter( 3071 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3072 job_repository, 3073 stream_slices, 3074 self._job_tracker, 3075 self._message_repository, 3076 # FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk 3077 has_bulk_parent=False, 3078 # set the `job_max_retry` to 1 for the `Connector Builder` use-case. 3079 # `None` means the default of 3 retry attempts is used under the hood. 3080 job_max_retry=1 if self._emit_connector_builder_messages else None, 3081 ), 3082 stream_slicer=stream_slicer, 3083 config=config, 3084 parameters=model.parameters or {}, 3085 ) 3086 3087 return AsyncRetriever( 3088 record_selector=record_selector, 3089 stream_slicer=async_job_partition_router, 3090 config=config, 3091 parameters=model.parameters or {}, 3092 ) 3093 3094 @staticmethod 3095 def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3096 return Spec( 3097 connection_specification=model.connection_specification, 3098 documentation_url=model.documentation_url, 3099 advanced_auth=model.advanced_auth, 3100 parameters={}, 3101 ) 3102 3103 def create_substream_partition_router( 3104 self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any 3105 ) -> SubstreamPartitionRouter: 3106 parent_stream_configs = [] 3107 if model.parent_stream_configs: 3108 parent_stream_configs.extend( 3109 [ 3110 self._create_message_repository_substream_wrapper( 3111 model=parent_stream_config, config=config, **kwargs 3112 ) 3113 for parent_stream_config in model.parent_stream_configs 3114 ] 3115 ) 3116 3117 return SubstreamPartitionRouter( 3118 parent_stream_configs=parent_stream_configs, 3119 parameters=model.parameters or {}, 3120 config=config, 3121 ) 3122 3123 def _create_message_repository_substream_wrapper( 3124 self, model: ParentStreamConfigModel, config: Config, **kwargs: Any 3125 ) -> Any: 3126 substream_factory = ModelToComponentFactory( 3127 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3128 limit_slices_fetched=self._limit_slices_fetched, 3129 emit_connector_builder_messages=self._emit_connector_builder_messages, 3130 disable_retries=self._disable_retries, 3131 disable_cache=self._disable_cache, 3132 message_repository=LogAppenderMessageRepositoryDecorator( 3133 {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}}, 3134 self._message_repository, 3135 self._evaluate_log_level(self._emit_connector_builder_messages), 3136 ), 3137 ) 3138 3139 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3140 has_parent_state = bool( 3141 self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None) 3142 if model.incremental_dependency 3143 else False 3144 ) 3145 return substream_factory._create_component_from_model( 3146 model=model, config=config, has_parent_state=has_parent_state, **kwargs 3147 ) 3148 3149 @staticmethod 3150 def create_wait_time_from_header( 3151 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 3152 ) -> WaitTimeFromHeaderBackoffStrategy: 3153 return WaitTimeFromHeaderBackoffStrategy( 3154 header=model.header, 3155 parameters=model.parameters or {}, 3156 config=config, 3157 regex=model.regex, 3158 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 3159 if model.max_waiting_time_in_seconds is not None 3160 else None, 3161 ) 3162 3163 @staticmethod 3164 def create_wait_until_time_from_header( 3165 model:
WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3166 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3167 return WaitUntilTimeFromHeaderBackoffStrategy( 3168 header=model.header, 3169 parameters=model.parameters or {}, 3170 config=config, 3171 min_wait=model.min_wait, 3172 regex=model.regex, 3173 ) 3174 3175 def get_message_repository(self) -> MessageRepository: 3176 return self._message_repository 3177 3178 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 3179 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 3180 3181 @staticmethod 3182 def create_components_mapping_definition( 3183 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 3184 ) -> ComponentMappingDefinition: 3185 interpolated_value = InterpolatedString.create( 3186 model.value, parameters=model.parameters or {} 3187 ) 3188 field_path = [ 3189 InterpolatedString.create(path, parameters=model.parameters or {}) 3190 for path in model.field_path 3191 ] 3192 return ComponentMappingDefinition( 3193 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 3194 value=interpolated_value, 3195 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 3196 parameters=model.parameters or {}, 3197 ) 3198 3199 def create_http_components_resolver( 3200 self, model: HttpComponentsResolverModel, config: Config 3201 ) -> Any: 3202 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 3203 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 3204 3205 retriever = self._create_component_from_model( 3206 model=model.retriever, 3207 config=config, 3208 name="", 3209 primary_key=None, 3210 stream_slicer=stream_slicer if stream_slicer else combined_slicers, 3211 transformations=[], 3212 ) 3213 3214 components_mapping = [ 3215 self._create_component_from_model( 3216 model=components_mapping_definition_model, 3217 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3218 components_mapping_definition_model.value_type 3219 ), 3220 config=config, 3221 ) 3222 for components_mapping_definition_model in model.components_mapping 3223 ] 3224 3225 return HttpComponentsResolver( 3226 retriever=retriever, 3227 config=config, 3228 components_mapping=components_mapping, 3229 parameters=model.parameters or {}, 3230 ) 3231 3232 @staticmethod 3233 def create_stream_config( 3234 model: StreamConfigModel, config: Config, **kwargs: Any 3235 ) -> StreamConfig: 3236 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 3237 [x for x in model.configs_pointer] if model.configs_pointer else [] 3238 ) 3239 3240 return StreamConfig( 3241 configs_pointer=model_configs_pointer, 3242 parameters=model.parameters or {}, 3243 ) 3244 3245 def create_config_components_resolver( 3246 self, model: ConfigComponentsResolverModel, config: Config 3247 ) -> Any: 3248 stream_config = self._create_component_from_model( 3249 model.stream_config, config=config, parameters=model.parameters or {} 3250 ) 3251 3252 components_mapping = [ 3253 self._create_component_from_model( 3254 model=components_mapping_definition_model, 3255 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3256 components_mapping_definition_model.value_type 3257 ), 3258 config=config, 3259 ) 3260 for components_mapping_definition_model in model.components_mapping 3261 ] 3262 3263 return ConfigComponentsResolver( 3264 stream_config=stream_config, 3265 config=config, 3266 
components_mapping=components_mapping, 3267 parameters=model.parameters or {}, 3268 ) 3269 3270 _UNSUPPORTED_DECODER_ERROR = ( 3271 "Specified decoder of {decoder_type} is not supported for pagination. " 3272 "Please use `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. " 3273 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 3274 ) 3275 3276 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 3277 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 3278 return True 3279 elif isinstance(decoder, CompositeRawDecoder): 3280 return self._is_supported_parser_for_pagination(decoder.parser) 3281 else: 3282 return False 3283 3284 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 3285 if isinstance(parser, JsonParser): 3286 return True 3287 elif isinstance(parser, GzipParser): 3288 return isinstance(parser.inner_parser, JsonParser) 3289 else: 3290 return False 3291 3292 def create_http_api_budget( 3293 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 3294 ) -> HttpAPIBudget: 3295 policies = [ 3296 self._create_component_from_model(model=policy, config=config) 3297 for policy in model.policies 3298 ] 3299 3300 return HttpAPIBudget( 3301 policies=policies, 3302 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 3303 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 3304 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 3305 ) 3306 3307 def create_fixed_window_call_rate_policy( 3308 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 3309 ) -> FixedWindowCallRatePolicy: 3310 matchers = [ 3311 self._create_component_from_model(model=matcher, config=config) 3312 for matcher in model.matchers 3313 ] 3314 3315 # Set the initial reset timestamp to 10 days from now. 3316 # This value will be updated by the first request.
3317 return FixedWindowCallRatePolicy( 3318 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 3319 period=parse_duration(model.period), 3320 call_limit=model.call_limit, 3321 matchers=matchers, 3322 ) 3323 3324 def create_moving_window_call_rate_policy( 3325 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 3326 ) -> MovingWindowCallRatePolicy: 3327 rates = [ 3328 self._create_component_from_model(model=rate, config=config) for rate in model.rates 3329 ] 3330 matchers = [ 3331 self._create_component_from_model(model=matcher, config=config) 3332 for matcher in model.matchers 3333 ] 3334 return MovingWindowCallRatePolicy( 3335 rates=rates, 3336 matchers=matchers, 3337 ) 3338 3339 def create_unlimited_call_rate_policy( 3340 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 3341 ) -> UnlimitedCallRatePolicy: 3342 matchers = [ 3343 self._create_component_from_model(model=matcher, config=config) 3344 for matcher in model.matchers 3345 ] 3346 3347 return UnlimitedCallRatePolicy( 3348 matchers=matchers, 3349 ) 3350 3351 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 3352 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 3353 return Rate( 3354 limit=int(interpolated_limit.eval(config=config)), 3355 interval=parse_duration(model.interval), 3356 ) 3357 3358 def create_http_request_matcher( 3359 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 3360 ) -> HttpRequestRegexMatcher: 3361 return HttpRequestRegexMatcher( 3362 method=model.method, 3363 url_base=model.url_base, 3364 url_path_pattern=model.url_path_pattern, 3365 params=model.params, 3366 headers=model.headers, 3367 ) 3368 3369 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 3370 self._api_budget = self.create_component( 3371 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 3372 )
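As a usage sketch: set_api_budget routes the mapping through create_component with HTTPAPIBudgetModel, so the mapping's `type` keys must spell the component names exactly. The policy, rate, and config values below are illustrative assumptions, not defaults taken from the module:

    factory = ModelToComponentFactory()
    factory.set_api_budget(
        component_definition={
            "type": "HTTPAPIBudget",
            "policies": [
                {
                    "type": "MovingWindowCallRatePolicy",
                    # 100 calls per minute; interval is an ISO 8601 duration
                    "rates": [{"type": "Rate", "limit": 100, "interval": "PT1M"}],
                    "matchers": [],
                }
            ],
        },
        config={},
    )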
ComponentDefinition = typing.Mapping[str, typing.Any]
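A ComponentDefinition is the parsed-manifest mapping for a single component: a `type` key naming the component plus that component's fields. A minimal illustrative value (the field contents here are assumptions):

    definition: ComponentDefinition = {
        "type": "DpathExtractor",
        "field_path": ["data", "records"],
    }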
SCHEMA_TRANSFORMER_TYPE_MAPPING = {<SchemaNormalization.None_: 'None'>: <TransformConfig.NoTransform: 1>, <SchemaNormalization.Default: 'Default'>: <TransformConfig.DefaultSchemaNormalization: 2>}
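This mapping translates the manifest's SchemaNormalization setting into the TransformConfig flag that TypeTransformer consumes. A hedged sketch of the lookup, assuming both enums are imported from their CDK modules:

    transform_config = SCHEMA_TRANSFORMER_TYPE_MAPPING[SchemaNormalization.Default]
    schema_normalization = TypeTransformer(transform_config)  # DefaultSchemaNormalization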
class ModelToComponentFactory:
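For orientation, a hedged usage sketch: the factory is typically instantiated once and then asked to build runtime components from manifest mappings via create_component. The import alias and spec fields below are assumptions for illustration:

    from airbyte_cdk.sources.declarative.models import Spec as SpecModel

    factory = ModelToComponentFactory()
    spec = factory.create_component(
        model_type=SpecModel,
        component_definition={
            "type": "Spec",  # must match the model class name
            "connection_specification": {"type": "object", "properties": {}},
        },
        config={},
    )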
522class ModelToComponentFactory: 523 EPOCH_DATETIME_FORMAT = "%s" 524 525 def __init__( 526 self, 527 limit_pages_fetched_per_slice: Optional[int] = None, 528 limit_slices_fetched: Optional[int] = None, 529 emit_connector_builder_messages: bool = False, 530 disable_retries: bool = False, 531 disable_cache: bool = False, 532 disable_resumable_full_refresh: bool = False, 533 message_repository: Optional[MessageRepository] = None, 534 connector_state_manager: Optional[ConnectorStateManager] = None, 535 max_concurrent_async_job_count: Optional[int] = None, 536 ): 537 self._init_mappings() 538 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 539 self._limit_slices_fetched = limit_slices_fetched 540 self._emit_connector_builder_messages = emit_connector_builder_messages 541 self._disable_retries = disable_retries 542 self._disable_cache = disable_cache 543 self._disable_resumable_full_refresh = disable_resumable_full_refresh 544 self._message_repository = message_repository or InMemoryMessageRepository( 545 self._evaluate_log_level(emit_connector_builder_messages) 546 ) 547 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 548 self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None 549 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 550 551 def _init_mappings(self) -> None: 552 self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { 553 AddedFieldDefinitionModel: self.create_added_field_definition, 554 AddFieldsModel: self.create_add_fields, 555 ApiKeyAuthenticatorModel: self.create_api_key_authenticator, 556 BasicHttpAuthenticatorModel: self.create_basic_http_authenticator, 557 BearerAuthenticatorModel: self.create_bearer_authenticator, 558 CheckStreamModel: self.create_check_stream, 559 CheckDynamicStreamModel: self.create_check_dynamic_stream, 560 CompositeErrorHandlerModel: self.create_composite_error_handler, 561 ConcurrencyLevelModel: self.create_concurrency_level, 562 ConstantBackoffStrategyModel: self.create_constant_backoff_strategy, 563 CsvDecoderModel: self.create_csv_decoder, 564 CursorPaginationModel: self.create_cursor_pagination, 565 CustomAuthenticatorModel: self.create_custom_component, 566 CustomBackoffStrategyModel: self.create_custom_component, 567 CustomDecoderModel: self.create_custom_component, 568 CustomErrorHandlerModel: self.create_custom_component, 569 CustomIncrementalSyncModel: self.create_custom_component, 570 CustomRecordExtractorModel: self.create_custom_component, 571 CustomRecordFilterModel: self.create_custom_component, 572 CustomRequesterModel: self.create_custom_component, 573 CustomRetrieverModel: self.create_custom_component, 574 CustomSchemaLoader: self.create_custom_component, 575 CustomSchemaNormalizationModel: self.create_custom_component, 576 CustomStateMigration: self.create_custom_component, 577 CustomPaginationStrategyModel: self.create_custom_component, 578 CustomPartitionRouterModel: self.create_custom_component, 579 CustomTransformationModel: self.create_custom_component, 580 DatetimeBasedCursorModel: self.create_datetime_based_cursor, 581 DeclarativeStreamModel: self.create_declarative_stream, 582 DefaultErrorHandlerModel: self.create_default_error_handler, 583 DefaultPaginatorModel: self.create_default_paginator, 584 DpathExtractorModel: self.create_dpath_extractor, 585 ResponseToFileExtractorModel: self.create_response_to_file_extractor, 586 ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy, 587 
SessionTokenAuthenticatorModel: self.create_session_token_authenticator, 588 HttpRequesterModel: self.create_http_requester, 589 HttpResponseFilterModel: self.create_http_response_filter, 590 InlineSchemaLoaderModel: self.create_inline_schema_loader, 591 JsonDecoderModel: self.create_json_decoder, 592 JsonlDecoderModel: self.create_jsonl_decoder, 593 GzipDecoderModel: self.create_gzip_decoder, 594 KeysToLowerModel: self.create_keys_to_lower_transformation, 595 KeysToSnakeCaseModel: self.create_keys_to_snake_transformation, 596 KeysReplaceModel: self.create_keys_replace_transformation, 597 FlattenFieldsModel: self.create_flatten_fields, 598 DpathFlattenFieldsModel: self.create_dpath_flatten_fields, 599 IterableDecoderModel: self.create_iterable_decoder, 600 IncrementingCountCursorModel: self.create_incrementing_count_cursor, 601 XmlDecoderModel: self.create_xml_decoder, 602 JsonFileSchemaLoaderModel: self.create_json_file_schema_loader, 603 DynamicSchemaLoaderModel: self.create_dynamic_schema_loader, 604 SchemaTypeIdentifierModel: self.create_schema_type_identifier, 605 TypesMapModel: self.create_types_map, 606 ComplexFieldTypeModel: self.create_complex_field_type, 607 JwtAuthenticatorModel: self.create_jwt_authenticator, 608 LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration, 609 ListPartitionRouterModel: self.create_list_partition_router, 610 MinMaxDatetimeModel: self.create_min_max_datetime, 611 NoAuthModel: self.create_no_auth, 612 NoPaginationModel: self.create_no_pagination, 613 OAuthAuthenticatorModel: self.create_oauth_authenticator, 614 OffsetIncrementModel: self.create_offset_increment, 615 PageIncrementModel: self.create_page_increment, 616 ParentStreamConfigModel: self.create_parent_stream_config, 617 RecordFilterModel: self.create_record_filter, 618 RecordSelectorModel: self.create_record_selector, 619 RemoveFieldsModel: self.create_remove_fields, 620 RequestPathModel: self.create_request_path, 621 RequestOptionModel: self.create_request_option, 622 LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator, 623 SelectiveAuthenticatorModel: self.create_selective_authenticator, 624 SimpleRetrieverModel: self.create_simple_retriever, 625 StateDelegatingStreamModel: self.create_state_delegating_stream, 626 SpecModel: self.create_spec, 627 SubstreamPartitionRouterModel: self.create_substream_partition_router, 628 WaitTimeFromHeaderModel: self.create_wait_time_from_header, 629 WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header, 630 AsyncRetrieverModel: self.create_async_retriever, 631 HttpComponentsResolverModel: self.create_http_components_resolver, 632 ConfigComponentsResolverModel: self.create_config_components_resolver, 633 StreamConfigModel: self.create_stream_config, 634 ComponentMappingDefinitionModel: self.create_components_mapping_definition, 635 ZipfileDecoderModel: self.create_zipfile_decoder, 636 HTTPAPIBudgetModel: self.create_http_api_budget, 637 FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, 638 MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, 639 UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, 640 RateModel: self.create_rate, 641 HttpRequestRegexMatcherModel: self.create_http_request_matcher, 642 } 643 644 # Needed for the case where we need to perform a second parse on the fields of a custom component 645 self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR} 646 647 
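# Illustrative example (hypothetical values): TYPE_NAME_TO_MODEL lets the factory re-parse
# a raw subcomponent mapping discovered inside a custom component, e.g.:
#
#   model_cls = self.TYPE_NAME_TO_MODEL["DpathExtractor"]  # -> DpathExtractorModel
#   parsed = model_cls.parse_obj({"type": "DpathExtractor", "field_path": []})
#   component = self._create_component_from_model(model=parsed, config=config)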
def create_component( 648 self, 649 model_type: Type[BaseModel], 650 component_definition: ComponentDefinition, 651 config: Config, 652 **kwargs: Any, 653 ) -> Any: 654 """ 655 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 656 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating 657 declarative components from that model. 658 659 :param model_type: The type of declarative component that is being initialized 660 :param component_definition: The mapping that represents a declarative component 661 :param config: The connector config that is provided by the customer 662 :return: The declarative component to be used at runtime 663 """ 664 665 component_type = component_definition.get("type") 666 if component_definition.get("type") != model_type.__name__: 667 raise ValueError( 668 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 669 ) 670 671 declarative_component_model = model_type.parse_obj(component_definition) 672 673 if not isinstance(declarative_component_model, model_type): 674 raise ValueError( 675 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 676 ) 677 678 return self._create_component_from_model( 679 model=declarative_component_model, config=config, **kwargs 680 ) 681 682 def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any: 683 if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR: 684 raise ValueError( 685 f"{model.__class__} with attributes {model} is not a valid component type" 686 ) 687 component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__) 688 if not component_constructor: 689 raise ValueError(f"Could not find constructor for {model.__class__}") 690 return component_constructor(model=model, config=config, **kwargs) 691 692 @staticmethod 693 def create_added_field_definition( 694 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 695 ) -> AddedFieldDefinition: 696 interpolated_value = InterpolatedString.create( 697 model.value, parameters=model.parameters or {} 698 ) 699 return AddedFieldDefinition( 700 path=model.path, 701 value=interpolated_value, 702 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 703 parameters=model.parameters or {}, 704 ) 705 706 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 707 added_field_definitions = [ 708 self._create_component_from_model( 709 model=added_field_definition_model, 710 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 711 added_field_definition_model.value_type 712 ), 713 config=config, 714 ) 715 for added_field_definition_model in model.fields 716 ] 717 return AddFields( 718 fields=added_field_definitions, 719 condition=model.condition or "", 720 parameters=model.parameters or {}, 721 ) 722 723 def create_keys_to_lower_transformation( 724 self, model: KeysToLowerModel, config: Config, **kwargs: Any 725 ) -> KeysToLowerTransformation: 726 return KeysToLowerTransformation() 727 728 def create_keys_to_snake_transformation( 729 self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any 730 ) -> KeysToSnakeCaseTransformation: 731 return KeysToSnakeCaseTransformation() 732 733 def create_keys_replace_transformation( 734 self, model: KeysReplaceModel, config: Config, **kwargs: Any 735 )
-> KeysReplaceTransformation: 736 return KeysReplaceTransformation( 737 old=model.old, new=model.new, parameters=model.parameters or {} 738 ) 739 740 def create_flatten_fields( 741 self, model: FlattenFieldsModel, config: Config, **kwargs: Any 742 ) -> FlattenFields: 743 return FlattenFields( 744 flatten_lists=model.flatten_lists if model.flatten_lists is not None else True 745 ) 746 747 def create_dpath_flatten_fields( 748 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 749 ) -> DpathFlattenFields: 750 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 751 return DpathFlattenFields( 752 config=config, 753 field_path=model_field_path, 754 delete_origin_value=model.delete_origin_value 755 if model.delete_origin_value is not None 756 else False, 757 replace_record=model.replace_record if model.replace_record is not None else False, 758 parameters=model.parameters or {}, 759 ) 760 761 @staticmethod 762 def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: 763 if not value_type: 764 return None 765 names_to_types = { 766 ValueType.string: str, 767 ValueType.number: float, 768 ValueType.integer: int, 769 ValueType.boolean: bool, 770 } 771 return names_to_types[value_type] 772 773 def create_api_key_authenticator( 774 self, 775 model: ApiKeyAuthenticatorModel, 776 config: Config, 777 token_provider: Optional[TokenProvider] = None, 778 **kwargs: Any, 779 ) -> ApiKeyAuthenticator: 780 if model.inject_into is None and model.header is None: 781 raise ValueError( 782 "Expected either inject_into or header to be set for ApiKeyAuthenticator" 783 ) 784 785 if model.inject_into is not None and model.header is not None: 786 raise ValueError( 787 "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option" 788 ) 789 790 if token_provider is not None and model.api_token != "": 791 raise ValueError( 792 "If token_provider is set, api_token is ignored and must be set to an empty string." 793 ) 794 795 request_option = ( 796 self._create_component_from_model( 797 model.inject_into, config, parameters=model.parameters or {} 798 ) 799 if model.inject_into 800 else RequestOption( 801 inject_into=RequestOptionType.header, 802 field_name=model.header or "", 803 parameters=model.parameters or {}, 804 ) 805 ) 806 807 return ApiKeyAuthenticator( 808 token_provider=( 809 token_provider 810 if token_provider is not None 811 else InterpolatedStringTokenProvider( 812 api_token=model.api_token or "", 813 config=config, 814 parameters=model.parameters or {}, 815 ) 816 ), 817 request_option=request_option, 818 config=config, 819 parameters=model.parameters or {}, 820 ) 821 822 def create_legacy_to_per_partition_state_migration( 823 self, 824 model: LegacyToPerPartitionStateMigrationModel, 825 config: Mapping[str, Any], 826 declarative_stream: DeclarativeStreamModel, 827 ) -> LegacyToPerPartitionStateMigration: 828 retriever = declarative_stream.retriever 829 if not isinstance(retriever, SimpleRetrieverModel): 830 raise ValueError( 831 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}" 832 ) 833 partition_router = retriever.partition_router 834 if not isinstance( 835 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 836 ): 837 raise ValueError( 838 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router.
Got {type(partition_router)}" 839 ) 840 if not hasattr(partition_router, "parent_stream_configs"): 841 raise ValueError( 842 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 843 ) 844 845 if not hasattr(declarative_stream, "incremental_sync"): 846 raise ValueError( 847 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 848 ) 849 850 return LegacyToPerPartitionStateMigration( 851 partition_router, # type: ignore # was already checked above 852 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 853 config, 854 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 855 ) 856 857 def create_session_token_authenticator( 858 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 859 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 860 decoder = ( 861 self._create_component_from_model(model=model.decoder, config=config) 862 if model.decoder 863 else JsonDecoder(parameters={}) 864 ) 865 login_requester = self._create_component_from_model( 866 model=model.login_requester, 867 config=config, 868 name=f"{name}_login_requester", 869 decoder=decoder, 870 ) 871 token_provider = SessionTokenProvider( 872 login_requester=login_requester, 873 session_token_path=model.session_token_path, 874 expiration_duration=parse_duration(model.expiration_duration) 875 if model.expiration_duration 876 else None, 877 parameters=model.parameters or {}, 878 message_repository=self._message_repository, 879 decoder=decoder, 880 ) 881 if model.request_authentication.type == "Bearer": 882 return ModelToComponentFactory.create_bearer_authenticator( 883 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 884 config, 885 token_provider=token_provider, 886 ) 887 else: 888 return self.create_api_key_authenticator( 889 ApiKeyAuthenticatorModel( 890 type="ApiKeyAuthenticator", 891 api_token="", 892 inject_into=model.request_authentication.inject_into, 893 ), # type: ignore # $parameters and headers default to None 894 config=config, 895 token_provider=token_provider, 896 ) 897 898 @staticmethod 899 def create_basic_http_authenticator( 900 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 901 ) -> BasicHttpAuthenticator: 902 return BasicHttpAuthenticator( 903 password=model.password or "", 904 username=model.username, 905 config=config, 906 parameters=model.parameters or {}, 907 ) 908 909 @staticmethod 910 def create_bearer_authenticator( 911 model: BearerAuthenticatorModel, 912 config: Config, 913 token_provider: Optional[TokenProvider] = None, 914 **kwargs: Any, 915 ) -> BearerAuthenticator: 916 if token_provider is not None and model.api_token != "": 917 raise ValueError( 918 "If token_provider is set, api_token is ignored and must be set to an empty string."
919 ) 920 return BearerAuthenticator( 921 token_provider=( 922 token_provider 923 if token_provider is not None 924 else InterpolatedStringTokenProvider( 925 api_token=model.api_token or "", 926 config=config, 927 parameters=model.parameters or {}, 928 ) 929 ), 930 config=config, 931 parameters=model.parameters or {}, 932 ) 933 934 @staticmethod 935 def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream: 936 return CheckStream(stream_names=model.stream_names, parameters={}) 937 938 @staticmethod 939 def create_check_dynamic_stream( 940 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 941 ) -> CheckDynamicStream: 942 assert model.use_check_availability is not None # for mypy 943 944 use_check_availability = model.use_check_availability 945 946 return CheckDynamicStream( 947 stream_count=model.stream_count, 948 use_check_availability=use_check_availability, 949 parameters={}, 950 ) 951 952 def create_composite_error_handler( 953 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 954 ) -> CompositeErrorHandler: 955 error_handlers = [ 956 self._create_component_from_model(model=error_handler_model, config=config) 957 for error_handler_model in model.error_handlers 958 ] 959 return CompositeErrorHandler( 960 error_handlers=error_handlers, parameters=model.parameters or {} 961 ) 962 963 @staticmethod 964 def create_concurrency_level( 965 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 966 ) -> ConcurrencyLevel: 967 return ConcurrencyLevel( 968 default_concurrency=model.default_concurrency, 969 max_concurrency=model.max_concurrency, 970 config=config, 971 parameters={}, 972 ) 973 974 @staticmethod 975 def apply_stream_state_migrations( 976 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 977 ) -> MutableMapping[str, Any]: 978 if stream_state_migrations: 979 for state_migration in stream_state_migrations: 980 if state_migration.should_migrate(stream_state): 981 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 982 stream_state = dict(state_migration.migrate(stream_state)) 983 return stream_state 984 985 def create_concurrent_cursor_from_datetime_based_cursor( 986 self, 987 model_type: Type[BaseModel], 988 component_definition: ComponentDefinition, 989 stream_name: str, 990 stream_namespace: Optional[str], 991 config: Config, 992 message_repository: Optional[MessageRepository] = None, 993 runtime_lookback_window: Optional[datetime.timedelta] = None, 994 stream_state_migrations: Optional[List[Any]] = None, 995 **kwargs: Any, 996 ) -> ConcurrentCursor: 997 # Per-partition incremental streams can dynamically create child cursors which will pass their current 998 # state via the stream_state keyword argument. 
Incremental syncs without parent streams use the 999 # incoming state and connector_state_manager that is initialized when the component factory is created 1000 stream_state = ( 1001 self._connector_state_manager.get_stream_state(stream_name, stream_namespace) 1002 if "stream_state" not in kwargs 1003 else kwargs["stream_state"] 1004 ) 1005 stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1006 1007 component_type = component_definition.get("type") 1008 if component_definition.get("type") != model_type.__name__: 1009 raise ValueError( 1010 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1011 ) 1012 1013 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1014 1015 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1016 raise ValueError( 1017 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1018 ) 1019 1020 interpolated_cursor_field = InterpolatedString.create( 1021 datetime_based_cursor_model.cursor_field, 1022 parameters=datetime_based_cursor_model.parameters or {}, 1023 ) 1024 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1025 1026 interpolated_partition_field_start = InterpolatedString.create( 1027 datetime_based_cursor_model.partition_field_start or "start_time", 1028 parameters=datetime_based_cursor_model.parameters or {}, 1029 ) 1030 interpolated_partition_field_end = InterpolatedString.create( 1031 datetime_based_cursor_model.partition_field_end or "end_time", 1032 parameters=datetime_based_cursor_model.parameters or {}, 1033 ) 1034 1035 slice_boundary_fields = ( 1036 interpolated_partition_field_start.eval(config=config), 1037 interpolated_partition_field_end.eval(config=config), 1038 ) 1039 1040 datetime_format = datetime_based_cursor_model.datetime_format 1041 1042 cursor_granularity = ( 1043 parse_duration(datetime_based_cursor_model.cursor_granularity) 1044 if datetime_based_cursor_model.cursor_granularity 1045 else None 1046 ) 1047 1048 lookback_window = None 1049 interpolated_lookback_window = ( 1050 InterpolatedString.create( 1051 datetime_based_cursor_model.lookback_window, 1052 parameters=datetime_based_cursor_model.parameters or {}, 1053 ) 1054 if datetime_based_cursor_model.lookback_window 1055 else None 1056 ) 1057 if interpolated_lookback_window: 1058 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1059 if evaluated_lookback_window: 1060 lookback_window = parse_duration(evaluated_lookback_window) 1061 1062 connector_state_converter: DateTimeStreamStateConverter 1063 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1064 datetime_format=datetime_format, 1065 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1066 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1067 cursor_granularity=cursor_granularity, 1068 ) 1069 1070 # Adjusts the stream state by applying the runtime lookback window. 1071 # This is used to ensure correct state handling in case of failed partitions. 
1072 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1073 if runtime_lookback_window and stream_state_value: 1074 new_stream_state = ( 1075 connector_state_converter.parse_timestamp(stream_state_value) 1076 - runtime_lookback_window 1077 ) 1078 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1079 new_stream_state 1080 ) 1081 1082 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1083 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1084 start_date_runtime_value = self.create_min_max_datetime( 1085 model=datetime_based_cursor_model.start_datetime, config=config 1086 ) 1087 else: 1088 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1089 1090 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1091 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1092 end_date_runtime_value = self.create_min_max_datetime( 1093 model=datetime_based_cursor_model.end_datetime, config=config 1094 ) 1095 else: 1096 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1097 1098 interpolated_start_date = MinMaxDatetime.create( 1099 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1100 parameters=datetime_based_cursor_model.parameters, 1101 ) 1102 interpolated_end_date = ( 1103 None 1104 if not end_date_runtime_value 1105 else MinMaxDatetime.create( 1106 end_date_runtime_value, datetime_based_cursor_model.parameters 1107 ) 1108 ) 1109 1110 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1111 if not interpolated_start_date.datetime_format: 1112 interpolated_start_date.datetime_format = datetime_format 1113 if interpolated_end_date and not interpolated_end_date.datetime_format: 1114 interpolated_end_date.datetime_format = datetime_format 1115 1116 start_date = interpolated_start_date.get_datetime(config=config) 1117 end_date_provider = ( 1118 partial(interpolated_end_date.get_datetime, config) 1119 if interpolated_end_date 1120 else connector_state_converter.get_end_provider() 1121 ) 1122 1123 if ( 1124 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1125 ) or ( 1126 not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity 1127 ): 1128 raise ValueError( 1129 f"If step is defined, cursor_granularity should be as well and vice-versa. 
" 1130 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1131 ) 1132 1133 # When step is not defined, default to a step size from the starting date to the present moment 1134 step_length = datetime.timedelta.max 1135 interpolated_step = ( 1136 InterpolatedString.create( 1137 datetime_based_cursor_model.step, 1138 parameters=datetime_based_cursor_model.parameters or {}, 1139 ) 1140 if datetime_based_cursor_model.step 1141 else None 1142 ) 1143 if interpolated_step: 1144 evaluated_step = interpolated_step.eval(config) 1145 if evaluated_step: 1146 step_length = parse_duration(evaluated_step) 1147 1148 clamping_strategy: ClampingStrategy = NoClamping() 1149 if datetime_based_cursor_model.clamping: 1150 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1151 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1152 # object which we want to keep agnostic of being low-code 1153 target = InterpolatedString( 1154 string=datetime_based_cursor_model.clamping.target, 1155 parameters=datetime_based_cursor_model.parameters or {}, 1156 ) 1157 evaluated_target = target.eval(config=config) 1158 match evaluated_target: 1159 case "DAY": 1160 clamping_strategy = DayClampingStrategy() 1161 end_date_provider = ClampingEndProvider( 1162 DayClampingStrategy(is_ceiling=False), 1163 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1164 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1165 ) 1166 case "WEEK": 1167 if ( 1168 not datetime_based_cursor_model.clamping.target_details 1169 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1170 ): 1171 raise ValueError( 1172 "Given WEEK clamping, weekday needs to be provided as target_details" 1173 ) 1174 weekday = self._assemble_weekday( 1175 datetime_based_cursor_model.clamping.target_details["weekday"] 1176 ) 1177 clamping_strategy = WeekClampingStrategy(weekday) 1178 end_date_provider = ClampingEndProvider( 1179 WeekClampingStrategy(weekday, is_ceiling=False), 1180 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1181 granularity=cursor_granularity or datetime.timedelta(days=1), 1182 ) 1183 case "MONTH": 1184 clamping_strategy = MonthClampingStrategy() 1185 end_date_provider = ClampingEndProvider( 1186 MonthClampingStrategy(is_ceiling=False), 1187 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1188 granularity=cursor_granularity or datetime.timedelta(days=1), 1189 ) 1190 case _: 1191 raise ValueError( 1192 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1193 ) 1194 1195 return ConcurrentCursor( 1196 stream_name=stream_name, 1197 stream_namespace=stream_namespace, 1198 stream_state=stream_state, 1199 message_repository=message_repository or self._message_repository, 1200 connector_state_manager=self._connector_state_manager, 1201 connector_state_converter=connector_state_converter, 1202 cursor_field=cursor_field, 1203 slice_boundary_fields=slice_boundary_fields, 1204 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1205 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1206 lookback_window=lookback_window, 1207 slice_range=step_length, 1208 cursor_granularity=cursor_granularity, 1209 clamping_strategy=clamping_strategy, 1210 ) 1211 1212 def create_concurrent_cursor_from_incrementing_count_cursor( 1213 self, 1214 model_type: Type[BaseModel], 1215 component_definition: ComponentDefinition, 1216 stream_name: str, 1217 stream_namespace: Optional[str], 1218 config: Config, 1219 message_repository: Optional[MessageRepository] = None, 1220 **kwargs: Any, 1221 ) -> ConcurrentCursor: 1222 # Per-partition incremental streams can dynamically create child cursors which will pass their current 1223 # state via the stream_state keyword argument. Incremental syncs without parent streams use the 1224 # incoming state and connector_state_manager that is initialized when the component factory is created 1225 stream_state = ( 1226 self._connector_state_manager.get_stream_state(stream_name, stream_namespace) 1227 if "stream_state" not in kwargs 1228 else kwargs["stream_state"] 1229 ) 1230 1231 component_type = component_definition.get("type") 1232 if component_definition.get("type") != model_type.__name__: 1233 raise ValueError( 1234 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1235 ) 1236 1237 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1238 1239 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1240 raise ValueError( 1241 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1242 ) 1243 1244 interpolated_start_value = ( 1245 InterpolatedString.create( 1246 incrementing_count_cursor_model.start_value, # type: ignore 1247 parameters=incrementing_count_cursor_model.parameters or {}, 1248 ) 1249 if incrementing_count_cursor_model.start_value 1250 else 0 1251 ) 1252 1253 interpolated_cursor_field = InterpolatedString.create( 1254 incrementing_count_cursor_model.cursor_field, 1255 parameters=incrementing_count_cursor_model.parameters or {}, 1256 ) 1257 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1258 1259 connector_state_converter = IncrementingCountStreamStateConverter( 1260 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1261 ) 1262 1263 return ConcurrentCursor( 1264 stream_name=stream_name, 1265 stream_namespace=stream_namespace, 1266 stream_state=stream_state, 1267 message_repository=message_repository or self._message_repository, 
1268 connector_state_manager=self._connector_state_manager, 1269 connector_state_converter=connector_state_converter, 1270 cursor_field=cursor_field, 1271 slice_boundary_fields=None, 1272 start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1273 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1274 ) 1275 1276 def _assemble_weekday(self, weekday: str) -> Weekday: 1277 match weekday: 1278 case "MONDAY": 1279 return Weekday.MONDAY 1280 case "TUESDAY": 1281 return Weekday.TUESDAY 1282 case "WEDNESDAY": 1283 return Weekday.WEDNESDAY 1284 case "THURSDAY": 1285 return Weekday.THURSDAY 1286 case "FRIDAY": 1287 return Weekday.FRIDAY 1288 case "SATURDAY": 1289 return Weekday.SATURDAY 1290 case "SUNDAY": 1291 return Weekday.SUNDAY 1292 case _: 1293 raise ValueError(f"Unknown weekday {weekday}") 1294 1295 def create_concurrent_cursor_from_perpartition_cursor( 1296 self, 1297 state_manager: ConnectorStateManager, 1298 model_type: Type[BaseModel], 1299 component_definition: ComponentDefinition, 1300 stream_name: str, 1301 stream_namespace: Optional[str], 1302 config: Config, 1303 stream_state: MutableMapping[str, Any], 1304 partition_router: PartitionRouter, 1305 stream_state_migrations: Optional[List[Any]] = None, 1306 **kwargs: Any, 1307 ) -> ConcurrentPerPartitionCursor: 1308 component_type = component_definition.get("type") 1309 if component_definition.get("type") != model_type.__name__: 1310 raise ValueError( 1311 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1312 ) 1313 1314 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1315 1316 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1317 raise ValueError( 1318 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1319 ) 1320 1321 interpolated_cursor_field = InterpolatedString.create( 1322 datetime_based_cursor_model.cursor_field, 1323 parameters=datetime_based_cursor_model.parameters or {}, 1324 ) 1325 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1326 1327 datetime_format = datetime_based_cursor_model.datetime_format 1328 1329 cursor_granularity = ( 1330 parse_duration(datetime_based_cursor_model.cursor_granularity) 1331 if datetime_based_cursor_model.cursor_granularity 1332 else None 1333 ) 1334 1335 connector_state_converter: DateTimeStreamStateConverter 1336 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1337 datetime_format=datetime_format, 1338 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1339 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1340 cursor_granularity=cursor_granularity, 1341 ) 1342 1343 # Create the cursor factory 1344 cursor_factory = ConcurrentCursorFactory( 1345 partial( 1346 self.create_concurrent_cursor_from_datetime_based_cursor, 1347 state_manager=state_manager, 1348 model_type=model_type, 1349 component_definition=component_definition, 1350 stream_name=stream_name, 1351 stream_namespace=stream_namespace, 1352 config=config, 1353 message_repository=NoopMessageRepository(), 1354 stream_state_migrations=stream_state_migrations, 1355 ) 1356 ) 1357 stream_state 
= self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1358 1359 # Return the concurrent cursor and state converter 1360 return ConcurrentPerPartitionCursor( 1361 cursor_factory=cursor_factory, 1362 partition_router=partition_router, 1363 stream_name=stream_name, 1364 stream_namespace=stream_namespace, 1365 stream_state=stream_state, 1366 message_repository=self._message_repository, # type: ignore 1367 connector_state_manager=state_manager, 1368 connector_state_converter=connector_state_converter, 1369 cursor_field=cursor_field, 1370 ) 1371 1372 @staticmethod 1373 def create_constant_backoff_strategy( 1374 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1375 ) -> ConstantBackoffStrategy: 1376 return ConstantBackoffStrategy( 1377 backoff_time_in_seconds=model.backoff_time_in_seconds, 1378 config=config, 1379 parameters=model.parameters or {}, 1380 ) 1381 1382 def create_cursor_pagination( 1383 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1384 ) -> CursorPaginationStrategy: 1385 if isinstance(decoder, PaginationDecoderDecorator): 1386 inner_decoder = decoder.decoder 1387 else: 1388 inner_decoder = decoder 1389 decoder = PaginationDecoderDecorator(decoder=decoder) 1390 1391 if self._is_supported_decoder_for_pagination(inner_decoder): 1392 decoder_to_use = decoder 1393 else: 1394 raise ValueError( 1395 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1396 ) 1397 1398 return CursorPaginationStrategy( 1399 cursor_value=model.cursor_value, 1400 decoder=decoder_to_use, 1401 page_size=model.page_size, 1402 stop_condition=model.stop_condition, 1403 config=config, 1404 parameters=model.parameters or {}, 1405 ) 1406 1407 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1408 """ 1409 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1410 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1411 :param model: The Pydantic model of the custom component being created 1412 :param config: The custom defined connector config 1413 :return: The declarative component built from the Pydantic model to be used at runtime 1414 """ 1415 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1416 component_fields = get_type_hints(custom_component_class) 1417 model_args = model.dict() 1418 model_args["config"] = config 1419 1420 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1421 # we defer to these arguments over the component's definition 1422 for key, arg in kwargs.items(): 1423 model_args[key] = arg 1424 1425 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1426 # defined in the schema. The fields and types are defined within the Python class implementation. 
Pydantic can only parse down to 1427 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1428 for model_field, model_value in model_args.items(): 1429 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1430 if ( 1431 isinstance(model_value, dict) 1432 and "type" not in model_value 1433 and model_field in component_fields 1434 ): 1435 derived_type = self._derive_component_type_from_type_hints( 1436 component_fields.get(model_field) 1437 ) 1438 if derived_type: 1439 model_value["type"] = derived_type 1440 1441 if self._is_component(model_value): 1442 model_args[model_field] = self._create_nested_component( 1443 model, model_field, model_value, config 1444 ) 1445 elif isinstance(model_value, list): 1446 vals = [] 1447 for v in model_value: 1448 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1449 derived_type = self._derive_component_type_from_type_hints( 1450 component_fields.get(model_field) 1451 ) 1452 if derived_type: 1453 v["type"] = derived_type 1454 if self._is_component(v): 1455 vals.append(self._create_nested_component(model, model_field, v, config)) 1456 else: 1457 vals.append(v) 1458 model_args[model_field] = vals 1459 1460 kwargs = { 1461 class_field: model_args[class_field] 1462 for class_field in component_fields.keys() 1463 if class_field in model_args 1464 } 1465 return custom_component_class(**kwargs) 1466 1467 @staticmethod 1468 def _get_class_from_fully_qualified_class_name( 1469 full_qualified_class_name: str, 1470 ) -> Any: 1471 """Get a class from its fully qualified name. 1472 1473 If a custom components module is needed, we assume it is already registered - probably 1474 as `source_declarative_manifest.components` or `components`. 1475 1476 Args: 1477 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1478 1479 Returns: 1480 Any: The class object. 1481 1482 Raises: 1483 ValueError: If the class cannot be loaded. 1484 """ 1485 split = full_qualified_class_name.split(".") 1486 module_name_full = ".".join(split[:-1]) 1487 class_name = split[-1] 1488 1489 try: 1490 module_ref = importlib.import_module(module_name_full) 1491 except ModuleNotFoundError as e: 1492 if split[0] == "source_declarative_manifest": 1493 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. 
In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append 1494 try: 1495 import os 1496 1497 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1498 module_ref = importlib.import_module( 1499 module_name_with_source_declarative_manifest 1500 ) 1501 except ModuleNotFoundError: 1502 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1503 else: 1504 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1505 1506 try: 1507 return getattr(module_ref, class_name) 1508 except AttributeError as e: 1509 raise ValueError( 1510 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1511 ) from e 1512 1513 @staticmethod 1514 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1515 interface = field_type 1516 while True: 1517 origin = get_origin(interface) 1518 if origin: 1519 # Unnest types until we reach the raw type 1520 # List[T] -> T 1521 # Optional[List[T]] -> T 1522 args = get_args(interface) 1523 interface = args[0] 1524 else: 1525 break 1526 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1527 return interface.__name__ 1528 return None 1529 1530 @staticmethod 1531 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1532 if not cls: 1533 return False 1534 return cls.__module__ == "builtins" 1535 1536 @staticmethod 1537 def _extract_missing_parameters(error: TypeError) -> List[str]: 1538 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1539 if parameter_search: 1540 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1541 else: 1542 return [] 1543 1544 def _create_nested_component( 1545 self, model: Any, model_field: str, model_value: Any, config: Config 1546 ) -> Any: 1547 type_name = model_value.get("type", None) 1548 if not type_name: 1549 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1550 return model_value 1551 1552 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1553 if model_type: 1554 parsed_model = model_type.parse_obj(model_value) 1555 try: 1556 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1557 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1558 # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1559 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1560 # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that 1561 # are needed by a component and could not otherwise be shared.
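# Illustrative sketch of the $parameters pass-through described above (the component
# definition below is hypothetical, not taken from a real manifest): a DefaultPaginator
# nested inside a custom component cannot receive url_base from a sibling HttpRequester,
# so the manifest supplies it explicitly and the filtering below forwards it:
#
#     model_value = {
#         "type": "DefaultPaginator",
#         "$parameters": {"url_base": "https://api.example.com/v1/"},
#         "pagination_strategy": {"type": "PageIncrement", "page_size": 100},
#     }
#
# create_default_paginator declares url_base as keyword-only, so it appears in
# inspect.getfullargspec(model_constructor).kwonlyargs and matching_parameters
# becomes {"url_base": "https://api.example.com/v1/"}.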
1562 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1563 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1564 model_parameters = model_value.get("$parameters", {}) 1565 matching_parameters = { 1566 kwarg: model_parameters[kwarg] 1567 for kwarg in constructor_kwargs 1568 if kwarg in model_parameters 1569 } 1570 return self._create_component_from_model( 1571 model=parsed_model, config=config, **matching_parameters 1572 ) 1573 except TypeError as error: 1574 missing_parameters = self._extract_missing_parameters(error) 1575 if missing_parameters: 1576 raise ValueError( 1577 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1578 + ", ".join( 1579 ( 1580 f"{type_name}.$parameters.{parameter}" 1581 for parameter in missing_parameters 1582 ) 1583 ) 1584 ) 1585 raise TypeError( 1586 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1587 ) 1588 else: 1589 raise ValueError( 1590 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1591 ) 1592 1593 @staticmethod 1594 def _is_component(model_value: Any) -> bool: 1595 return isinstance(model_value, dict) and model_value.get("type") is not None 1596 1597 def create_datetime_based_cursor( 1598 self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any 1599 ) -> DatetimeBasedCursor: 1600 start_datetime: Union[str, MinMaxDatetime] = ( 1601 model.start_datetime 1602 if isinstance(model.start_datetime, str) 1603 else self.create_min_max_datetime(model.start_datetime, config) 1604 ) 1605 end_datetime: Union[str, MinMaxDatetime, None] = None 1606 if model.is_data_feed and model.end_datetime: 1607 raise ValueError("Data feed does not support end_datetime") 1608 if model.is_data_feed and model.is_client_side_incremental: 1609 raise ValueError( 1610 "`Client side incremental` cannot be applied with `data feed`. Choose only one of them."
1611 ) 1612 if model.end_datetime: 1613 end_datetime = ( 1614 model.end_datetime 1615 if isinstance(model.end_datetime, str) 1616 else self.create_min_max_datetime(model.end_datetime, config) 1617 ) 1618 1619 end_time_option = ( 1620 self._create_component_from_model( 1621 model.end_time_option, config, parameters=model.parameters or {} 1622 ) 1623 if model.end_time_option 1624 else None 1625 ) 1626 start_time_option = ( 1627 self._create_component_from_model( 1628 model.start_time_option, config, parameters=model.parameters or {} 1629 ) 1630 if model.start_time_option 1631 else None 1632 ) 1633 1634 return DatetimeBasedCursor( 1635 cursor_field=model.cursor_field, 1636 cursor_datetime_formats=model.cursor_datetime_formats 1637 if model.cursor_datetime_formats 1638 else [], 1639 cursor_granularity=model.cursor_granularity, 1640 datetime_format=model.datetime_format, 1641 end_datetime=end_datetime, 1642 start_datetime=start_datetime, 1643 step=model.step, 1644 end_time_option=end_time_option, 1645 lookback_window=model.lookback_window, 1646 start_time_option=start_time_option, 1647 partition_field_end=model.partition_field_end, 1648 partition_field_start=model.partition_field_start, 1649 message_repository=self._message_repository, 1650 is_compare_strictly=model.is_compare_strictly, 1651 config=config, 1652 parameters=model.parameters or {}, 1653 ) 1654 1655 def create_declarative_stream( 1656 self, model: DeclarativeStreamModel, config: Config, **kwargs: Any 1657 ) -> DeclarativeStream: 1658 # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field 1659 # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the 1660 # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in 1661 # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one. 1662 combined_slicers = self._merge_stream_slicers(model=model, config=config) 1663 1664 primary_key = model.primary_key.__root__ if model.primary_key else None 1665 stop_condition_on_cursor = ( 1666 model.incremental_sync 1667 and hasattr(model.incremental_sync, "is_data_feed") 1668 and model.incremental_sync.is_data_feed 1669 ) 1670 client_side_incremental_sync = None 1671 if ( 1672 model.incremental_sync 1673 and hasattr(model.incremental_sync, "is_client_side_incremental") 1674 and model.incremental_sync.is_client_side_incremental 1675 ): 1676 supported_slicers = ( 1677 DatetimeBasedCursor, 1678 GlobalSubstreamCursor, 1679 PerPartitionWithGlobalCursor, 1680 ) 1681 if combined_slicers and not isinstance(combined_slicers, supported_slicers): 1682 raise ValueError( 1683 "Unsupported Slicer is used. 
PerPartitionWithGlobalCursor should be used here instead" 1684 ) 1685 cursor = ( 1686 combined_slicers 1687 if isinstance( 1688 combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) 1689 ) 1690 else self._create_component_from_model(model=model.incremental_sync, config=config) 1691 ) 1692 1693 client_side_incremental_sync = {"cursor": cursor} 1694 1695 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1696 cursor_model = model.incremental_sync 1697 1698 end_time_option = ( 1699 self._create_component_from_model( 1700 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1701 ) 1702 if cursor_model.end_time_option 1703 else None 1704 ) 1705 start_time_option = ( 1706 self._create_component_from_model( 1707 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1708 ) 1709 if cursor_model.start_time_option 1710 else None 1711 ) 1712 1713 request_options_provider = DatetimeBasedRequestOptionsProvider( 1714 start_time_option=start_time_option, 1715 end_time_option=end_time_option, 1716 partition_field_start=cursor_model.partition_field_start, 1717 partition_field_end=cursor_model.partition_field_end, 1718 config=config, 1719 parameters=model.parameters or {}, 1720 ) 1721 elif model.incremental_sync and isinstance( 1722 model.incremental_sync, IncrementingCountCursorModel 1723 ): 1724 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 1725 1726 start_time_option = ( 1727 self._create_component_from_model( 1728 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model is of type DatetimeBasedCursor 1729 config, 1730 parameters=cursor_model.parameters or {}, 1731 ) 1732 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model is of type DatetimeBasedCursor 1733 else None 1734 ) 1735 1736 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 1737 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 1738 partition_field_start = "start" 1739 1740 request_options_provider = DatetimeBasedRequestOptionsProvider( 1741 start_time_option=start_time_option, 1742 partition_field_start=partition_field_start, 1743 config=config, 1744 parameters=model.parameters or {}, 1745 ) 1746 else: 1747 request_options_provider = None 1748 1749 transformations = [] 1750 if model.transformations: 1751 for transformation_model in model.transformations: 1752 transformations.append( 1753 self._create_component_from_model(model=transformation_model, config=config) 1754 ) 1755 1756 retriever = self._create_component_from_model( 1757 model=model.retriever, 1758 config=config, 1759 name=model.name, 1760 primary_key=primary_key, 1761 stream_slicer=combined_slicers, 1762 request_options_provider=request_options_provider, 1763 stop_condition_on_cursor=stop_condition_on_cursor, 1764 client_side_incremental_sync=client_side_incremental_sync, 1765 transformations=transformations, 1766 incremental_sync=model.incremental_sync, 1767 ) 1768 cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None 1769 1770 if model.state_migrations: 1771 state_transformations = [ 1772 self._create_component_from_model(state_migration, config, declarative_stream=model) 1773 for state_migration in model.state_migrations 1774 ] 1775 else: 1776 state_transformations = [] 1777 1778 if model.schema_loader: 1779 schema_loader = self._create_component_from_model( 1780
model=model.schema_loader, config=config 1781 ) 1782 else: 1783 options = model.parameters or {} 1784 if "name" not in options: 1785 options["name"] = model.name 1786 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 1787 1788 return DeclarativeStream( 1789 name=model.name or "", 1790 primary_key=primary_key, 1791 retriever=retriever, 1792 schema_loader=schema_loader, 1793 stream_cursor_field=cursor_field or "", 1794 state_migrations=state_transformations, 1795 config=config, 1796 parameters=model.parameters or {}, 1797 ) 1798 1799 def _build_stream_slicer_from_partition_router( 1800 self, 1801 model: Union[ 1802 AsyncRetrieverModel, 1803 CustomRetrieverModel, 1804 SimpleRetrieverModel, 1805 ], 1806 config: Config, 1807 stream_name: Optional[str] = None, 1808 ) -> Optional[PartitionRouter]: 1809 if ( 1810 hasattr(model, "partition_router") 1811 and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel) 1812 and model.partition_router 1813 ): 1814 stream_slicer_model = model.partition_router 1815 if isinstance(stream_slicer_model, list): 1816 return CartesianProductStreamSlicer( 1817 [ 1818 self._create_component_from_model( 1819 model=slicer, config=config, stream_name=stream_name or "" 1820 ) 1821 for slicer in stream_slicer_model 1822 ], 1823 parameters={}, 1824 ) 1825 else: 1826 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 1827 model=stream_slicer_model, config=config, stream_name=stream_name or "" 1828 ) 1829 return None 1830 1831 def _build_incremental_cursor( 1832 self, 1833 model: DeclarativeStreamModel, 1834 stream_slicer: Optional[PartitionRouter], 1835 config: Config, 1836 ) -> Optional[StreamSlicer]: 1837 if model.incremental_sync and stream_slicer: 1838 if model.retriever.type == "AsyncRetriever": 1839 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1840 state_manager=self._connector_state_manager, 1841 model_type=DatetimeBasedCursorModel, 1842 component_definition=model.incremental_sync.__dict__, 1843 stream_name=model.name or "", 1844 stream_namespace=None, 1845 config=config or {}, 1846 stream_state={}, 1847 partition_router=stream_slicer, 1848 ) 1849 1850 incremental_sync_model = model.incremental_sync 1851 cursor_component = self._create_component_from_model( 1852 model=incremental_sync_model, config=config 1853 ) 1854 is_global_cursor = ( 1855 hasattr(incremental_sync_model, "global_substream_cursor") 1856 and incremental_sync_model.global_substream_cursor 1857 ) 1858 1859 if is_global_cursor: 1860 return GlobalSubstreamCursor( 1861 stream_cursor=cursor_component, partition_router=stream_slicer 1862 ) 1863 return PerPartitionWithGlobalCursor( 1864 cursor_factory=CursorFactory( 1865 lambda: self._create_component_from_model( 1866 model=incremental_sync_model, config=config 1867 ), 1868 ), 1869 partition_router=stream_slicer, 1870 stream_cursor=cursor_component, 1871 ) 1872 elif model.incremental_sync: 1873 if model.retriever.type == "AsyncRetriever": 1874 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 1875 model_type=DatetimeBasedCursorModel, 1876 component_definition=model.incremental_sync.__dict__, 1877 stream_name=model.name or "", 1878 stream_namespace=None, 1879 config=config or {}, 1880 stream_state_migrations=model.state_migrations, 1881 ) 1882 return self._create_component_from_model(model=model.incremental_sync, config=config) # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync 1883 return None 1884 1885 def _build_resumable_cursor( 1886 self, 1887 model: Union[ 1888 AsyncRetrieverModel, 1889 CustomRetrieverModel, 1890 SimpleRetrieverModel, 1891 ], 1892 stream_slicer: Optional[PartitionRouter], 1893 ) -> Optional[StreamSlicer]: 1894 if hasattr(model, "paginator") and model.paginator and not stream_slicer: 1895 # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor` 1896 return ResumableFullRefreshCursor(parameters={}) 1897 elif stream_slicer: 1898 # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor` 1899 return PerPartitionCursor( 1900 cursor_factory=CursorFactory( 1901 create_function=partial(ChildPartitionResumableFullRefreshCursor, {}) 1902 ), 1903 partition_router=stream_slicer, 1904 ) 1905 return None 1906 1907 def _merge_stream_slicers( 1908 self, model: DeclarativeStreamModel, config: Config 1909 ) -> Optional[StreamSlicer]: 1910 retriever_model = model.retriever 1911 1912 stream_slicer = self._build_stream_slicer_from_partition_router( 1913 retriever_model, config, stream_name=model.name 1914 ) 1915 1916 if retriever_model.type == "AsyncRetriever": 1917 is_not_datetime_cursor = ( 1918 model.incremental_sync.type != "DatetimeBasedCursor" 1919 if model.incremental_sync 1920 else None 1921 ) 1922 is_partition_router = ( 1923 bool(retriever_model.partition_router) if model.incremental_sync else None 1924 ) 1925 1926 if 
is_not_datetime_cursor: 1927 # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the 1928 # support of unordered slices (for example, when we trigger reports for January and February, the report 1929 # in February can be completed first). Once we have support for custom concurrent cursors or have a new 1930 # implementation available in the CDK, we can enable more cursors here. 1931 raise ValueError( 1932 "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet." 1933 ) 1934 1935 if is_partition_router and not stream_slicer: 1936 # Note that this development is done in parallel with the per-partition development; once that is merged, 1937 # we could support this here by calling create_concurrent_cursor_from_perpartition_cursor 1938 raise ValueError("Per partition state is not supported yet for AsyncRetriever.") 1939 1940 if model.incremental_sync: 1941 return self._build_incremental_cursor(model, stream_slicer, config) 1942 1943 return ( 1944 stream_slicer 1945 if self._disable_resumable_full_refresh 1946 else self._build_resumable_cursor(retriever_model, stream_slicer) 1947 ) 1948 1949 def create_default_error_handler( 1950 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 1951 ) -> DefaultErrorHandler: 1952 backoff_strategies = [] 1953 if model.backoff_strategies: 1954 for backoff_strategy_model in model.backoff_strategies: 1955 backoff_strategies.append( 1956 self._create_component_from_model(model=backoff_strategy_model, config=config) 1957 ) 1958 1959 response_filters = [] 1960 if model.response_filters: 1961 for response_filter_model in model.response_filters: 1962 response_filters.append( 1963 self._create_component_from_model(model=response_filter_model, config=config) 1964 ) 1965 response_filters.append( 1966 HttpResponseFilter(config=config, parameters=model.parameters or {}) 1967 ) 1968 1969 return DefaultErrorHandler( 1970 backoff_strategies=backoff_strategies, 1971 max_retries=model.max_retries, 1972 response_filters=response_filters, 1973 config=config, 1974 parameters=model.parameters or {}, 1975 ) 1976 1977 def create_default_paginator( 1978 self, 1979 model: DefaultPaginatorModel, 1980 config: Config, 1981 *, 1982 url_base: str, 1983 decoder: Optional[Decoder] = None, 1984 cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None, 1985 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 1986 if decoder: 1987 if self._is_supported_decoder_for_pagination(decoder): 1988 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 1989 else: 1990 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 1991 else: 1992 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 1993 page_size_option = ( 1994 self._create_component_from_model(model=model.page_size_option, config=config) 1995 if model.page_size_option 1996 else None 1997 ) 1998 page_token_option = ( 1999 self._create_component_from_model(model=model.page_token_option, config=config) 2000 if model.page_token_option 2001 else None 2002 ) 2003 pagination_strategy = self._create_component_from_model( 2004 model=model.pagination_strategy, config=config, decoder=decoder_to_use 2005 ) 2006 if cursor_used_for_stop_condition: 2007 pagination_strategy = StopConditionPaginationStrategyDecorator( 2008 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2009 ) 2010 paginator = DefaultPaginator( 2011 decoder=decoder_to_use, 2012 page_size_option=page_size_option, 2013
page_token_option=page_token_option, 2014 pagination_strategy=pagination_strategy, 2015 url_base=url_base, 2016 config=config, 2017 parameters=model.parameters or {}, 2018 ) 2019 if self._limit_pages_fetched_per_slice: 2020 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2021 return paginator 2022 2023 def create_dpath_extractor( 2024 self, 2025 model: DpathExtractorModel, 2026 config: Config, 2027 decoder: Optional[Decoder] = None, 2028 **kwargs: Any, 2029 ) -> DpathExtractor: 2030 if decoder: 2031 decoder_to_use = decoder 2032 else: 2033 decoder_to_use = JsonDecoder(parameters={}) 2034 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2035 return DpathExtractor( 2036 decoder=decoder_to_use, 2037 field_path=model_field_path, 2038 config=config, 2039 parameters=model.parameters or {}, 2040 ) 2041 2042 def create_response_to_file_extractor( 2043 self, 2044 model: ResponseToFileExtractorModel, 2045 **kwargs: Any, 2046 ) -> ResponseToFileExtractor: 2047 return ResponseToFileExtractor(parameters=model.parameters or {}) 2048 2049 @staticmethod 2050 def create_exponential_backoff_strategy( 2051 model: ExponentialBackoffStrategyModel, config: Config 2052 ) -> ExponentialBackoffStrategy: 2053 return ExponentialBackoffStrategy( 2054 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2055 ) 2056 2057 def create_http_requester( 2058 self, 2059 model: HttpRequesterModel, 2060 config: Config, 2061 decoder: Decoder = JsonDecoder(parameters={}), 2062 *, 2063 name: str, 2064 ) -> HttpRequester: 2065 authenticator = ( 2066 self._create_component_from_model( 2067 model=model.authenticator, 2068 config=config, 2069 url_base=model.url_base, 2070 name=name, 2071 decoder=decoder, 2072 ) 2073 if model.authenticator 2074 else None 2075 ) 2076 error_handler = ( 2077 self._create_component_from_model(model=model.error_handler, config=config) 2078 if model.error_handler 2079 else DefaultErrorHandler( 2080 backoff_strategies=[], 2081 response_filters=[], 2082 config=config, 2083 parameters=model.parameters or {}, 2084 ) 2085 ) 2086 2087 api_budget = self._api_budget 2088 2089 request_options_provider = InterpolatedRequestOptionsProvider( 2090 request_body_data=model.request_body_data, 2091 request_body_json=model.request_body_json, 2092 request_headers=model.request_headers, 2093 request_parameters=model.request_parameters, 2094 config=config, 2095 parameters=model.parameters or {}, 2096 ) 2097 2098 assert model.use_cache is not None # for mypy 2099 assert model.http_method is not None # for mypy 2100 2101 use_cache = model.use_cache and not self._disable_cache 2102 2103 return HttpRequester( 2104 name=name, 2105 url_base=model.url_base, 2106 path=model.path, 2107 authenticator=authenticator, 2108 error_handler=error_handler, 2109 api_budget=api_budget, 2110 http_method=HttpMethod[model.http_method.value], 2111 request_options_provider=request_options_provider, 2112 config=config, 2113 disable_retries=self._disable_retries, 2114 parameters=model.parameters or {}, 2115 message_repository=self._message_repository, 2116 use_cache=use_cache, 2117 decoder=decoder, 2118 stream_response=decoder.is_stream_response() if decoder else False, 2119 ) 2120 2121 @staticmethod 2122 def create_http_response_filter( 2123 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2124 ) -> HttpResponseFilter: 2125 if model.action: 2126 action = ResponseAction(model.action.value) 2127 else: 2128 action = None 2129 2130 failure_type = 
FailureType(model.failure_type.value) if model.failure_type else None 2131 2132 http_codes = ( 2133 set(model.http_codes) if model.http_codes else set() 2134 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2135 2136 return HttpResponseFilter( 2137 action=action, 2138 failure_type=failure_type, 2139 error_message=model.error_message or "", 2140 error_message_contains=model.error_message_contains or "", 2141 http_codes=http_codes, 2142 predicate=model.predicate or "", 2143 config=config, 2144 parameters=model.parameters or {}, 2145 ) 2146 2147 @staticmethod 2148 def create_inline_schema_loader( 2149 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2150 ) -> InlineSchemaLoader: 2151 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2152 2153 def create_complex_field_type( 2154 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2155 ) -> ComplexFieldType: 2156 items = ( 2157 self._create_component_from_model(model=model.items, config=config) 2158 if isinstance(model.items, ComplexFieldTypeModel) 2159 else model.items 2160 ) 2161 2162 return ComplexFieldType(field_type=model.field_type, items=items) 2163 2164 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2165 target_type = ( 2166 self._create_component_from_model(model=model.target_type, config=config) 2167 if isinstance(model.target_type, ComplexFieldTypeModel) 2168 else model.target_type 2169 ) 2170 2171 return TypesMap( 2172 target_type=target_type, 2173 current_type=model.current_type, 2174 condition=model.condition if model.condition is not None else "True", 2175 ) 2176 2177 def create_schema_type_identifier( 2178 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2179 ) -> SchemaTypeIdentifier: 2180 types_mapping = [] 2181 if model.types_mapping: 2182 types_mapping.extend( 2183 [ 2184 self._create_component_from_model(types_map, config=config) 2185 for types_map in model.types_mapping 2186 ] 2187 ) 2188 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2189 [x for x in model.schema_pointer] if model.schema_pointer else [] 2190 ) 2191 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2192 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2193 [x for x in model.type_pointer] if model.type_pointer else None 2194 ) 2195 2196 return SchemaTypeIdentifier( 2197 schema_pointer=model_schema_pointer, 2198 key_pointer=model_key_pointer, 2199 type_pointer=model_type_pointer, 2200 types_mapping=types_mapping, 2201 parameters=model.parameters or {}, 2202 ) 2203 2204 def create_dynamic_schema_loader( 2205 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2206 ) -> DynamicSchemaLoader: 2207 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 2208 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 2209 2210 schema_transformations = [] 2211 if model.schema_transformations: 2212 for transformation_model in model.schema_transformations: 2213 schema_transformations.append( 2214 self._create_component_from_model(model=transformation_model, config=config) 2215 ) 2216 2217 retriever = self._create_component_from_model( 2218 model=model.retriever, 2219 config=config, 2220 name="", 2221 primary_key=None, 2222 stream_slicer=combined_slicers, 2223 transformations=[], 2224 ) 2225 schema_type_identifier = self._create_component_from_model( 2226 
model.schema_type_identifier, config=config, parameters=model.parameters or {} 2227 ) 2228 return DynamicSchemaLoader( 2229 retriever=retriever, 2230 config=config, 2231 schema_transformations=schema_transformations, 2232 schema_type_identifier=schema_type_identifier, 2233 parameters=model.parameters or {}, 2234 ) 2235 2236 @staticmethod 2237 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2238 return JsonDecoder(parameters={}) 2239 2240 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2241 return CompositeRawDecoder( 2242 parser=ModelToComponentFactory._get_parser(model, config), 2243 stream_response=not self._emit_connector_builder_messages, 2244 ) 2245 2246 def create_jsonl_decoder( 2247 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2248 ) -> Decoder: 2249 return CompositeRawDecoder( 2250 parser=ModelToComponentFactory._get_parser(model, config), 2251 stream_response=not self._emit_connector_builder_messages, 2252 ) 2253 2254 def create_gzip_decoder( 2255 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2256 ) -> Decoder: 2257 _compressed_response_types = { 2258 "gzip", 2259 "x-gzip", 2260 "gzip, deflate", 2261 "x-gzip, deflate", 2262 "application/zip", 2263 "application/gzip", 2264 "application/x-gzip", 2265 "application/x-zip-compressed", 2266 } 2267 2268 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2269 2270 if self._emit_connector_builder_messages: 2271 # This is very surprising, but if the response is not streamed, 2272 # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data, as opposed to response.raw, 2273 # which uses urllib3 directly and does not uncompress the data. 2274 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2275 2276 return CompositeRawDecoder.by_headers( 2277 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2278 stream_response=True, 2279 fallback_parser=gzip_parser.inner_parser, 2280 ) 2281 2282 @staticmethod 2283 def create_incrementing_count_cursor( 2284 model: IncrementingCountCursorModel, config: Config, **kwargs: Any 2285 ) -> DatetimeBasedCursor: 2286 # This should not actually get used anywhere at runtime, but it is needed to pass checks since 2287 # we still parse models into components. The issue is that there's no runtime implementation of an 2288 # IncrementingCountCursor. 2289 # A known and expected issue with this stub is that running a check with a declared IncrementingCountCursor uses it without a ConcurrentCursor.
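# cursor_field below comes from the model, but datetime_format and start_datetime are
# arbitrary placeholder values: as noted above, this stub exists only so that model
# parsing succeeds and is not expected to drive an actual sync at runtime.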
2290 return DatetimeBasedCursor( 2291 cursor_field=model.cursor_field, 2292 datetime_format="%Y-%m-%d", 2293 start_datetime="2024-12-12", 2294 config=config, 2295 parameters={}, 2296 ) 2297 2298 @staticmethod 2299 def create_iterable_decoder( 2300 model: IterableDecoderModel, config: Config, **kwargs: Any 2301 ) -> IterableDecoder: 2302 return IterableDecoder(parameters={}) 2303 2304 @staticmethod 2305 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2306 return XmlDecoder(parameters={}) 2307 2308 def create_zipfile_decoder( 2309 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2310 ) -> ZipfileDecoder: 2311 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2312 2313 @staticmethod 2314 def _get_parser(model: BaseModel, config: Config) -> Parser: 2315 if isinstance(model, JsonDecoderModel): 2316 # Note that the logic is a bit different from the JsonDecoder, as some legacy behavior of returning {} on error cases is maintained 2317 return JsonParser() 2318 elif isinstance(model, JsonlDecoderModel): 2319 return JsonLineParser() 2320 elif isinstance(model, CsvDecoderModel): 2321 return CsvParser(encoding=model.encoding, delimiter=model.delimiter) 2322 elif isinstance(model, GzipDecoderModel): 2323 return GzipParser( 2324 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2325 ) 2326 elif isinstance( 2327 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2328 ): 2329 raise ValueError(f"Decoder type {model} does not have a parser associated with it") 2330 2331 raise ValueError(f"Unknown decoder type {model}") 2332 2333 @staticmethod 2334 def create_json_file_schema_loader( 2335 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2336 ) -> JsonFileSchemaLoader: 2337 return JsonFileSchemaLoader( 2338 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2339 ) 2340 2341 @staticmethod 2342 def create_jwt_authenticator( 2343 model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2344 ) -> JwtAuthenticator: 2345 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2346 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2347 return JwtAuthenticator( 2348 config=config, 2349 parameters=model.parameters or {}, 2350 algorithm=JwtAlgorithm(model.algorithm.value), 2351 secret_key=model.secret_key, 2352 base64_encode_secret_key=model.base64_encode_secret_key, 2353 token_duration=model.token_duration, 2354 header_prefix=model.header_prefix, 2355 kid=jwt_headers.kid, 2356 typ=jwt_headers.typ, 2357 cty=jwt_headers.cty, 2358 iss=jwt_payload.iss, 2359 sub=jwt_payload.sub, 2360 aud=jwt_payload.aud, 2361 additional_jwt_headers=model.additional_jwt_headers, 2362 additional_jwt_payload=model.additional_jwt_payload, 2363 ) 2364 2365 def create_list_partition_router( 2366 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2367 ) -> ListPartitionRouter: 2368 request_option = ( 2369 self._create_component_from_model(model.request_option, config) 2370 if model.request_option 2371 else None 2372 ) 2373 return ListPartitionRouter( 2374 cursor_field=model.cursor_field, 2375 request_option=request_option, 2376 values=model.values, 2377 config=config, 2378 parameters=model.parameters or {}, 2379 ) 2380 2381 @staticmethod 2382 def create_min_max_datetime( 2383 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2384 ) -> MinMaxDatetime: 2385 return
MinMaxDatetime( 2386 datetime=model.datetime, 2387 datetime_format=model.datetime_format or "", 2388 max_datetime=model.max_datetime or "", 2389 min_datetime=model.min_datetime or "", 2390 parameters=model.parameters or {}, 2391 ) 2392 2393 @staticmethod 2394 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2395 return NoAuth(parameters=model.parameters or {}) 2396 2397 @staticmethod 2398 def create_no_pagination( 2399 model: NoPaginationModel, config: Config, **kwargs: Any 2400 ) -> NoPagination: 2401 return NoPagination(parameters={}) 2402 2403 def create_oauth_authenticator( 2404 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2405 ) -> DeclarativeOauth2Authenticator: 2406 profile_assertion = ( 2407 self._create_component_from_model(model.profile_assertion, config=config) 2408 if model.profile_assertion 2409 else None 2410 ) 2411 2412 if model.refresh_token_updater: 2413 # ignore type error because fixing it would have a lot of dependencies, revisit later 2414 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2415 config, 2416 InterpolatedString.create( 2417 model.token_refresh_endpoint, # type: ignore 2418 parameters=model.parameters or {}, 2419 ).eval(config), 2420 access_token_name=InterpolatedString.create( 2421 model.access_token_name or "access_token", parameters=model.parameters or {} 2422 ).eval(config), 2423 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2424 expires_in_name=InterpolatedString.create( 2425 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2426 ).eval(config), 2427 client_id_name=InterpolatedString.create( 2428 model.client_id_name or "client_id", parameters=model.parameters or {} 2429 ).eval(config), 2430 client_id=InterpolatedString.create( 2431 model.client_id, parameters=model.parameters or {} 2432 ).eval(config) 2433 if model.client_id 2434 else model.client_id, 2435 client_secret_name=InterpolatedString.create( 2436 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2437 ).eval(config), 2438 client_secret=InterpolatedString.create( 2439 model.client_secret, parameters=model.parameters or {} 2440 ).eval(config) 2441 if model.client_secret 2442 else model.client_secret, 2443 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2444 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2445 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2446 grant_type_name=InterpolatedString.create( 2447 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2448 ).eval(config), 2449 grant_type=InterpolatedString.create( 2450 model.grant_type or "refresh_token", parameters=model.parameters or {} 2451 ).eval(config), 2452 refresh_request_body=InterpolatedMapping( 2453 model.refresh_request_body or {}, parameters=model.parameters or {} 2454 ).eval(config), 2455 refresh_request_headers=InterpolatedMapping( 2456 model.refresh_request_headers or {}, parameters=model.parameters or {} 2457 ).eval(config), 2458 scopes=model.scopes, 2459 token_expiry_date_format=model.token_expiry_date_format, 2460 message_repository=self._message_repository, 2461 refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes, 2462 refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key, 2463 refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values, 2464 ) 2465 # ignore 
type error because fixing it would have a lot of dependencies, revisit later 2466 return DeclarativeOauth2Authenticator( # type: ignore 2467 access_token_name=model.access_token_name or "access_token", 2468 access_token_value=model.access_token_value, 2469 client_id_name=model.client_id_name or "client_id", 2470 client_id=model.client_id, 2471 client_secret_name=model.client_secret_name or "client_secret", 2472 client_secret=model.client_secret, 2473 expires_in_name=model.expires_in_name or "expires_in", 2474 grant_type_name=model.grant_type_name or "grant_type", 2475 grant_type=model.grant_type or "refresh_token", 2476 refresh_request_body=model.refresh_request_body, 2477 refresh_request_headers=model.refresh_request_headers, 2478 refresh_token_name=model.refresh_token_name or "refresh_token", 2479 refresh_token=model.refresh_token, 2480 scopes=model.scopes, 2481 token_expiry_date=model.token_expiry_date, 2482 token_expiry_date_format=model.token_expiry_date_format, 2483 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2484 token_refresh_endpoint=model.token_refresh_endpoint, 2485 config=config, 2486 parameters=model.parameters or {}, 2487 message_repository=self._message_repository, 2488 profile_assertion=profile_assertion, 2489 use_profile_assertion=model.use_profile_assertion, 2490 ) 2491 2492 def create_offset_increment( 2493 self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any 2494 ) -> OffsetIncrement: 2495 if isinstance(decoder, PaginationDecoderDecorator): 2496 inner_decoder = decoder.decoder 2497 else: 2498 inner_decoder = decoder 2499 decoder = PaginationDecoderDecorator(decoder=decoder) 2500 2501 if self._is_supported_decoder_for_pagination(inner_decoder): 2502 decoder_to_use = decoder 2503 else: 2504 raise ValueError( 2505 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2506 ) 2507 2508 return OffsetIncrement( 2509 page_size=model.page_size, 2510 config=config, 2511 decoder=decoder_to_use, 2512 inject_on_first_request=model.inject_on_first_request or False, 2513 parameters=model.parameters or {}, 2514 ) 2515 2516 @staticmethod 2517 def create_page_increment( 2518 model: PageIncrementModel, config: Config, **kwargs: Any 2519 ) -> PageIncrement: 2520 return PageIncrement( 2521 page_size=model.page_size, 2522 config=config, 2523 start_from_page=model.start_from_page or 0, 2524 inject_on_first_request=model.inject_on_first_request or False, 2525 parameters=model.parameters or {}, 2526 ) 2527 2528 def create_parent_stream_config( 2529 self, model: ParentStreamConfigModel, config: Config, **kwargs: Any 2530 ) -> ParentStreamConfig: 2531 declarative_stream = self._create_component_from_model( 2532 model.stream, config=config, **kwargs 2533 ) 2534 request_option = ( 2535 self._create_component_from_model(model.request_option, config=config) 2536 if model.request_option 2537 else None 2538 ) 2539 2540 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2541 raise ValueError( 2542 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 
2543 ) 2544 2545 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2546 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2547 ) 2548 2549 return ParentStreamConfig( 2550 parent_key=model.parent_key, 2551 request_option=request_option, 2552 stream=declarative_stream, 2553 partition_field=model.partition_field, 2554 config=config, 2555 incremental_dependency=model.incremental_dependency or False, 2556 parameters=model.parameters or {}, 2557 extra_fields=model.extra_fields, 2558 lazy_read_pointer=model_lazy_read_pointer, 2559 ) 2560 2561 @staticmethod 2562 def create_record_filter( 2563 model: RecordFilterModel, config: Config, **kwargs: Any 2564 ) -> RecordFilter: 2565 return RecordFilter( 2566 condition=model.condition or "", config=config, parameters=model.parameters or {} 2567 ) 2568 2569 @staticmethod 2570 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 2571 return RequestPath(parameters={}) 2572 2573 @staticmethod 2574 def create_request_option( 2575 model: RequestOptionModel, config: Config, **kwargs: Any 2576 ) -> RequestOption: 2577 inject_into = RequestOptionType(model.inject_into.value) 2578 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 2579 [ 2580 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 2581 for segment in model.field_path 2582 ] 2583 if model.field_path 2584 else None 2585 ) 2586 field_name = ( 2587 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 2588 if model.field_name 2589 else None 2590 ) 2591 return RequestOption( 2592 field_name=field_name, 2593 field_path=field_path, 2594 inject_into=inject_into, 2595 parameters=kwargs.get("parameters", {}), 2596 ) 2597 2598 def create_record_selector( 2599 self, 2600 model: RecordSelectorModel, 2601 config: Config, 2602 *, 2603 name: str, 2604 transformations: List[RecordTransformation] | None = None, 2605 decoder: Decoder | None = None, 2606 client_side_incremental_sync: Dict[str, Any] | None = None, 2607 **kwargs: Any, 2608 ) -> RecordSelector: 2609 extractor = self._create_component_from_model( 2610 model=model.extractor, decoder=decoder, config=config 2611 ) 2612 record_filter = ( 2613 self._create_component_from_model(model.record_filter, config=config) 2614 if model.record_filter 2615 else None 2616 ) 2617 2618 assert model.transform_before_filtering is not None # for mypy 2619 2620 transform_before_filtering = model.transform_before_filtering 2621 if client_side_incremental_sync: 2622 record_filter = ClientSideIncrementalRecordFilterDecorator( 2623 config=config, 2624 parameters=model.parameters, 2625 condition=model.record_filter.condition 2626 if (model.record_filter and hasattr(model.record_filter, "condition")) 2627 else None, 2628 **client_side_incremental_sync, 2629 ) 2630 transform_before_filtering = True 2631 2632 schema_normalization = ( 2633 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 2634 if isinstance(model.schema_normalization, SchemaNormalizationModel) 2635 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 2636 ) 2637 2638 return RecordSelector( 2639 extractor=extractor, 2640 name=name, 2641 config=config, 2642 record_filter=record_filter, 2643 transformations=transformations or [], 2644 schema_normalization=schema_normalization, 2645 parameters=model.parameters or {}, 2646 
transform_before_filtering=transform_before_filtering, 2647 ) 2648 2649 @staticmethod 2650 def create_remove_fields( 2651 model: RemoveFieldsModel, config: Config, **kwargs: Any 2652 ) -> RemoveFields: 2653 return RemoveFields( 2654 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 2655 ) 2656 2657 def create_selective_authenticator( 2658 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 2659 ) -> DeclarativeAuthenticator: 2660 authenticators = { 2661 name: self._create_component_from_model(model=auth, config=config) 2662 for name, auth in model.authenticators.items() 2663 } 2664 # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error 2665 return SelectiveAuthenticator( # type: ignore[abstract] 2666 config=config, 2667 authenticators=authenticators, 2668 authenticator_selection_path=model.authenticator_selection_path, 2669 **kwargs, 2670 ) 2671 2672 @staticmethod 2673 def create_legacy_session_token_authenticator( 2674 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 2675 ) -> LegacySessionTokenAuthenticator: 2676 return LegacySessionTokenAuthenticator( 2677 api_url=url_base, 2678 header=model.header, 2679 login_url=model.login_url, 2680 password=model.password or "", 2681 session_token=model.session_token or "", 2682 session_token_response_key=model.session_token_response_key or "", 2683 username=model.username or "", 2684 validate_session_url=model.validate_session_url, 2685 config=config, 2686 parameters=model.parameters or {}, 2687 ) 2688 2689 def create_simple_retriever( 2690 self, 2691 model: SimpleRetrieverModel, 2692 config: Config, 2693 *, 2694 name: str, 2695 primary_key: Optional[Union[str, List[str], List[List[str]]]], 2696 stream_slicer: Optional[StreamSlicer], 2697 request_options_provider: Optional[RequestOptionsProvider] = None, 2698 stop_condition_on_cursor: bool = False, 2699 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 2700 transformations: List[RecordTransformation], 2701 incremental_sync: Optional[ 2702 Union[ 2703 IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel 2704 ] 2705 ] = None, 2706 **kwargs: Any, 2707 ) -> SimpleRetriever: 2708 decoder = ( 2709 self._create_component_from_model(model=model.decoder, config=config) 2710 if model.decoder 2711 else JsonDecoder(parameters={}) 2712 ) 2713 requester = self._create_component_from_model( 2714 model=model.requester, decoder=decoder, config=config, name=name 2715 ) 2716 record_selector = self._create_component_from_model( 2717 model=model.record_selector, 2718 name=name, 2719 config=config, 2720 decoder=decoder, 2721 transformations=transformations, 2722 client_side_incremental_sync=client_side_incremental_sync, 2723 ) 2724 url_base = ( 2725 model.requester.url_base 2726 if hasattr(model.requester, "url_base") 2727 else requester.get_url_base() 2728 ) 2729 2730 # Define cursor only if per partition or common incremental support is needed 2731 cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None 2732 2733 if ( 2734 not isinstance(stream_slicer, DatetimeBasedCursor) 2735 or type(stream_slicer) is not DatetimeBasedCursor 2736 ): 2737 # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods). 
2738 # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement 2739 # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor can still act as the SimpleRetriever's 2740 # request_options_provider 2741 request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={}) 2742 elif not request_options_provider: 2743 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 2744 2745 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 2746 2747 cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None 2748 paginator = ( 2749 self._create_component_from_model( 2750 model=model.paginator, 2751 config=config, 2752 url_base=url_base, 2753 decoder=decoder, 2754 cursor_used_for_stop_condition=cursor_used_for_stop_condition, 2755 ) 2756 if model.paginator 2757 else NoPagination(parameters={}) 2758 ) 2759 2760 ignore_stream_slicer_parameters_on_paginated_requests = ( 2761 model.ignore_stream_slicer_parameters_on_paginated_requests or False 2762 ) 2763 2764 if ( 2765 model.partition_router 2766 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 2767 and not bool(self._connector_state_manager.get_stream_state(name, None)) 2768 and any( 2769 parent_stream_config.lazy_read_pointer 2770 for parent_stream_config in model.partition_router.parent_stream_configs 2771 ) 2772 ): 2773 if incremental_sync: 2774 if incremental_sync.type != "DatetimeBasedCursor": 2775 raise ValueError( 2776 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 2777 ) 2778 2779 elif incremental_sync.step or incremental_sync.cursor_granularity: 2780 raise ValueError( 2781 f"Found more than one slice per parent. LazySimpleRetriever only supports a single slice read for stream - {name}." 2782 ) 2783 2784 if model.decoder and model.decoder.type != "JsonDecoder": 2785 raise ValueError( 2786 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
2787 ) 2788 2789 return LazySimpleRetriever( 2790 name=name, 2791 paginator=paginator, 2792 primary_key=primary_key, 2793 requester=requester, 2794 record_selector=record_selector, 2795 stream_slicer=stream_slicer, 2796 request_option_provider=request_options_provider, 2797 cursor=cursor, 2798 config=config, 2799 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 2800 parameters=model.parameters or {}, 2801 ) 2802 2803 if self._limit_slices_fetched or self._emit_connector_builder_messages: 2804 return SimpleRetrieverTestReadDecorator( 2805 name=name, 2806 paginator=paginator, 2807 primary_key=primary_key, 2808 requester=requester, 2809 record_selector=record_selector, 2810 stream_slicer=stream_slicer, 2811 request_option_provider=request_options_provider, 2812 cursor=cursor, 2813 config=config, 2814 maximum_number_of_slices=self._limit_slices_fetched or 5, 2815 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 2816 parameters=model.parameters or {}, 2817 ) 2818 return SimpleRetriever( 2819 name=name, 2820 paginator=paginator, 2821 primary_key=primary_key, 2822 requester=requester, 2823 record_selector=record_selector, 2824 stream_slicer=stream_slicer, 2825 request_option_provider=request_options_provider, 2826 cursor=cursor, 2827 config=config, 2828 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 2829 parameters=model.parameters or {}, 2830 ) 2831 2832 def create_state_delegating_stream( 2833 self, 2834 model: StateDelegatingStreamModel, 2835 config: Config, 2836 has_parent_state: Optional[bool] = None, 2837 **kwargs: Any, 2838 ) -> DeclarativeStream: 2839 if ( 2840 model.full_refresh_stream.name != model.name 2841 or model.name != model.incremental_stream.name 2842 ): 2843 raise ValueError( 2844 f"state_delegating_stream, full_refresh_stream and incremental_stream must have equal names. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 2845 ) 2846 2847 stream_model = ( 2848 model.incremental_stream 2849 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 2850 else model.full_refresh_stream 2851 ) 2852 2853 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # A DeclarativeStream will be created, as stream_model is a stream description 2854 2855 def _create_async_job_status_mapping( 2856 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 2857 ) -> Mapping[str, AsyncJobStatus]: 2858 api_status_to_cdk_status = {} 2859 for cdk_status, api_statuses in model.dict().items(): 2860 if cdk_status == "type": 2861 # This is an element of the dict because of the typing of the CDK, but it is not a CDK status 2862 continue 2863 2864 for status in api_statuses: 2865 if status in api_status_to_cdk_status: 2866 raise ValueError( 2867 f"API status {status} is already set for CDK status {cdk_status}. 
Please ensure API statuses are only provided once" 2868 ) 2869 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 2870 return api_status_to_cdk_status 2871 2872 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 2873 match status: 2874 case "running": 2875 return AsyncJobStatus.RUNNING 2876 case "completed": 2877 return AsyncJobStatus.COMPLETED 2878 case "failed": 2879 return AsyncJobStatus.FAILED 2880 case "timeout": 2881 return AsyncJobStatus.TIMED_OUT 2882 case _: 2883 raise ValueError(f"Unsupported CDK status {status}") 2884 2885 def create_async_retriever( 2886 self, 2887 model: AsyncRetrieverModel, 2888 config: Config, 2889 *, 2890 name: str, 2891 primary_key: Optional[ 2892 Union[str, List[str], List[List[str]]] 2893 ], # this seems to be needed to match create_simple_retriever 2894 stream_slicer: Optional[StreamSlicer], 2895 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 2896 transformations: List[RecordTransformation], 2897 **kwargs: Any, 2898 ) -> AsyncRetriever: 2899 def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever: 2900 record_selector = RecordSelector( 2901 extractor=download_extractor, 2902 name=name, 2903 record_filter=None, 2904 transformations=transformations, 2905 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 2906 config=config, 2907 parameters={}, 2908 ) 2909 paginator = ( 2910 self._create_component_from_model( 2911 model=model.download_paginator, 2912 decoder=decoder, 2913 config=config, 2914 url_base="", 2915 ) 2916 if model.download_paginator 2917 else NoPagination(parameters={}) 2918 ) 2919 maximum_number_of_slices = self._limit_slices_fetched or 5 2920 2921 if self._limit_slices_fetched or self._emit_connector_builder_messages: 2922 return SimpleRetrieverTestReadDecorator( 2923 requester=download_requester, 2924 record_selector=record_selector, 2925 primary_key=None, 2926 name=job_download_components_name, 2927 paginator=paginator, 2928 config=config, 2929 parameters={}, 2930 maximum_number_of_slices=maximum_number_of_slices, 2931 ) 2932 2933 return SimpleRetriever( 2934 requester=download_requester, 2935 record_selector=record_selector, 2936 primary_key=None, 2937 name=job_download_components_name, 2938 paginator=paginator, 2939 config=config, 2940 parameters={}, 2941 ) 2942 2943 def _get_job_timeout() -> datetime.timedelta: 2944 user_defined_timeout: Optional[int] = ( 2945 int( 2946 InterpolatedString.create( 2947 str(model.polling_job_timeout), 2948 parameters={}, 2949 ).eval(config) 2950 ) 2951 if model.polling_job_timeout 2952 else None 2953 ) 2954 2955 # for test reads, use the user-defined timeout if provided, otherwise 15 minutes 2956 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 2957 # for regular (non-Connector Builder) syncs, the default is 60 minutes. 
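# Worked example of the fallback above (values assumed for illustration): with
# model.polling_job_timeout == 45, both test reads and regular syncs time out after
# 45 minutes; with model.polling_job_timeout unset, test reads fall back to
# 15 minutes and regular syncs fall back to the 60 minutes below.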
2958 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 2959 2960 return ( 2961 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 2962 ) 2963 2964 decoder = ( 2965 self._create_component_from_model(model=model.decoder, config=config) 2966 if model.decoder 2967 else JsonDecoder(parameters={}) 2968 ) 2969 record_selector = self._create_component_from_model( 2970 model=model.record_selector, 2971 config=config, 2972 decoder=decoder, 2973 name=name, 2974 transformations=transformations, 2975 client_side_incremental_sync=client_side_incremental_sync, 2976 ) 2977 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 2978 creation_requester = self._create_component_from_model( 2979 model=model.creation_requester, 2980 decoder=decoder, 2981 config=config, 2982 name=f"job creation - {name}", 2983 ) 2984 polling_requester = self._create_component_from_model( 2985 model=model.polling_requester, 2986 decoder=decoder, 2987 config=config, 2988 name=f"job polling - {name}", 2989 ) 2990 job_download_components_name = f"job download - {name}" 2991 download_decoder = ( 2992 self._create_component_from_model(model=model.download_decoder, config=config) 2993 if model.download_decoder 2994 else JsonDecoder(parameters={}) 2995 ) 2996 download_extractor = ( 2997 self._create_component_from_model( 2998 model=model.download_extractor, 2999 config=config, 3000 decoder=download_decoder, 3001 parameters=model.parameters, 3002 ) 3003 if model.download_extractor 3004 else DpathExtractor( 3005 [], 3006 config=config, 3007 decoder=download_decoder, 3008 parameters=model.parameters or {}, 3009 ) 3010 ) 3011 download_requester = self._create_component_from_model( 3012 model=model.download_requester, 3013 decoder=download_decoder, 3014 config=config, 3015 name=job_download_components_name, 3016 ) 3017 download_retriever = _get_download_retriever() 3018 abort_requester = ( 3019 self._create_component_from_model( 3020 model=model.abort_requester, 3021 decoder=decoder, 3022 config=config, 3023 name=f"job abort - {name}", 3024 ) 3025 if model.abort_requester 3026 else None 3027 ) 3028 delete_requester = ( 3029 self._create_component_from_model( 3030 model=model.delete_requester, 3031 decoder=decoder, 3032 config=config, 3033 name=f"job delete - {name}", 3034 ) 3035 if model.delete_requester 3036 else None 3037 ) 3038 download_target_requester = ( 3039 self._create_component_from_model( 3040 model=model.download_target_requester, 3041 decoder=decoder, 3042 config=config, 3043 name=f"job extract_url - {name}", 3044 ) 3045 if model.download_target_requester 3046 else None 3047 ) 3048 status_extractor = self._create_component_from_model( 3049 model=model.status_extractor, decoder=decoder, config=config, name=name 3050 ) 3051 download_target_extractor = self._create_component_from_model( 3052 model=model.download_target_extractor, 3053 decoder=decoder, 3054 config=config, 3055 name=name, 3056 ) 3057 3058 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3059 creation_requester=creation_requester, 3060 polling_requester=polling_requester, 3061 download_retriever=download_retriever, 3062 download_target_requester=download_target_requester, 3063 abort_requester=abort_requester, 3064 delete_requester=delete_requester, 3065 status_extractor=status_extractor, 3066 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3067 download_target_extractor=download_target_extractor, 3068 job_timeout=_get_job_timeout(), 3069 ) 3070 
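# At this point all of the async building blocks are assembled: the repository above
# drives the create -> poll -> download (and optional abort/delete) HTTP lifecycle,
# and the partition router below wraps it in an AsyncJobOrchestrator per group of
# stream slices.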
3071 async_job_partition_router = AsyncJobPartitionRouter( 3072 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3073 job_repository, 3074 stream_slices, 3075 self._job_tracker, 3076 self._message_repository, 3077 # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk 3078 has_bulk_parent=False, 3079 # set `job_max_retry` to 1 for the `Connector Builder` use-case. 3080 # `None` means the default of 3 retry attempts is used under the hood. 3081 job_max_retry=1 if self._emit_connector_builder_messages else None, 3082 ), 3083 stream_slicer=stream_slicer, 3084 config=config, 3085 parameters=model.parameters or {}, 3086 ) 3087 3088 return AsyncRetriever( 3089 record_selector=record_selector, 3090 stream_slicer=async_job_partition_router, 3091 config=config, 3092 parameters=model.parameters or {}, 3093 ) 3094 3095 @staticmethod 3096 def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3097 return Spec( 3098 connection_specification=model.connection_specification, 3099 documentation_url=model.documentation_url, 3100 advanced_auth=model.advanced_auth, 3101 parameters={}, 3102 ) 3103 3104 def create_substream_partition_router( 3105 self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any 3106 ) -> SubstreamPartitionRouter: 3107 parent_stream_configs = [] 3108 if model.parent_stream_configs: 3109 parent_stream_configs.extend( 3110 [ 3111 self._create_message_repository_substream_wrapper( 3112 model=parent_stream_config, config=config, **kwargs 3113 ) 3114 for parent_stream_config in model.parent_stream_configs 3115 ] 3116 ) 3117 3118 return SubstreamPartitionRouter( 3119 parent_stream_configs=parent_stream_configs, 3120 parameters=model.parameters or {}, 3121 config=config, 3122 ) 3123 3124 def _create_message_repository_substream_wrapper( 3125 self, model: ParentStreamConfigModel, config: Config, **kwargs: Any 3126 ) -> Any: 3127 substream_factory = ModelToComponentFactory( 3128 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3129 limit_slices_fetched=self._limit_slices_fetched, 3130 emit_connector_builder_messages=self._emit_connector_builder_messages, 3131 disable_retries=self._disable_retries, 3132 disable_cache=self._disable_cache, 3133 message_repository=LogAppenderMessageRepositoryDecorator( 3134 {"airbyte_cdk": {"stream": {"is_substream": True}}, "http": {"is_auxiliary": True}}, 3135 self._message_repository, 3136 self._evaluate_log_level(self._emit_connector_builder_messages), 3137 ), 3138 ) 3139 3140 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3141 has_parent_state = bool( 3142 self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None) 3143 if model.incremental_dependency 3144 else False 3145 ) 3146 return substream_factory._create_component_from_model( 3147 model=model, config=config, has_parent_state=has_parent_state, **kwargs 3148 ) 3149 3150 @staticmethod 3151 def create_wait_time_from_header( 3152 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 3153 ) -> WaitTimeFromHeaderBackoffStrategy: 3154 return WaitTimeFromHeaderBackoffStrategy( 3155 header=model.header, 3156 parameters=model.parameters or {}, 3157 config=config, 3158 regex=model.regex, 3159 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 3160 if model.max_waiting_time_in_seconds is not None 3161 else None, 3162 ) 3163 3164 @staticmethod 3165 def create_wait_until_time_from_header( 3166 model:
WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3167 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3168 return WaitUntilTimeFromHeaderBackoffStrategy( 3169 header=model.header, 3170 parameters=model.parameters or {}, 3171 config=config, 3172 min_wait=model.min_wait, 3173 regex=model.regex, 3174 ) 3175 3176 def get_message_repository(self) -> MessageRepository: 3177 return self._message_repository 3178 3179 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 3180 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 3181 3182 @staticmethod 3183 def create_components_mapping_definition( 3184 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 3185 ) -> ComponentMappingDefinition: 3186 interpolated_value = InterpolatedString.create( 3187 model.value, parameters=model.parameters or {} 3188 ) 3189 field_path = [ 3190 InterpolatedString.create(path, parameters=model.parameters or {}) 3191 for path in model.field_path 3192 ] 3193 return ComponentMappingDefinition( 3194 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 3195 value=interpolated_value, 3196 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 3197 parameters=model.parameters or {}, 3198 ) 3199 3200 def create_http_components_resolver( 3201 self, model: HttpComponentsResolverModel, config: Config 3202 ) -> Any: 3203 stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config) 3204 combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer) 3205 3206 retriever = self._create_component_from_model( 3207 model=model.retriever, 3208 config=config, 3209 name="", 3210 primary_key=None, 3211 stream_slicer=stream_slicer if stream_slicer else combined_slicers, 3212 transformations=[], 3213 ) 3214 3215 components_mapping = [ 3216 self._create_component_from_model( 3217 model=components_mapping_definition_model, 3218 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3219 components_mapping_definition_model.value_type 3220 ), 3221 config=config, 3222 ) 3223 for components_mapping_definition_model in model.components_mapping 3224 ] 3225 3226 return HttpComponentsResolver( 3227 retriever=retriever, 3228 config=config, 3229 components_mapping=components_mapping, 3230 parameters=model.parameters or {}, 3231 ) 3232 3233 @staticmethod 3234 def create_stream_config( 3235 model: StreamConfigModel, config: Config, **kwargs: Any 3236 ) -> StreamConfig: 3237 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 3238 [x for x in model.configs_pointer] if model.configs_pointer else [] 3239 ) 3240 3241 return StreamConfig( 3242 configs_pointer=model_configs_pointer, 3243 parameters=model.parameters or {}, 3244 ) 3245 3246 def create_config_components_resolver( 3247 self, model: ConfigComponentsResolverModel, config: Config 3248 ) -> Any: 3249 stream_config = self._create_component_from_model( 3250 model.stream_config, config=config, parameters=model.parameters or {} 3251 ) 3252 3253 components_mapping = [ 3254 self._create_component_from_model( 3255 model=components_mapping_definition_model, 3256 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3257 components_mapping_definition_model.value_type 3258 ), 3259 config=config, 3260 ) 3261 for components_mapping_definition_model in model.components_mapping 3262 ] 3263 3264 return ConfigComponentsResolver( 3265 stream_config=stream_config, 3266 config=config, 3267 
components_mapping=components_mapping, 3268 parameters=model.parameters or {}, 3269 ) 3270 3271 _UNSUPPORTED_DECODER_ERROR = ( 3272 "Specified decoder of {decoder_type} is not supported for pagination." 3273 " Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead." 3274 " If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 3275 ) 3276 3277 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 3278 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 3279 return True 3280 elif isinstance(decoder, CompositeRawDecoder): 3281 return self._is_supported_parser_for_pagination(decoder.parser) 3282 else: 3283 return False 3284 3285 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 3286 if isinstance(parser, JsonParser): 3287 return True 3288 elif isinstance(parser, GzipParser): 3289 return isinstance(parser.inner_parser, JsonParser) 3290 else: 3291 return False 3292 3293 def create_http_api_budget( 3294 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 3295 ) -> HttpAPIBudget: 3296 policies = [ 3297 self._create_component_from_model(model=policy, config=config) 3298 for policy in model.policies 3299 ] 3300 3301 return HttpAPIBudget( 3302 policies=policies, 3303 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 3304 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 3305 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 3306 ) 3307 3308 def create_fixed_window_call_rate_policy( 3309 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 3310 ) -> FixedWindowCallRatePolicy: 3311 matchers = [ 3312 self._create_component_from_model(model=matcher, config=config) 3313 for matcher in model.matchers 3314 ] 3315 3316 # Set the initial reset timestamp to 10 days from now. 3317 # This value will be updated by the first request.
3318 return FixedWindowCallRatePolicy( 3319 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 3320 period=parse_duration(model.period), 3321 call_limit=model.call_limit, 3322 matchers=matchers, 3323 ) 3324 3325 def create_moving_window_call_rate_policy( 3326 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 3327 ) -> MovingWindowCallRatePolicy: 3328 rates = [ 3329 self._create_component_from_model(model=rate, config=config) for rate in model.rates 3330 ] 3331 matchers = [ 3332 self._create_component_from_model(model=matcher, config=config) 3333 for matcher in model.matchers 3334 ] 3335 return MovingWindowCallRatePolicy( 3336 rates=rates, 3337 matchers=matchers, 3338 ) 3339 3340 def create_unlimited_call_rate_policy( 3341 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 3342 ) -> UnlimitedCallRatePolicy: 3343 matchers = [ 3344 self._create_component_from_model(model=matcher, config=config) 3345 for matcher in model.matchers 3346 ] 3347 3348 return UnlimitedCallRatePolicy( 3349 matchers=matchers, 3350 ) 3351 3352 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 3353 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 3354 return Rate( 3355 limit=int(interpolated_limit.eval(config=config)), 3356 interval=parse_duration(model.interval), 3357 ) 3358 3359 def create_http_request_matcher( 3360 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 3361 ) -> HttpRequestRegexMatcher: 3362 return HttpRequestRegexMatcher( 3363 method=model.method, 3364 url_base=model.url_base, 3365 url_path_pattern=model.url_path_pattern, 3366 params=model.params, 3367 headers=model.headers, 3368 ) 3369 3370 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 3371 self._api_budget = self.create_component( 3372 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 3373 )
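Taken together, these rate-limit factory methods let set_api_budget assemble an HttpAPIBudget from a manifest definition. A hedged sketch of such a definition follows; the limit, interval, and empty matchers list are illustrative, not taken from any real connector:

api_budget_definition = {
    "type": "HTTPAPIBudget",
    "policies": [
        {
            "type": "MovingWindowCallRatePolicy",
            # At most 100 calls per one-minute moving window (ISO-8601 duration).
            "rates": [{"type": "Rate", "limit": 100, "interval": "PT1M"}],
            # An empty matchers list applies the policy to all requests.
            "matchers": [],
        }
    ],
}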
ModelToComponentFactory( limit_pages_fetched_per_slice: Optional[int] = None, limit_slices_fetched: Optional[int] = None, emit_connector_builder_messages: bool = False, disable_retries: bool = False, disable_cache: bool = False, disable_resumable_full_refresh: bool = False, message_repository: Optional[airbyte_cdk.MessageRepository] = None, connector_state_manager: Optional[airbyte_cdk.ConnectorStateManager] = None, max_concurrent_async_job_count: Optional[int] = None)
525 def __init__( 526 self, 527 limit_pages_fetched_per_slice: Optional[int] = None, 528 limit_slices_fetched: Optional[int] = None, 529 emit_connector_builder_messages: bool = False, 530 disable_retries: bool = False, 531 disable_cache: bool = False, 532 disable_resumable_full_refresh: bool = False, 533 message_repository: Optional[MessageRepository] = None, 534 connector_state_manager: Optional[ConnectorStateManager] = None, 535 max_concurrent_async_job_count: Optional[int] = None, 536 ): 537 self._init_mappings() 538 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 539 self._limit_slices_fetched = limit_slices_fetched 540 self._emit_connector_builder_messages = emit_connector_builder_messages 541 self._disable_retries = disable_retries 542 self._disable_cache = disable_cache 543 self._disable_resumable_full_refresh = disable_resumable_full_refresh 544 self._message_repository = message_repository or InMemoryMessageRepository( 545 self._evaluate_log_level(emit_connector_builder_messages) 546 ) 547 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 548 self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None 549 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
def
create_component( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], config: Mapping[str, Any], **kwargs: Any) -> Any:
647 def create_component( 648 self, 649 model_type: Type[BaseModel], 650 component_definition: ComponentDefinition, 651 config: Config, 652 **kwargs: Any, 653 ) -> Any: 654 """ 655 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 656 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then 657 creating declarative components from that model. 658 659 :param model_type: The type of declarative component that is being initialized 660 :param component_definition: The mapping that represents a declarative component 661 :param config: The connector config that is provided by the customer 662 :return: The declarative component to be used at runtime 663 """ 664 665 component_type = component_definition.get("type") 666 if component_definition.get("type") != model_type.__name__: 667 raise ValueError( 668 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 669 ) 670 671 declarative_component_model = model_type.parse_obj(component_definition) 672 673 if not isinstance(declarative_component_model, model_type): 674 raise ValueError( 675 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 676 ) 677 678 return self._create_component_from_model( 679 model=declarative_component_model, config=config, **kwargs 680 )
Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating declarative components from that model.
Parameters
- model_type: The type of declarative component that is being initialized
- component_definition: The mapping that represents a declarative component
- config: The connector config that is provided by the customer
Returns
The declarative component to be used at runtime
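Example

A minimal, hedged sketch of driving create_component directly. The Spec definition and the empty connector config below are illustrative, not taken from any real connector:

from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
# The "type" key must match the model class name, or create_component raises ValueError.
spec = factory.create_component(
    model_type=SpecModel,
    component_definition={
        "type": "Spec",
        "connection_specification": {"type": "object", "properties": {}},
    },
    config={},
)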
@staticmethod
def
create_added_field_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddedFieldDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddedFieldDefinition:
692 @staticmethod 693 def create_added_field_definition( 694 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 695 ) -> AddedFieldDefinition: 696 interpolated_value = InterpolatedString.create( 697 model.value, parameters=model.parameters or {} 698 ) 699 return AddedFieldDefinition( 700 path=model.path, 701 value=interpolated_value, 702 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 703 parameters=model.parameters or {}, 704 )
def
create_add_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AddFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.AddFields:
706 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 707 added_field_definitions = [ 708 self._create_component_from_model( 709 model=added_field_definition_model, 710 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 711 added_field_definition_model.value_type 712 ), 713 config=config, 714 ) 715 for added_field_definition_model in model.fields 716 ] 717 return AddFields( 718 fields=added_field_definitions, 719 condition=model.condition or "", 720 parameters=model.parameters or {}, 721 )
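A hedged example of the manifest shape create_add_fields consumes; the record path and the config key are invented for illustration:

add_fields_definition = {
    "type": "AddFields",
    "fields": [
        {
            "type": "AddedFieldDefinition",
            "path": ["origin"],
            "value": "{{ config['site_id'] }}",  # interpolated at runtime
        }
    ],
}

Each entry in fields is parsed into an AddedFieldDefinition model and routed through create_added_field_definition above.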
def
create_keys_to_lower_transformation( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.KeysToLower, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation.KeysToLowerTransformation:
def
create_keys_to_snake_transformation( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.KeysToSnakeCase, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation.KeysToSnakeCaseTransformation:
def
create_keys_replace_transformation( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.KeysReplace, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.keys_replace_transformation.KeysReplaceTransformation:
def
create_flatten_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FlattenFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.flatten_fields.FlattenFields:
def
create_dpath_flatten_fields( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DpathFlattenFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields.DpathFlattenFields:
747 def create_dpath_flatten_fields( 748 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 749 ) -> DpathFlattenFields: 750 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 751 return DpathFlattenFields( 752 config=config, 753 field_path=model_field_path, 754 delete_origin_value=model.delete_origin_value 755 if model.delete_origin_value is not None 756 else False, 757 replace_record=model.replace_record if model.replace_record is not None else False, 758 parameters=model.parameters or {}, 759 )
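An illustrative DpathFlattenFields definition; the field_path below assumes records carry a nested "metadata" object and is not prescribed by the CDK:

dpath_flatten_definition = {
    "type": "DpathFlattenFields",
    # Flattens record["metadata"] into the top level of each record.
    # delete_origin_value and replace_record fall back to False above when omitted.
    "field_path": ["metadata"],
}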
def
create_api_key_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ApiKeyAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.ApiKeyAuthenticator:
773 def create_api_key_authenticator( 774 self, 775 model: ApiKeyAuthenticatorModel, 776 config: Config, 777 token_provider: Optional[TokenProvider] = None, 778 **kwargs: Any, 779 ) -> ApiKeyAuthenticator: 780 if model.inject_into is None and model.header is None: 781 raise ValueError( 782 "Expected either inject_into or header to be set for ApiKeyAuthenticator" 783 ) 784 785 if model.inject_into is not None and model.header is not None: 786 raise ValueError( 787 "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" 788 ) 789 790 if token_provider is not None and model.api_token != "": 791 raise ValueError( 792 "If token_provider is set, api_token is ignored and has to be set to empty string." 793 ) 794 795 request_option = ( 796 self._create_component_from_model( 797 model.inject_into, config, parameters=model.parameters or {} 798 ) 799 if model.inject_into 800 else RequestOption( 801 inject_into=RequestOptionType.header, 802 field_name=model.header or "", 803 parameters=model.parameters or {}, 804 ) 805 ) 806 807 return ApiKeyAuthenticator( 808 token_provider=( 809 token_provider 810 if token_provider is not None 811 else InterpolatedStringTokenProvider( 812 api_token=model.api_token or "", 813 config=config, 814 parameters=model.parameters or {}, 815 ) 816 ), 817 request_option=request_option, 818 config=config, 819 parameters=model.parameters or {}, 820 )
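The two validation branches above mean a definition supplies either the modern inject_into request option or the deprecated header, never both. A hedged example of the former; the header name is an assumption:

api_key_definition = {
    "type": "ApiKeyAuthenticator",
    "api_token": "{{ config['api_key'] }}",
    "inject_into": {
        "type": "RequestOption",
        "inject_into": "header",
        "field_name": "X-API-Key",  # illustrative header name
    },
}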
def
create_legacy_to_per_partition_state_migration( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.LegacyToPerPartitionStateMigration, config: Mapping[str, Any], declarative_stream: airbyte_cdk.sources.declarative.models.declarative_component_schema.DeclarativeStream) -> airbyte_cdk.LegacyToPerPartitionStateMigration:
822 def create_legacy_to_per_partition_state_migration( 823 self, 824 model: LegacyToPerPartitionStateMigrationModel, 825 config: Mapping[str, Any], 826 declarative_stream: DeclarativeStreamModel, 827 ) -> LegacyToPerPartitionStateMigration: 828 retriever = declarative_stream.retriever 829 if not isinstance(retriever, SimpleRetrieverModel): 830 raise ValueError( 831 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}" 832 ) 833 partition_router = retriever.partition_router 834 if not isinstance( 835 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 836 ): 837 raise ValueError( 838 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 839 ) 840 if not hasattr(partition_router, "parent_stream_configs"): 841 raise ValueError( 842 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 843 ) 844 845 if not hasattr(declarative_stream, "incremental_sync"): 846 raise ValueError( 847 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 848 ) 849 850 return LegacyToPerPartitionStateMigration( 851 partition_router, # type: ignore # was already checked above 852 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 853 config, 854 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 855 )
def
create_session_token_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SessionTokenAuthenticator, config: Mapping[str, Any], name: str, **kwargs: Any) -> Union[airbyte_cdk.ApiKeyAuthenticator, airbyte_cdk.BearerAuthenticator]:
857 def create_session_token_authenticator( 858 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 859 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 860 decoder = ( 861 self._create_component_from_model(model=model.decoder, config=config) 862 if model.decoder 863 else JsonDecoder(parameters={}) 864 ) 865 login_requester = self._create_component_from_model( 866 model=model.login_requester, 867 config=config, 868 name=f"{name}_login_requester", 869 decoder=decoder, 870 ) 871 token_provider = SessionTokenProvider( 872 login_requester=login_requester, 873 session_token_path=model.session_token_path, 874 expiration_duration=parse_duration(model.expiration_duration) 875 if model.expiration_duration 876 else None, 877 parameters=model.parameters or {}, 878 message_repository=self._message_repository, 879 decoder=decoder, 880 ) 881 if model.request_authentication.type == "Bearer": 882 return ModelToComponentFactory.create_bearer_authenticator( 883 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 884 config, 885 token_provider=token_provider, 886 ) 887 else: 888 return self.create_api_key_authenticator( 889 ApiKeyAuthenticatorModel( 890 type="ApiKeyAuthenticator", 891 api_token="", 892 inject_into=model.request_authentication.inject_into, 893 ), # type: ignore # $parameters and headers default to None 894 config=config, 895 token_provider=token_provider, 896 )
@staticmethod
def
create_basic_http_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BasicHttpAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.BasicHttpAuthenticator:
898 @staticmethod 899 def create_basic_http_authenticator( 900 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 901 ) -> BasicHttpAuthenticator: 902 return BasicHttpAuthenticator( 903 password=model.password or "", 904 username=model.username, 905 config=config, 906 parameters=model.parameters or {}, 907 )
@staticmethod
def
create_bearer_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.BearerAuthenticator, config: Mapping[str, Any], token_provider: Optional[airbyte_cdk.sources.declarative.auth.token_provider.TokenProvider] = None, **kwargs: Any) -> airbyte_cdk.BearerAuthenticator:
909 @staticmethod 910 def create_bearer_authenticator( 911 model: BearerAuthenticatorModel, 912 config: Config, 913 token_provider: Optional[TokenProvider] = None, 914 **kwargs: Any, 915 ) -> BearerAuthenticator: 916 if token_provider is not None and model.api_token != "": 917 raise ValueError( 918 "If token_provider is set, api_token is ignored and has to be set to empty string." 919 ) 920 return BearerAuthenticator( 921 token_provider=( 922 token_provider 923 if token_provider is not None 924 else InterpolatedStringTokenProvider( 925 api_token=model.api_token or "", 926 config=config, 927 parameters=model.parameters or {}, 928 ) 929 ), 930 config=config, 931 parameters=model.parameters or {}, 932 )
@staticmethod
def
create_check_stream( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckStream:
@staticmethod
def
create_check_dynamic_stream( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CheckDynamicStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.checks.CheckDynamicStream:
938 @staticmethod 939 def create_check_dynamic_stream( 940 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 941 ) -> CheckDynamicStream: 942 assert model.use_check_availability is not None # for mypy 943 944 use_check_availability = model.use_check_availability 945 946 return CheckDynamicStream( 947 stream_count=model.stream_count, 948 use_check_availability=use_check_availability, 949 parameters={}, 950 )
def
create_composite_error_handler( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CompositeErrorHandler, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.CompositeErrorHandler:
952 def create_composite_error_handler( 953 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 954 ) -> CompositeErrorHandler: 955 error_handlers = [ 956 self._create_component_from_model(model=error_handler_model, config=config) 957 for error_handler_model in model.error_handlers 958 ] 959 return CompositeErrorHandler( 960 error_handlers=error_handlers, parameters=model.parameters or {} 961 )
@staticmethod
def
create_concurrency_level( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConcurrencyLevel, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.concurrency_level.ConcurrencyLevel:
963 @staticmethod 964 def create_concurrency_level( 965 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 966 ) -> ConcurrencyLevel: 967 return ConcurrencyLevel( 968 default_concurrency=model.default_concurrency, 969 max_concurrency=model.max_concurrency, 970 config=config, 971 parameters={}, 972 )
@staticmethod
def
apply_stream_state_migrations( stream_state_migrations: Optional[List[Any]], stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
974 @staticmethod 975 def apply_stream_state_migrations( 976 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 977 ) -> MutableMapping[str, Any]: 978 if stream_state_migrations: 979 for state_migration in stream_state_migrations: 980 if state_migration.should_migrate(stream_state): 981 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 982 stream_state = dict(state_migration.migrate(stream_state)) 983 return stream_state
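Because the loop only calls should_migrate and migrate, any duck-typed object satisfies it. A minimal sketch; the migration class below is invented for illustration and is not a CDK class:

from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

class RenameCursorKey:
    # Hypothetical migration: moves state from "updated" to "updated_at".
    def should_migrate(self, stream_state):
        return "updated" in stream_state

    def migrate(self, stream_state):
        return {"updated_at": stream_state["updated"]}

migrated = ModelToComponentFactory.apply_stream_state_migrations(
    [RenameCursorKey()], {"updated": "2024-01-01"}
)
# migrated == {"updated_at": "2024-01-01"}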
def
create_concurrent_cursor_from_datetime_based_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, runtime_lookback_window: Optional[datetime.timedelta] = None, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
985 def create_concurrent_cursor_from_datetime_based_cursor( 986 self, 987 model_type: Type[BaseModel], 988 component_definition: ComponentDefinition, 989 stream_name: str, 990 stream_namespace: Optional[str], 991 config: Config, 992 message_repository: Optional[MessageRepository] = None, 993 runtime_lookback_window: Optional[datetime.timedelta] = None, 994 stream_state_migrations: Optional[List[Any]] = None, 995 **kwargs: Any, 996 ) -> ConcurrentCursor: 997 # Per-partition incremental streams can dynamically create child cursors which will pass their current 998 # state via the stream_state keyword argument. Incremental syncs without parent streams use the 999 # incoming state and connector_state_manager that is initialized when the component factory is created 1000 stream_state = ( 1001 self._connector_state_manager.get_stream_state(stream_name, stream_namespace) 1002 if "stream_state" not in kwargs 1003 else kwargs["stream_state"] 1004 ) 1005 stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1006 1007 component_type = component_definition.get("type") 1008 if component_definition.get("type") != model_type.__name__: 1009 raise ValueError( 1010 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1011 ) 1012 1013 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1014 1015 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1016 raise ValueError( 1017 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1018 ) 1019 1020 interpolated_cursor_field = InterpolatedString.create( 1021 datetime_based_cursor_model.cursor_field, 1022 parameters=datetime_based_cursor_model.parameters or {}, 1023 ) 1024 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1025 1026 interpolated_partition_field_start = InterpolatedString.create( 1027 datetime_based_cursor_model.partition_field_start or "start_time", 1028 parameters=datetime_based_cursor_model.parameters or {}, 1029 ) 1030 interpolated_partition_field_end = InterpolatedString.create( 1031 datetime_based_cursor_model.partition_field_end or "end_time", 1032 parameters=datetime_based_cursor_model.parameters or {}, 1033 ) 1034 1035 slice_boundary_fields = ( 1036 interpolated_partition_field_start.eval(config=config), 1037 interpolated_partition_field_end.eval(config=config), 1038 ) 1039 1040 datetime_format = datetime_based_cursor_model.datetime_format 1041 1042 cursor_granularity = ( 1043 parse_duration(datetime_based_cursor_model.cursor_granularity) 1044 if datetime_based_cursor_model.cursor_granularity 1045 else None 1046 ) 1047 1048 lookback_window = None 1049 interpolated_lookback_window = ( 1050 InterpolatedString.create( 1051 datetime_based_cursor_model.lookback_window, 1052 parameters=datetime_based_cursor_model.parameters or {}, 1053 ) 1054 if datetime_based_cursor_model.lookback_window 1055 else None 1056 ) 1057 if interpolated_lookback_window: 1058 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1059 if evaluated_lookback_window: 1060 lookback_window = parse_duration(evaluated_lookback_window) 1061 1062 connector_state_converter: DateTimeStreamStateConverter 1063 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1064 datetime_format=datetime_format, 1065 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1066 is_sequential_state=True, # ConcurrentPerPartitionCursor only 
works with sequential state 1067 cursor_granularity=cursor_granularity, 1068 ) 1069 1070 # Adjusts the stream state by applying the runtime lookback window. 1071 # This is used to ensure correct state handling in case of failed partitions. 1072 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1073 if runtime_lookback_window and stream_state_value: 1074 new_stream_state = ( 1075 connector_state_converter.parse_timestamp(stream_state_value) 1076 - runtime_lookback_window 1077 ) 1078 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1079 new_stream_state 1080 ) 1081 1082 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1083 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1084 start_date_runtime_value = self.create_min_max_datetime( 1085 model=datetime_based_cursor_model.start_datetime, config=config 1086 ) 1087 else: 1088 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1089 1090 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1091 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1092 end_date_runtime_value = self.create_min_max_datetime( 1093 model=datetime_based_cursor_model.end_datetime, config=config 1094 ) 1095 else: 1096 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1097 1098 interpolated_start_date = MinMaxDatetime.create( 1099 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1100 parameters=datetime_based_cursor_model.parameters, 1101 ) 1102 interpolated_end_date = ( 1103 None 1104 if not end_date_runtime_value 1105 else MinMaxDatetime.create( 1106 end_date_runtime_value, datetime_based_cursor_model.parameters 1107 ) 1108 ) 1109 1110 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1111 if not interpolated_start_date.datetime_format: 1112 interpolated_start_date.datetime_format = datetime_format 1113 if interpolated_end_date and not interpolated_end_date.datetime_format: 1114 interpolated_end_date.datetime_format = datetime_format 1115 1116 start_date = interpolated_start_date.get_datetime(config=config) 1117 end_date_provider = ( 1118 partial(interpolated_end_date.get_datetime, config) 1119 if interpolated_end_date 1120 else connector_state_converter.get_end_provider() 1121 ) 1122 1123 if ( 1124 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1125 ) or ( 1126 not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity 1127 ): 1128 raise ValueError( 1129 f"If step is defined, cursor_granularity should be as well and vice-versa. 
" 1130 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1131 ) 1132 1133 # When step is not defined, default to a step size from the starting date to the present moment 1134 step_length = datetime.timedelta.max 1135 interpolated_step = ( 1136 InterpolatedString.create( 1137 datetime_based_cursor_model.step, 1138 parameters=datetime_based_cursor_model.parameters or {}, 1139 ) 1140 if datetime_based_cursor_model.step 1141 else None 1142 ) 1143 if interpolated_step: 1144 evaluated_step = interpolated_step.eval(config) 1145 if evaluated_step: 1146 step_length = parse_duration(evaluated_step) 1147 1148 clamping_strategy: ClampingStrategy = NoClamping() 1149 if datetime_based_cursor_model.clamping: 1150 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1151 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1152 # object which we want to keep agnostic of being low-code 1153 target = InterpolatedString( 1154 string=datetime_based_cursor_model.clamping.target, 1155 parameters=datetime_based_cursor_model.parameters or {}, 1156 ) 1157 evaluated_target = target.eval(config=config) 1158 match evaluated_target: 1159 case "DAY": 1160 clamping_strategy = DayClampingStrategy() 1161 end_date_provider = ClampingEndProvider( 1162 DayClampingStrategy(is_ceiling=False), 1163 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1164 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1165 ) 1166 case "WEEK": 1167 if ( 1168 not datetime_based_cursor_model.clamping.target_details 1169 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1170 ): 1171 raise ValueError( 1172 "Given WEEK clamping, weekday needs to be provided as target_details" 1173 ) 1174 weekday = self._assemble_weekday( 1175 datetime_based_cursor_model.clamping.target_details["weekday"] 1176 ) 1177 clamping_strategy = WeekClampingStrategy(weekday) 1178 end_date_provider = ClampingEndProvider( 1179 WeekClampingStrategy(weekday, is_ceiling=False), 1180 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1181 granularity=cursor_granularity or datetime.timedelta(days=1), 1182 ) 1183 case "MONTH": 1184 clamping_strategy = MonthClampingStrategy() 1185 end_date_provider = ClampingEndProvider( 1186 MonthClampingStrategy(is_ceiling=False), 1187 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1188 granularity=cursor_granularity or datetime.timedelta(days=1), 1189 ) 1190 case _: 1191 raise ValueError( 1192 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1193 ) 1194 1195 return ConcurrentCursor( 1196 stream_name=stream_name, 1197 stream_namespace=stream_namespace, 1198 stream_state=stream_state, 1199 message_repository=message_repository or self._message_repository, 1200 connector_state_manager=self._connector_state_manager, 1201 connector_state_converter=connector_state_converter, 1202 cursor_field=cursor_field, 1203 slice_boundary_fields=slice_boundary_fields, 1204 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1205 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1206 lookback_window=lookback_window, 1207 slice_range=step_length, 1208 cursor_granularity=cursor_granularity, 1209 clamping_strategy=clamping_strategy, 1210 )
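Note the validation above: step and cursor_granularity must be provided together or not at all. A hedged DatetimeBasedCursor definition that satisfies it; formats and field names are assumptions:

datetime_cursor_definition = {
    "type": "DatetimeBasedCursor",
    "cursor_field": "updated_at",
    "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
    "start_datetime": "2024-01-01T00:00:00Z",
    "step": "P1D",  # one-day slices (ISO-8601 duration)
    "cursor_granularity": "PT1S",  # smallest increment between two cursor values
}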
def
create_concurrent_cursor_from_incrementing_count_cursor( self, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], message_repository: Optional[airbyte_cdk.MessageRepository] = None, **kwargs: Any) -> airbyte_cdk.ConcurrentCursor:
1212 def create_concurrent_cursor_from_incrementing_count_cursor( 1213 self, 1214 model_type: Type[BaseModel], 1215 component_definition: ComponentDefinition, 1216 stream_name: str, 1217 stream_namespace: Optional[str], 1218 config: Config, 1219 message_repository: Optional[MessageRepository] = None, 1220 **kwargs: Any, 1221 ) -> ConcurrentCursor: 1222 # Per-partition incremental streams can dynamically create child cursors which will pass their current 1223 # state via the stream_state keyword argument. Incremental syncs without parent streams use the 1224 # incoming state and connector_state_manager that is initialized when the component factory is created 1225 stream_state = ( 1226 self._connector_state_manager.get_stream_state(stream_name, stream_namespace) 1227 if "stream_state" not in kwargs 1228 else kwargs["stream_state"] 1229 ) 1230 1231 component_type = component_definition.get("type") 1232 if component_definition.get("type") != model_type.__name__: 1233 raise ValueError( 1234 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1235 ) 1236 1237 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1238 1239 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1240 raise ValueError( 1241 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1242 ) 1243 1244 interpolated_start_value = ( 1245 InterpolatedString.create( 1246 incrementing_count_cursor_model.start_value, # type: ignore 1247 parameters=incrementing_count_cursor_model.parameters or {}, 1248 ) 1249 if incrementing_count_cursor_model.start_value 1250 else 0 1251 ) 1252 1253 interpolated_cursor_field = InterpolatedString.create( 1254 incrementing_count_cursor_model.cursor_field, 1255 parameters=incrementing_count_cursor_model.parameters or {}, 1256 ) 1257 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1258 1259 connector_state_converter = IncrementingCountStreamStateConverter( 1260 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1261 ) 1262 1263 return ConcurrentCursor( 1264 stream_name=stream_name, 1265 stream_namespace=stream_namespace, 1266 stream_state=stream_state, 1267 message_repository=message_repository or self._message_repository, 1268 connector_state_manager=self._connector_state_manager, 1269 connector_state_converter=connector_state_converter, 1270 cursor_field=cursor_field, 1271 slice_boundary_fields=None, 1272 start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1273 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1274 )
def
create_concurrent_cursor_from_perpartition_cursor( self, state_manager: airbyte_cdk.ConnectorStateManager, model_type: Type[pydantic.v1.main.BaseModel], component_definition: Mapping[str, Any], stream_name: str, stream_namespace: Optional[str], config: Mapping[str, Any], stream_state: MutableMapping[str, Any], partition_router: airbyte_cdk.sources.declarative.partition_routers.PartitionRouter, stream_state_migrations: Optional[List[Any]] = None, **kwargs: Any) -> airbyte_cdk.sources.declarative.incremental.ConcurrentPerPartitionCursor:
1295 def create_concurrent_cursor_from_perpartition_cursor( 1296 self, 1297 state_manager: ConnectorStateManager, 1298 model_type: Type[BaseModel], 1299 component_definition: ComponentDefinition, 1300 stream_name: str, 1301 stream_namespace: Optional[str], 1302 config: Config, 1303 stream_state: MutableMapping[str, Any], 1304 partition_router: PartitionRouter, 1305 stream_state_migrations: Optional[List[Any]] = None, 1306 **kwargs: Any, 1307 ) -> ConcurrentPerPartitionCursor: 1308 component_type = component_definition.get("type") 1309 if component_definition.get("type") != model_type.__name__: 1310 raise ValueError( 1311 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1312 ) 1313 1314 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1315 1316 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1317 raise ValueError( 1318 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1319 ) 1320 1321 interpolated_cursor_field = InterpolatedString.create( 1322 datetime_based_cursor_model.cursor_field, 1323 parameters=datetime_based_cursor_model.parameters or {}, 1324 ) 1325 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1326 1327 datetime_format = datetime_based_cursor_model.datetime_format 1328 1329 cursor_granularity = ( 1330 parse_duration(datetime_based_cursor_model.cursor_granularity) 1331 if datetime_based_cursor_model.cursor_granularity 1332 else None 1333 ) 1334 1335 connector_state_converter: DateTimeStreamStateConverter 1336 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1337 datetime_format=datetime_format, 1338 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1339 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1340 cursor_granularity=cursor_granularity, 1341 ) 1342 1343 # Create the cursor factory 1344 cursor_factory = ConcurrentCursorFactory( 1345 partial( 1346 self.create_concurrent_cursor_from_datetime_based_cursor, 1347 state_manager=state_manager, 1348 model_type=model_type, 1349 component_definition=component_definition, 1350 stream_name=stream_name, 1351 stream_namespace=stream_namespace, 1352 config=config, 1353 message_repository=NoopMessageRepository(), 1354 stream_state_migrations=stream_state_migrations, 1355 ) 1356 ) 1357 stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state) 1358 1359 # Return the concurrent cursor and state converter 1360 return ConcurrentPerPartitionCursor( 1361 cursor_factory=cursor_factory, 1362 partition_router=partition_router, 1363 stream_name=stream_name, 1364 stream_namespace=stream_namespace, 1365 stream_state=stream_state, 1366 message_repository=self._message_repository, # type: ignore 1367 connector_state_manager=state_manager, 1368 connector_state_converter=connector_state_converter, 1369 cursor_field=cursor_field, 1370 )
@staticmethod
def
create_constant_backoff_strategy( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConstantBackoffStrategy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.ConstantBackoffStrategy:
1372 @staticmethod 1373 def create_constant_backoff_strategy( 1374 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1375 ) -> ConstantBackoffStrategy: 1376 return ConstantBackoffStrategy( 1377 backoff_time_in_seconds=model.backoff_time_in_seconds, 1378 config=config, 1379 parameters=model.parameters or {}, 1380 )
def
create_cursor_pagination( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CursorPagination, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder, **kwargs: Any) -> airbyte_cdk.CursorPaginationStrategy:
1382 def create_cursor_pagination( 1383 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1384 ) -> CursorPaginationStrategy: 1385 if isinstance(decoder, PaginationDecoderDecorator): 1386 inner_decoder = decoder.decoder 1387 else: 1388 inner_decoder = decoder 1389 decoder = PaginationDecoderDecorator(decoder=decoder) 1390 1391 if self._is_supported_decoder_for_pagination(inner_decoder): 1392 decoder_to_use = decoder 1393 else: 1394 raise ValueError( 1395 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1396 ) 1397 1398 return CursorPaginationStrategy( 1399 cursor_value=model.cursor_value, 1400 decoder=decoder_to_use, 1401 page_size=model.page_size, 1402 stop_condition=model.stop_condition, 1403 config=config, 1404 parameters=model.parameters or {}, 1405 )
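For reference, a hedged sketch of a CursorPagination definition that passes the decoder check above when the default JsonDecoder is in play; the response key is an assumption about the API:

cursor_pagination_definition = {
    "type": "CursorPagination",
    "cursor_value": "{{ response['next_page_token'] }}",
    "stop_condition": "{{ response['next_page_token'] is none }}",
    "page_size": 100,
}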
def
create_custom_component(self, model: Any, config: Mapping[str, Any], **kwargs: Any) -> Any:
1407 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1408 """ 1409 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1410 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1411 :param model: The Pydantic model of the custom component being created 1412 :param config: The custom defined connector config 1413 :return: The declarative component built from the Pydantic model to be used at runtime 1414 """ 1415 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1416 component_fields = get_type_hints(custom_component_class) 1417 model_args = model.dict() 1418 model_args["config"] = config 1419 1420 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1421 # we defer to these arguments over the component's definition 1422 for key, arg in kwargs.items(): 1423 model_args[key] = arg 1424 1425 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1426 # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to 1427 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1428 for model_field, model_value in model_args.items(): 1429 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1430 if ( 1431 isinstance(model_value, dict) 1432 and "type" not in model_value 1433 and model_field in component_fields 1434 ): 1435 derived_type = self._derive_component_type_from_type_hints( 1436 component_fields.get(model_field) 1437 ) 1438 if derived_type: 1439 model_value["type"] = derived_type 1440 1441 if self._is_component(model_value): 1442 model_args[model_field] = self._create_nested_component( 1443 model, model_field, model_value, config 1444 ) 1445 elif isinstance(model_value, list): 1446 vals = [] 1447 for v in model_value: 1448 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1449 derived_type = self._derive_component_type_from_type_hints( 1450 component_fields.get(model_field) 1451 ) 1452 if derived_type: 1453 v["type"] = derived_type 1454 if self._is_component(v): 1455 vals.append(self._create_nested_component(model, model_field, v, config)) 1456 else: 1457 vals.append(v) 1458 model_args[model_field] = vals 1459 1460 kwargs = { 1461 class_field: model_args[class_field] 1462 for class_field in component_fields.keys() 1463 if class_field in model_args 1464 } 1465 return custom_component_class(**kwargs)
Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor.
Parameters
- model: The Pydantic model of the custom component being created
- config: The custom defined connector config
Returns
The declarative component built from the Pydantic model to be used at runtime
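Example

A hedged sketch of the manifest side of this mechanism. source_example.components.MyRecordFilter is a hypothetical class path and minimum_score a hypothetical field; both stand in for whatever the custom class's type hints declare:

custom_component_definition = {
    "type": "CustomRecordFilter",
    "class_name": "source_example.components.MyRecordFilter",
    "minimum_score": 10,  # matched against MyRecordFilter's type-hinted fields
}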
def
create_datetime_based_cursor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DatetimeBasedCursor, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DatetimeBasedCursor:
1597 def create_datetime_based_cursor( 1598 self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any 1599 ) -> DatetimeBasedCursor: 1600 start_datetime: Union[str, MinMaxDatetime] = ( 1601 model.start_datetime 1602 if isinstance(model.start_datetime, str) 1603 else self.create_min_max_datetime(model.start_datetime, config) 1604 ) 1605 end_datetime: Union[str, MinMaxDatetime, None] = None 1606 if model.is_data_feed and model.end_datetime: 1607 raise ValueError("Data feed does not support end_datetime") 1608 if model.is_data_feed and model.is_client_side_incremental: 1609 raise ValueError( 1610 "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them." 1611 ) 1612 if model.end_datetime: 1613 end_datetime = ( 1614 model.end_datetime 1615 if isinstance(model.end_datetime, str) 1616 else self.create_min_max_datetime(model.end_datetime, config) 1617 ) 1618 1619 end_time_option = ( 1620 self._create_component_from_model( 1621 model.end_time_option, config, parameters=model.parameters or {} 1622 ) 1623 if model.end_time_option 1624 else None 1625 ) 1626 start_time_option = ( 1627 self._create_component_from_model( 1628 model.start_time_option, config, parameters=model.parameters or {} 1629 ) 1630 if model.start_time_option 1631 else None 1632 ) 1633 1634 return DatetimeBasedCursor( 1635 cursor_field=model.cursor_field, 1636 cursor_datetime_formats=model.cursor_datetime_formats 1637 if model.cursor_datetime_formats 1638 else [], 1639 cursor_granularity=model.cursor_granularity, 1640 datetime_format=model.datetime_format, 1641 end_datetime=end_datetime, 1642 start_datetime=start_datetime, 1643 step=model.step, 1644 end_time_option=end_time_option, 1645 lookback_window=model.lookback_window, 1646 start_time_option=start_time_option, 1647 partition_field_end=model.partition_field_end, 1648 partition_field_start=model.partition_field_start, 1649 message_repository=self._message_repository, 1650 is_compare_strictly=model.is_compare_strictly, 1651 config=config, 1652 parameters=model.parameters or {}, 1653 )
def
create_declarative_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DeclarativeStream, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
1655 def create_declarative_stream( 1656 self, model: DeclarativeStreamModel, config: Config, **kwargs: Any 1657 ) -> DeclarativeStream: 1658 # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field 1659 # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the 1660 # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in 1661 # the factory only support passing arguments to the component constructors, whereas this performs a merge of all slicers into one. 1662 combined_slicers = self._merge_stream_slicers(model=model, config=config) 1663 1664 primary_key = model.primary_key.__root__ if model.primary_key else None 1665 stop_condition_on_cursor = ( 1666 model.incremental_sync 1667 and hasattr(model.incremental_sync, "is_data_feed") 1668 and model.incremental_sync.is_data_feed 1669 ) 1670 client_side_incremental_sync = None 1671 if ( 1672 model.incremental_sync 1673 and hasattr(model.incremental_sync, "is_client_side_incremental") 1674 and model.incremental_sync.is_client_side_incremental 1675 ): 1676 supported_slicers = ( 1677 DatetimeBasedCursor, 1678 GlobalSubstreamCursor, 1679 PerPartitionWithGlobalCursor, 1680 ) 1681 if combined_slicers and not isinstance(combined_slicers, supported_slicers): 1682 raise ValueError( 1683 "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead" 1684 ) 1685 cursor = ( 1686 combined_slicers 1687 if isinstance( 1688 combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) 1689 ) 1690 else self._create_component_from_model(model=model.incremental_sync, config=config) 1691 ) 1692 1693 client_side_incremental_sync = {"cursor": cursor} 1694 1695 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1696 cursor_model = model.incremental_sync 1697 1698 end_time_option = ( 1699 self._create_component_from_model( 1700 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1701 ) 1702 if cursor_model.end_time_option 1703 else None 1704 ) 1705 start_time_option = ( 1706 self._create_component_from_model( 1707 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1708 ) 1709 if cursor_model.start_time_option 1710 else None 1711 ) 1712 1713 request_options_provider = DatetimeBasedRequestOptionsProvider( 1714 start_time_option=start_time_option, 1715 end_time_option=end_time_option, 1716 partition_field_start=cursor_model.partition_field_start, 1717 partition_field_end=cursor_model.partition_field_end, 1718 config=config, 1719 parameters=model.parameters or {}, 1720 ) 1721 elif model.incremental_sync and isinstance( 1722 model.incremental_sync, IncrementingCountCursorModel 1723 ): 1724 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 1725 1726 start_time_option = ( 1727 self._create_component_from_model( 1728 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 1729 config, 1730 parameters=cursor_model.parameters or {}, 1731 ) 1732 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 1733 else None 1734 ) 1735 1736 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 1737 # the default DatetimeBasedRequestOptionsProvider() sets them to
start_time/end_time 1738 partition_field_start = "start" 1739 1740 request_options_provider = DatetimeBasedRequestOptionsProvider( 1741 start_time_option=start_time_option, 1742 partition_field_start=partition_field_start, 1743 config=config, 1744 parameters=model.parameters or {}, 1745 ) 1746 else: 1747 request_options_provider = None 1748 1749 transformations = [] 1750 if model.transformations: 1751 for transformation_model in model.transformations: 1752 transformations.append( 1753 self._create_component_from_model(model=transformation_model, config=config) 1754 ) 1755 1756 retriever = self._create_component_from_model( 1757 model=model.retriever, 1758 config=config, 1759 name=model.name, 1760 primary_key=primary_key, 1761 stream_slicer=combined_slicers, 1762 request_options_provider=request_options_provider, 1763 stop_condition_on_cursor=stop_condition_on_cursor, 1764 client_side_incremental_sync=client_side_incremental_sync, 1765 transformations=transformations, 1766 incremental_sync=model.incremental_sync, 1767 ) 1768 cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None 1769 1770 if model.state_migrations: 1771 state_transformations = [ 1772 self._create_component_from_model(state_migration, config, declarative_stream=model) 1773 for state_migration in model.state_migrations 1774 ] 1775 else: 1776 state_transformations = [] 1777 1778 if model.schema_loader: 1779 schema_loader = self._create_component_from_model( 1780 model=model.schema_loader, config=config 1781 ) 1782 else: 1783 options = model.parameters or {} 1784 if "name" not in options: 1785 options["name"] = model.name 1786 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 1787 1788 return DeclarativeStream( 1789 name=model.name or "", 1790 primary_key=primary_key, 1791 retriever=retriever, 1792 schema_loader=schema_loader, 1793 stream_cursor_field=cursor_field or "", 1794 state_migrations=state_transformations, 1795 config=config, 1796 parameters=model.parameters or {}, 1797 )
def
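A minimal usage sketch follows (hedged: the stream definition, the example.com URL, and the empty config are illustrative placeholders, not taken from a real connector). In practice the factory is normally driven indirectly through ManifestDeclarativeSource, which validates the manifest and propagates $parameters before any create_* method runs; calling it directly like this is mainly useful in tests:

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DeclarativeStream as DeclarativeStreamModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

# Hypothetical stream definition, shaped like an entry under `streams` in a manifest.
stream_definition = {
    "type": "DeclarativeStream",
    "name": "users",
    "retriever": {
        "type": "SimpleRetriever",
        "requester": {
            "type": "HttpRequester",
            "url_base": "https://api.example.com/v1",
            "path": "/users",
            "http_method": "GET",
        },
        "record_selector": {
            "type": "RecordSelector",
            "extractor": {"type": "DpathExtractor", "field_path": ["data"]},
        },
    },
}

factory = ModelToComponentFactory()
model = DeclarativeStreamModel.parse_obj(stream_definition)
stream = factory.create_declarative_stream(model=model, config={})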
create_default_error_handler( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DefaultErrorHandler, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.DefaultErrorHandler:
def create_default_error_handler(
    self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
) -> DefaultErrorHandler:
    backoff_strategies = []
    if model.backoff_strategies:
        for backoff_strategy_model in model.backoff_strategies:
            backoff_strategies.append(
                self._create_component_from_model(model=backoff_strategy_model, config=config)
            )

    response_filters = []
    if model.response_filters:
        for response_filter_model in model.response_filters:
            response_filters.append(
                self._create_component_from_model(model=response_filter_model, config=config)
            )
    response_filters.append(
        HttpResponseFilter(config=config, parameters=model.parameters or {})
    )

    return DefaultErrorHandler(
        backoff_strategies=backoff_strategies,
        max_retries=model.max_retries,
        response_filters=response_filters,
        config=config,
        parameters=model.parameters or {},
    )
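Note that a catch-all HttpResponseFilter is always appended, so default HTTP error classification applies even when the manifest declares no response_filters. A small sketch (hedged: the empty config and the retry count are placeholders):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DefaultErrorHandler as DefaultErrorHandlerModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
error_handler = factory.create_default_error_handler(
    model=DefaultErrorHandlerModel(type="DefaultErrorHandler", max_retries=3),
    config={},
)
# error_handler.response_filters now contains the appended catch-all filter.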
create_default_paginator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DefaultPaginator, config: Mapping[str, Any], *, url_base: str, decoder: Optional[airbyte_cdk.Decoder] = None, cursor_used_for_stop_condition: Optional[airbyte_cdk.sources.declarative.incremental.DeclarativeCursor] = None) -> Union[airbyte_cdk.DefaultPaginator, airbyte_cdk.sources.declarative.requesters.paginators.PaginatorTestReadDecorator]:
def create_default_paginator(
    self,
    model: DefaultPaginatorModel,
    config: Config,
    *,
    url_base: str,
    decoder: Optional[Decoder] = None,
    cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
    if decoder:
        if self._is_supported_decoder_for_pagination(decoder):
            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
        else:
            raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
    else:
        decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
    page_size_option = (
        self._create_component_from_model(model=model.page_size_option, config=config)
        if model.page_size_option
        else None
    )
    page_token_option = (
        self._create_component_from_model(model=model.page_token_option, config=config)
        if model.page_token_option
        else None
    )
    pagination_strategy = self._create_component_from_model(
        model=model.pagination_strategy, config=config, decoder=decoder_to_use
    )
    if cursor_used_for_stop_condition:
        pagination_strategy = StopConditionPaginationStrategyDecorator(
            pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
        )
    paginator = DefaultPaginator(
        decoder=decoder_to_use,
        page_size_option=page_size_option,
        page_token_option=page_token_option,
        pagination_strategy=pagination_strategy,
        url_base=url_base,
        config=config,
        parameters=model.parameters or {},
    )
    if self._limit_pages_fetched_per_slice:
        return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
    return paginator
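A hedged sketch of building a paginator directly (the model fields and URL are illustrative; a real manifest would typically also declare page_token_option and page_size_option). Because no decoder is passed, the default JSON decoder gets wrapped in a PaginationDecoderDecorator, mirroring the else branch above:

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DefaultPaginator as DefaultPaginatorModel,
    PageIncrement as PageIncrementModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
paginator = factory.create_default_paginator(
    model=DefaultPaginatorModel(
        type="DefaultPaginator",
        pagination_strategy=PageIncrementModel(type="PageIncrement", page_size=100),
    ),
    config={},
    url_base="https://api.example.com/v1",
)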
create_dpath_extractor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DpathExtractor, config: Mapping[str, Any], decoder: Optional[airbyte_cdk.Decoder] = None, **kwargs: Any) -> airbyte_cdk.DpathExtractor:
def create_dpath_extractor(
    self,
    model: DpathExtractorModel,
    config: Config,
    decoder: Optional[Decoder] = None,
    **kwargs: Any,
) -> DpathExtractor:
    if decoder:
        decoder_to_use = decoder
    else:
        decoder_to_use = JsonDecoder(parameters={})
    model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
    return DpathExtractor(
        decoder=decoder_to_use,
        field_path=model_field_path,
        config=config,
        parameters=model.parameters or {},
    )
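A short sketch (hedged: the field path is a made-up example; "*" fans out over list entries per dpath semantics):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DpathExtractor as DpathExtractorModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
extractor = factory.create_dpath_extractor(
    model=DpathExtractorModel(type="DpathExtractor", field_path=["data", "*", "record"]),
    config={},
)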
create_response_to_file_extractor( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ResponseToFileExtractor, **kwargs: Any) -> airbyte_cdk.sources.declarative.extractors.ResponseToFileExtractor:
@staticmethod
create_exponential_backoff_strategy( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ExponentialBackoffStrategy, config: Mapping[str, Any]) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.ExponentialBackoffStrategy:
create_http_requester( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpRequester, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder = JsonDecoder(), *, name: str) -> airbyte_cdk.HttpRequester:
def create_http_requester(
    self,
    model: HttpRequesterModel,
    config: Config,
    decoder: Decoder = JsonDecoder(parameters={}),
    *,
    name: str,
) -> HttpRequester:
    authenticator = (
        self._create_component_from_model(
            model=model.authenticator,
            config=config,
            url_base=model.url_base,
            name=name,
            decoder=decoder,
        )
        if model.authenticator
        else None
    )
    error_handler = (
        self._create_component_from_model(model=model.error_handler, config=config)
        if model.error_handler
        else DefaultErrorHandler(
            backoff_strategies=[],
            response_filters=[],
            config=config,
            parameters=model.parameters or {},
        )
    )

    api_budget = self._api_budget

    request_options_provider = InterpolatedRequestOptionsProvider(
        request_body_data=model.request_body_data,
        request_body_json=model.request_body_json,
        request_headers=model.request_headers,
        request_parameters=model.request_parameters,
        config=config,
        parameters=model.parameters or {},
    )

    assert model.use_cache is not None  # for mypy
    assert model.http_method is not None  # for mypy

    use_cache = model.use_cache and not self._disable_cache

    return HttpRequester(
        name=name,
        url_base=model.url_base,
        path=model.path,
        authenticator=authenticator,
        error_handler=error_handler,
        api_budget=api_budget,
        http_method=HttpMethod[model.http_method.value],
        request_options_provider=request_options_provider,
        config=config,
        disable_retries=self._disable_retries,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        use_cache=use_cache,
        decoder=decoder,
        stream_response=decoder.is_stream_response() if decoder else False,
    )
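A hedged construction sketch (the URL, path, and empty config are placeholders; name is the stream name the factory normally passes through from create_declarative_stream):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpRequester as HttpRequesterModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
requester = factory.create_http_requester(
    model=HttpRequesterModel(
        type="HttpRequester",
        url_base="https://api.example.com/v1",
        path="/users",
        http_method="GET",
    ),
    config={},
    name="users",
)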
@staticmethod
create_http_response_filter( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpResponseFilter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.HttpResponseFilter:
@staticmethod
def create_http_response_filter(
    model: HttpResponseFilterModel, config: Config, **kwargs: Any
) -> HttpResponseFilter:
    if model.action:
        action = ResponseAction(model.action.value)
    else:
        action = None

    failure_type = FailureType(model.failure_type.value) if model.failure_type else None

    http_codes = (
        set(model.http_codes) if model.http_codes else set()
    )  # JSON schema notation has no set data type. The schema enforces an array of unique elements.

    return HttpResponseFilter(
        action=action,
        failure_type=failure_type,
        error_message=model.error_message or "",
        error_message_contains=model.error_message_contains or "",
        http_codes=http_codes,
        predicate=model.predicate or "",
        config=config,
        parameters=model.parameters or {},
    )
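A small sketch (hedged: the status codes and the RETRY action are illustrative values):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    HttpResponseFilter as HttpResponseFilterModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

response_filter = ModelToComponentFactory.create_http_response_filter(
    model=HttpResponseFilterModel(
        type="HttpResponseFilter",
        action="RETRY",
        http_codes=[429, 503],
    ),
    config={},
)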
@staticmethod
create_inline_schema_loader( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.InlineSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.InlineSchemaLoader:
create_complex_field_type( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ComplexFieldType, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.ComplexFieldType:
def create_complex_field_type(
    self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
) -> ComplexFieldType:
    items = (
        self._create_component_from_model(model=model.items, config=config)
        if isinstance(model.items, ComplexFieldTypeModel)
        else model.items
    )

    return ComplexFieldType(field_type=model.field_type, items=items)
create_types_map( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.TypesMap, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.TypesMap:
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
    target_type = (
        self._create_component_from_model(model=model.target_type, config=config)
        if isinstance(model.target_type, ComplexFieldTypeModel)
        else model.target_type
    )

    return TypesMap(
        target_type=target_type,
        current_type=model.current_type,
        condition=model.condition if model.condition is not None else "True",
    )
create_schema_type_identifier( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SchemaTypeIdentifier, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.SchemaTypeIdentifier:
def create_schema_type_identifier(
    self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
) -> SchemaTypeIdentifier:
    types_mapping = []
    if model.types_mapping:
        types_mapping.extend(
            [
                self._create_component_from_model(types_map, config=config)
                for types_map in model.types_mapping
            ]
        )
    model_schema_pointer: List[Union[InterpolatedString, str]] = (
        [x for x in model.schema_pointer] if model.schema_pointer else []
    )
    model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
    model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
        [x for x in model.type_pointer] if model.type_pointer else None
    )

    return SchemaTypeIdentifier(
        schema_pointer=model_schema_pointer,
        key_pointer=model_key_pointer,
        type_pointer=model_type_pointer,
        types_mapping=types_mapping,
        parameters=model.parameters or {},
    )
create_dynamic_schema_loader( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.DynamicSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.schema.DynamicSchemaLoader:
def create_dynamic_schema_loader(
    self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
) -> DynamicSchemaLoader:
    stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)

    schema_transformations = []
    if model.schema_transformations:
        for transformation_model in model.schema_transformations:
            schema_transformations.append(
                self._create_component_from_model(model=transformation_model, config=config)
            )

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=combined_slicers,
        transformations=[],
    )
    schema_type_identifier = self._create_component_from_model(
        model.schema_type_identifier, config=config, parameters=model.parameters or {}
    )
    return DynamicSchemaLoader(
        retriever=retriever,
        config=config,
        schema_transformations=schema_transformations,
        schema_type_identifier=schema_type_identifier,
        parameters=model.parameters or {},
    )
@staticmethod
create_json_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
create_csv_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.CsvDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
create_jsonl_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonlDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
create_gzip_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.GzipDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Decoder:
def create_gzip_decoder(
    self, model: GzipDecoderModel, config: Config, **kwargs: Any
) -> Decoder:
    _compressed_response_types = {
        "gzip",
        "x-gzip",
        "gzip, deflate",
        "x-gzip, deflate",
        "application/zip",
        "application/gzip",
        "application/x-gzip",
        "application/x-zip-compressed",
    }

    gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore # based on the model, we know this will be a GzipParser

    if self._emit_connector_builder_messages:
        # This is very surprising, but if the response is not streamed,
        # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data,
        # as opposed to response.raw, which uses urllib3 directly and does not uncompress the data.
        return CompositeRawDecoder(gzip_parser.inner_parser, False)

    return CompositeRawDecoder.by_headers(
        [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
        stream_response=True,
        fallback_parser=gzip_parser.inner_parser,
    )
@staticmethod
create_incrementing_count_cursor( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.IncrementingCountCursor, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DatetimeBasedCursor:
@staticmethod
def create_incrementing_count_cursor(
    model: IncrementingCountCursorModel, config: Config, **kwargs: Any
) -> DatetimeBasedCursor:
    # This should not actually get used anywhere at runtime, but is needed to pass checks since
    # we still parse models into components. The issue is that there is no runtime implementation
    # of an IncrementingCountCursor.
    # A known and expected issue with this stub is running a check with the declared
    # IncrementingCountCursor, because the check is run without a ConcurrentCursor.
    return DatetimeBasedCursor(
        cursor_field=model.cursor_field,
        datetime_format="%Y-%m-%d",
        start_datetime="2024-12-12",
        config=config,
        parameters={},
    )
@staticmethod
create_iterable_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.IterableDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.IterableDecoder:
@staticmethod
create_xml_decoder( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.XmlDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.XmlDecoder:
create_zipfile_decoder( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ZipfileDecoder, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.decoders.ZipfileDecoder:
@staticmethod
create_json_file_schema_loader( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JsonFileSchemaLoader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.JsonFileSchemaLoader:
@staticmethod
create_jwt_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.JwtAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.auth.JwtAuthenticator:
@staticmethod
def create_jwt_authenticator(
    model: JwtAuthenticatorModel, config: Config, **kwargs: Any
) -> JwtAuthenticator:
    jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
    jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
    return JwtAuthenticator(
        config=config,
        parameters=model.parameters or {},
        algorithm=JwtAlgorithm(model.algorithm.value),
        secret_key=model.secret_key,
        base64_encode_secret_key=model.base64_encode_secret_key,
        token_duration=model.token_duration,
        header_prefix=model.header_prefix,
        kid=jwt_headers.kid,
        typ=jwt_headers.typ,
        cty=jwt_headers.cty,
        iss=jwt_payload.iss,
        sub=jwt_payload.sub,
        aud=jwt_payload.aud,
        additional_jwt_headers=model.additional_jwt_headers,
        additional_jwt_payload=model.additional_jwt_payload,
    )
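A hedged sketch (the secret interpolation, HS256 algorithm, and token duration are placeholder choices):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    JwtAuthenticator as JwtAuthenticatorModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

authenticator = ModelToComponentFactory.create_jwt_authenticator(
    model=JwtAuthenticatorModel(
        type="JwtAuthenticator",
        secret_key="{{ config['api_secret'] }}",
        algorithm="HS256",
        token_duration=1200,
    ),
    config={"api_secret": "<secret>"},
)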
create_list_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ListPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.partition_routers.ListPartitionRouter:
def create_list_partition_router(
    self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
) -> ListPartitionRouter:
    request_option = (
        self._create_component_from_model(model.request_option, config)
        if model.request_option
        else None
    )
    return ListPartitionRouter(
        cursor_field=model.cursor_field,
        request_option=request_option,
        values=model.values,
        config=config,
        parameters=model.parameters or {},
    )
@staticmethod
create_min_max_datetime( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.MinMaxDatetime, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.MinMaxDatetime:
@staticmethod
def create_min_max_datetime(
    model: MinMaxDatetimeModel, config: Config, **kwargs: Any
) -> MinMaxDatetime:
    return MinMaxDatetime(
        datetime=model.datetime,
        datetime_format=model.datetime_format or "",
        max_datetime=model.max_datetime or "",
        min_datetime=model.min_datetime or "",
        parameters=model.parameters or {},
    )
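A small sketch (hedged: the dates and the config key are made-up values):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    MinMaxDatetime as MinMaxDatetimeModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

min_max = ModelToComponentFactory.create_min_max_datetime(
    model=MinMaxDatetimeModel(
        type="MinMaxDatetime",
        datetime="{{ config['start_date'] }}",
        datetime_format="%Y-%m-%dT%H:%M:%SZ",
        min_datetime="2021-01-01T00:00:00Z",
    ),
    config={"start_date": "2020-01-01T00:00:00Z"},
)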
@staticmethod
create_no_auth( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.NoAuth, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.NoAuth:
@staticmethod
create_no_pagination( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.NoPagination, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.paginators.NoPagination:
create_oauth_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.OAuthAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeOauth2Authenticator:
def create_oauth_authenticator(
    self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeOauth2Authenticator:
    profile_assertion = (
        self._create_component_from_model(model.profile_assertion, config=config)
        if model.profile_assertion
        else None
    )

    if model.refresh_token_updater:
        # ignore type error because fixing it would have a lot of dependencies, revisit later
        return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
            config,
            InterpolatedString.create(
                model.token_refresh_endpoint,  # type: ignore
                parameters=model.parameters or {},
            ).eval(config),
            access_token_name=InterpolatedString.create(
                model.access_token_name or "access_token", parameters=model.parameters or {}
            ).eval(config),
            refresh_token_name=model.refresh_token_updater.refresh_token_name,
            expires_in_name=InterpolatedString.create(
                model.expires_in_name or "expires_in", parameters=model.parameters or {}
            ).eval(config),
            client_id_name=InterpolatedString.create(
                model.client_id_name or "client_id", parameters=model.parameters or {}
            ).eval(config),
            client_id=InterpolatedString.create(
                model.client_id, parameters=model.parameters or {}
            ).eval(config)
            if model.client_id
            else model.client_id,
            client_secret_name=InterpolatedString.create(
                model.client_secret_name or "client_secret", parameters=model.parameters or {}
            ).eval(config),
            client_secret=InterpolatedString.create(
                model.client_secret, parameters=model.parameters or {}
            ).eval(config)
            if model.client_secret
            else model.client_secret,
            access_token_config_path=model.refresh_token_updater.access_token_config_path,
            refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
            token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
            grant_type_name=InterpolatedString.create(
                model.grant_type_name or "grant_type", parameters=model.parameters or {}
            ).eval(config),
            grant_type=InterpolatedString.create(
                model.grant_type or "refresh_token", parameters=model.parameters or {}
            ).eval(config),
            refresh_request_body=InterpolatedMapping(
                model.refresh_request_body or {}, parameters=model.parameters or {}
            ).eval(config),
            refresh_request_headers=InterpolatedMapping(
                model.refresh_request_headers or {}, parameters=model.parameters or {}
            ).eval(config),
            scopes=model.scopes,
            token_expiry_date_format=model.token_expiry_date_format,
            message_repository=self._message_repository,
            refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes,
            refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key,
            refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values,
        )
    # ignore type error because fixing it would have a lot of dependencies, revisit later
    return DeclarativeOauth2Authenticator(  # type: ignore
        access_token_name=model.access_token_name or "access_token",
        access_token_value=model.access_token_value,
        client_id_name=model.client_id_name or "client_id",
        client_id=model.client_id,
        client_secret_name=model.client_secret_name or "client_secret",
        client_secret=model.client_secret,
        expires_in_name=model.expires_in_name or "expires_in",
        grant_type_name=model.grant_type_name or "grant_type",
        grant_type=model.grant_type or "refresh_token",
        refresh_request_body=model.refresh_request_body,
        refresh_request_headers=model.refresh_request_headers,
        refresh_token_name=model.refresh_token_name or "refresh_token",
        refresh_token=model.refresh_token,
        scopes=model.scopes,
        token_expiry_date=model.token_expiry_date,
        token_expiry_date_format=model.token_expiry_date_format,
        token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
        token_refresh_endpoint=model.token_refresh_endpoint,
        config=config,
        parameters=model.parameters or {},
        message_repository=self._message_repository,
        profile_assertion=profile_assertion,
        use_profile_assertion=model.use_profile_assertion,
    )
create_offset_increment( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.OffsetIncrement, config: Mapping[str, Any], decoder: airbyte_cdk.Decoder, **kwargs: Any) -> airbyte_cdk.OffsetIncrement:
def create_offset_increment(
    self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
) -> OffsetIncrement:
    if isinstance(decoder, PaginationDecoderDecorator):
        inner_decoder = decoder.decoder
    else:
        inner_decoder = decoder
        decoder = PaginationDecoderDecorator(decoder=decoder)

    if self._is_supported_decoder_for_pagination(inner_decoder):
        decoder_to_use = decoder
    else:
        raise ValueError(
            self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
        )

    return OffsetIncrement(
        page_size=model.page_size,
        config=config,
        decoder=decoder_to_use,
        inject_on_first_request=model.inject_on_first_request or False,
        parameters=model.parameters or {},
    )
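A sketch showing the decoder handling (hedged: the page size and empty config are placeholders). The bare JsonDecoder below gets wrapped into a PaginationDecoderDecorator by the else branch above:

from airbyte_cdk.sources.declarative.decoders import JsonDecoder
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    OffsetIncrement as OffsetIncrementModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
strategy = factory.create_offset_increment(
    model=OffsetIncrementModel(type="OffsetIncrement", page_size=100),
    config={},
    decoder=JsonDecoder(parameters={}),
)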
@staticmethod
create_page_increment( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.PageIncrement, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.PageIncrement:
@staticmethod
def create_page_increment(
    model: PageIncrementModel, config: Config, **kwargs: Any
) -> PageIncrement:
    return PageIncrement(
        page_size=model.page_size,
        config=config,
        start_from_page=model.start_from_page or 0,
        inject_on_first_request=model.inject_on_first_request or False,
        parameters=model.parameters or {},
    )
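A minimal sketch (hedged: placeholder values):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    PageIncrement as PageIncrementModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

strategy = ModelToComponentFactory.create_page_increment(
    model=PageIncrementModel(type="PageIncrement", page_size=50, start_from_page=1),
    config={},
)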
create_parent_stream_config( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ParentStreamConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.ParentStreamConfig:
def create_parent_stream_config(
    self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
) -> ParentStreamConfig:
    declarative_stream = self._create_component_from_model(
        model.stream, config=config, **kwargs
    )
    request_option = (
        self._create_component_from_model(model.request_option, config=config)
        if model.request_option
        else None
    )

    if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
        raise ValueError(
            "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
        )

    model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
        [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
    )

    return ParentStreamConfig(
        parent_key=model.parent_key,
        request_option=request_option,
        stream=declarative_stream,
        partition_field=model.partition_field,
        config=config,
        incremental_dependency=model.incremental_dependency or False,
        parameters=model.parameters or {},
        extra_fields=model.extra_fields,
        lazy_read_pointer=model_lazy_read_pointer,
    )
@staticmethod
create_record_filter( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RecordFilter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.RecordFilter:
@staticmethod
create_request_path( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RequestPath, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.request_path.RequestPath:
@staticmethod
create_request_option( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RequestOption, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.RequestOption:
@staticmethod
def create_request_option(
    model: RequestOptionModel, config: Config, **kwargs: Any
) -> RequestOption:
    inject_into = RequestOptionType(model.inject_into.value)
    field_path: Optional[List[Union[InterpolatedString, str]]] = (
        [
            InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
            for segment in model.field_path
        ]
        if model.field_path
        else None
    )
    field_name = (
        InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
        if model.field_name
        else None
    )
    return RequestOption(
        field_name=field_name,
        field_path=field_path,
        inject_into=inject_into,
        parameters=kwargs.get("parameters", {}),
    )
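A sketch (hedged: the field name and injection target are illustrative; parameters is forwarded through **kwargs as shown above):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    RequestOption as RequestOptionModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

request_option = ModelToComponentFactory.create_request_option(
    model=RequestOptionModel(
        type="RequestOption",
        inject_into="request_parameter",
        field_name="page[size]",
    ),
    config={},
    parameters={},
)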
create_record_selector( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RecordSelector, config: Mapping[str, Any], *, name: str, transformations: Optional[List[airbyte_cdk.RecordTransformation]] = None, decoder: airbyte_cdk.Decoder | None = None, client_side_incremental_sync: Optional[Dict[str, Any]] = None, **kwargs: Any) -> airbyte_cdk.RecordSelector:
def create_record_selector(
    self,
    model: RecordSelectorModel,
    config: Config,
    *,
    name: str,
    transformations: List[RecordTransformation] | None = None,
    decoder: Decoder | None = None,
    client_side_incremental_sync: Dict[str, Any] | None = None,
    **kwargs: Any,
) -> RecordSelector:
    extractor = self._create_component_from_model(
        model=model.extractor, decoder=decoder, config=config
    )
    record_filter = (
        self._create_component_from_model(model.record_filter, config=config)
        if model.record_filter
        else None
    )

    assert model.transform_before_filtering is not None  # for mypy

    transform_before_filtering = model.transform_before_filtering
    if client_side_incremental_sync:
        record_filter = ClientSideIncrementalRecordFilterDecorator(
            config=config,
            parameters=model.parameters,
            condition=model.record_filter.condition
            if (model.record_filter and hasattr(model.record_filter, "condition"))
            else None,
            **client_side_incremental_sync,
        )
        transform_before_filtering = True

    schema_normalization = (
        TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
        if isinstance(model.schema_normalization, SchemaNormalizationModel)
        else self._create_component_from_model(model.schema_normalization, config=config)  # type: ignore[arg-type] # custom normalization model expected here
    )

    return RecordSelector(
        extractor=extractor,
        name=name,
        config=config,
        record_filter=record_filter,
        transformations=transformations or [],
        schema_normalization=schema_normalization,
        parameters=model.parameters or {},
        transform_before_filtering=transform_before_filtering,
    )
@staticmethod
create_remove_fields( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.RemoveFields, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.transformations.RemoveFields:
create_selective_authenticator( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SelectiveAuthenticator, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.DeclarativeAuthenticator:
def create_selective_authenticator(
    self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
) -> DeclarativeAuthenticator:
    authenticators = {
        name: self._create_component_from_model(model=auth, config=config)
        for name, auth in model.authenticators.items()
    }
    # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
    return SelectiveAuthenticator(  # type: ignore[abstract]
        config=config,
        authenticators=authenticators,
        authenticator_selection_path=model.authenticator_selection_path,
        **kwargs,
    )
@staticmethod
create_legacy_session_token_authenticator( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.LegacySessionTokenAuthenticator, config: Mapping[str, Any], *, url_base: str, **kwargs: Any) -> airbyte_cdk.sources.declarative.auth.token.LegacySessionTokenAuthenticator:
@staticmethod
def create_legacy_session_token_authenticator(
    model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any
) -> LegacySessionTokenAuthenticator:
    return LegacySessionTokenAuthenticator(
        api_url=url_base,
        header=model.header,
        login_url=model.login_url,
        password=model.password or "",
        session_token=model.session_token or "",
        session_token_response_key=model.session_token_response_key or "",
        username=model.username or "",
        validate_session_url=model.validate_session_url,
        config=config,
        parameters=model.parameters or {},
    )
create_simple_retriever( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SimpleRetriever, config: Mapping[str, Any], *, name: str, primary_key: Union[str, List[str], List[List[str]], NoneType], stream_slicer: Optional[airbyte_cdk.sources.declarative.stream_slicers.StreamSlicer], request_options_provider: Optional[airbyte_cdk.sources.declarative.requesters.request_options.RequestOptionsProvider] = None, stop_condition_on_cursor: bool = False, client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[airbyte_cdk.RecordTransformation], incremental_sync: Union[airbyte_cdk.sources.declarative.models.declarative_component_schema.IncrementingCountCursor, airbyte_cdk.sources.declarative.models.declarative_component_schema.DatetimeBasedCursor, airbyte_cdk.sources.declarative.models.declarative_component_schema.CustomIncrementalSync, NoneType] = None, **kwargs: Any) -> airbyte_cdk.SimpleRetriever:
def create_simple_retriever(
    self,
    model: SimpleRetrieverModel,
    config: Config,
    *,
    name: str,
    primary_key: Optional[Union[str, List[str], List[List[str]]]],
    stream_slicer: Optional[StreamSlicer],
    request_options_provider: Optional[RequestOptionsProvider] = None,
    stop_condition_on_cursor: bool = False,
    client_side_incremental_sync: Optional[Dict[str, Any]] = None,
    transformations: List[RecordTransformation],
    incremental_sync: Optional[
        Union[
            IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
        ]
    ] = None,
    **kwargs: Any,
) -> SimpleRetriever:
    decoder = (
        self._create_component_from_model(model=model.decoder, config=config)
        if model.decoder
        else JsonDecoder(parameters={})
    )
    requester = self._create_component_from_model(
        model=model.requester, decoder=decoder, config=config, name=name
    )
    record_selector = self._create_component_from_model(
        model=model.record_selector,
        name=name,
        config=config,
        decoder=decoder,
        transformations=transformations,
        client_side_incremental_sync=client_side_incremental_sync,
    )
    url_base = (
        model.requester.url_base
        if hasattr(model.requester, "url_base")
        else requester.get_url_base()
    )

    # Define cursor only if per partition or common incremental support is needed
    cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None

    if (
        not isinstance(stream_slicer, DatetimeBasedCursor)
        or type(stream_slicer) is not DatetimeBasedCursor
    ):
        # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
        # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
        # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
        # request_options_provider
        request_options_provider = stream_slicer or DefaultRequestOptionsProvider(parameters={})
    elif not request_options_provider:
        request_options_provider = DefaultRequestOptionsProvider(parameters={})

    stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})

    cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
    paginator = (
        self._create_component_from_model(
            model=model.paginator,
            config=config,
            url_base=url_base,
            decoder=decoder,
            cursor_used_for_stop_condition=cursor_used_for_stop_condition,
        )
        if model.paginator
        else NoPagination(parameters={})
    )

    ignore_stream_slicer_parameters_on_paginated_requests = (
        model.ignore_stream_slicer_parameters_on_paginated_requests or False
    )

    if (
        model.partition_router
        and isinstance(model.partition_router, SubstreamPartitionRouterModel)
        and not bool(self._connector_state_manager.get_stream_state(name, None))
        and any(
            parent_stream_config.lazy_read_pointer
            for parent_stream_config in model.partition_router.parent_stream_configs
        )
    ):
        if incremental_sync:
            if incremental_sync.type != "DatetimeBasedCursor":
                raise ValueError(
                    f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}."
                )

            elif incremental_sync.step or incremental_sync.cursor_granularity:
                raise ValueError(
                    f"Found more than one slice per parent. LazySimpleRetriever only supports a single-slice read for stream - {name}."
                )

        if model.decoder and model.decoder.type != "JsonDecoder":
            raise ValueError(
                f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}."
            )

        return LazySimpleRetriever(
            name=name,
            paginator=paginator,
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
            stream_slicer=stream_slicer,
            request_option_provider=request_options_provider,
            cursor=cursor,
            config=config,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            parameters=model.parameters or {},
        )

    if self._limit_slices_fetched or self._emit_connector_builder_messages:
        return SimpleRetrieverTestReadDecorator(
            name=name,
            paginator=paginator,
            primary_key=primary_key,
            requester=requester,
            record_selector=record_selector,
            stream_slicer=stream_slicer,
            request_option_provider=request_options_provider,
            cursor=cursor,
            config=config,
            maximum_number_of_slices=self._limit_slices_fetched or 5,
            ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
            parameters=model.parameters or {},
        )
    return SimpleRetriever(
        name=name,
        paginator=paginator,
        primary_key=primary_key,
        requester=requester,
        record_selector=record_selector,
        stream_slicer=stream_slicer,
        request_option_provider=request_options_provider,
        cursor=cursor,
        config=config,
        ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
        parameters=model.parameters or {},
    )
create_state_delegating_stream( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StateDelegatingStream, config: Mapping[str, Any], has_parent_state: Optional[bool] = None, **kwargs: Any) -> airbyte_cdk.DeclarativeStream:
def create_state_delegating_stream(
    self,
    model: StateDelegatingStreamModel,
    config: Config,
    has_parent_state: Optional[bool] = None,
    **kwargs: Any,
) -> DeclarativeStream:
    if (
        model.full_refresh_stream.name != model.name
        or model.name != model.incremental_stream.name
    ):
        raise ValueError(
            f"state_delegating_stream, full_refresh_stream, and incremental_stream must have equal names. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
        )

    stream_model = (
        model.incremental_stream
        if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
        else model.full_refresh_stream
    )

    return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return] # stream_model describes a stream, so a DeclarativeStream will be created
create_async_retriever( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.AsyncRetriever, config: Mapping[str, Any], *, name: str, primary_key: Union[str, List[str], List[List[str]], NoneType], stream_slicer: Optional[airbyte_cdk.sources.declarative.stream_slicers.StreamSlicer], client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[airbyte_cdk.RecordTransformation], **kwargs: Any) -> airbyte_cdk.sources.declarative.retrievers.AsyncRetriever:
def create_async_retriever(
    self,
    model: AsyncRetrieverModel,
    config: Config,
    *,
    name: str,
    primary_key: Optional[
        Union[str, List[str], List[List[str]]]
    ],  # this seems to be needed to match create_simple_retriever
    stream_slicer: Optional[StreamSlicer],
    client_side_incremental_sync: Optional[Dict[str, Any]] = None,
    transformations: List[RecordTransformation],
    **kwargs: Any,
) -> AsyncRetriever:
    def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever:
        record_selector = RecordSelector(
            extractor=download_extractor,
            name=name,
            record_filter=None,
            transformations=transformations,
            schema_normalization=TypeTransformer(TransformConfig.NoTransform),
            config=config,
            parameters={},
        )
        paginator = (
            self._create_component_from_model(
                model=model.download_paginator,
                decoder=decoder,
                config=config,
                url_base="",
            )
            if model.download_paginator
            else NoPagination(parameters={})
        )
        maximum_number_of_slices = self._limit_slices_fetched or 5

        if self._limit_slices_fetched or self._emit_connector_builder_messages:
            return SimpleRetrieverTestReadDecorator(
                requester=download_requester,
                record_selector=record_selector,
                primary_key=None,
                name=job_download_components_name,
                paginator=paginator,
                config=config,
                parameters={},
                maximum_number_of_slices=maximum_number_of_slices,
            )

        return SimpleRetriever(
            requester=download_requester,
            record_selector=record_selector,
            primary_key=None,
            name=job_download_components_name,
            paginator=paginator,
            config=config,
            parameters={},
        )

    def _get_job_timeout() -> datetime.timedelta:
        user_defined_timeout: Optional[int] = (
            int(
                InterpolatedString.create(
                    str(model.polling_job_timeout),
                    parameters={},
                ).eval(config)
            )
            if model.polling_job_timeout
            else None
        )

        # For test reads, use the user-defined timeout, defaulting to 15 minutes.
        test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
        # Outside the Connector Builder, the default is 60 minutes.
        default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)

        return (
            test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
        )

    decoder = (
        self._create_component_from_model(model=model.decoder, config=config)
        if model.decoder
        else JsonDecoder(parameters={})
    )
    record_selector = self._create_component_from_model(
        model=model.record_selector,
        config=config,
        decoder=decoder,
        name=name,
        transformations=transformations,
        client_side_incremental_sync=client_side_incremental_sync,
    )
    stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
    creation_requester = self._create_component_from_model(
        model=model.creation_requester,
        decoder=decoder,
        config=config,
        name=f"job creation - {name}",
    )
    polling_requester = self._create_component_from_model(
        model=model.polling_requester,
        decoder=decoder,
        config=config,
        name=f"job polling - {name}",
    )
    job_download_components_name = f"job download - {name}"
    download_decoder = (
        self._create_component_from_model(model=model.download_decoder, config=config)
        if model.download_decoder
        else JsonDecoder(parameters={})
    )
    download_extractor = (
        self._create_component_from_model(
            model=model.download_extractor,
            config=config,
            decoder=download_decoder,
            parameters=model.parameters,
        )
        if model.download_extractor
        else DpathExtractor(
            [],
            config=config,
            decoder=download_decoder,
            parameters=model.parameters or {},
        )
    )
    download_requester = self._create_component_from_model(
        model=model.download_requester,
        decoder=download_decoder,
        config=config,
        name=job_download_components_name,
    )
    download_retriever = _get_download_retriever()
    abort_requester = (
        self._create_component_from_model(
            model=model.abort_requester,
            decoder=decoder,
            config=config,
            name=f"job abort - {name}",
        )
        if model.abort_requester
        else None
    )
    delete_requester = (
        self._create_component_from_model(
            model=model.delete_requester,
            decoder=decoder,
            config=config,
            name=f"job delete - {name}",
        )
        if model.delete_requester
        else None
    )
    download_target_requester = (
        self._create_component_from_model(
            model=model.download_target_requester,
            decoder=decoder,
            config=config,
            name=f"job extract_url - {name}",
        )
        if model.download_target_requester
        else None
    )
    status_extractor = self._create_component_from_model(
        model=model.status_extractor, decoder=decoder, config=config, name=name
    )
    download_target_extractor = self._create_component_from_model(
        model=model.download_target_extractor,
        decoder=decoder,
        config=config,
        name=name,
    )

    job_repository: AsyncJobRepository = AsyncHttpJobRepository(
        creation_requester=creation_requester,
        polling_requester=polling_requester,
        download_retriever=download_retriever,
        download_target_requester=download_target_requester,
        abort_requester=abort_requester,
        delete_requester=delete_requester,
        status_extractor=status_extractor,
        status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
        download_target_extractor=download_target_extractor,
        job_timeout=_get_job_timeout(),
    )

    async_job_partition_router = AsyncJobPartitionRouter(
        job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
            job_repository,
            stream_slices,
            self._job_tracker,
            self._message_repository,
            # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk
            has_bulk_parent=False,
            # Set `job_max_retry` to 1 for the Connector Builder use-case.
            # `None` means the default retry, which is 3 attempts under the hood.
            job_max_retry=1 if self._emit_connector_builder_messages else None,
        ),
        stream_slicer=stream_slicer,
        config=config,
        parameters=model.parameters or {},
    )

    return AsyncRetriever(
        record_selector=record_selector,
        stream_slicer=async_job_partition_router,
        config=config,
        parameters=model.parameters or {},
    )
@staticmethod
create_spec( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Spec, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.spec.Spec:
create_substream_partition_router( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.SubstreamPartitionRouter, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.SubstreamPartitionRouter:
def create_substream_partition_router(
    self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any
) -> SubstreamPartitionRouter:
    parent_stream_configs = []
    if model.parent_stream_configs:
        parent_stream_configs.extend(
            [
                self._create_message_repository_substream_wrapper(
                    model=parent_stream_config, config=config, **kwargs
                )
                for parent_stream_config in model.parent_stream_configs
            ]
        )

    return SubstreamPartitionRouter(
        parent_stream_configs=parent_stream_configs,
        parameters=model.parameters or {},
        config=config,
    )
@staticmethod
create_wait_time_from_header( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.WaitTimeFromHeader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.WaitTimeFromHeaderBackoffStrategy:
@staticmethod
def create_wait_time_from_header(
    model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitTimeFromHeaderBackoffStrategy:
    return WaitTimeFromHeaderBackoffStrategy(
        header=model.header,
        parameters=model.parameters or {},
        config=config,
        regex=model.regex,
        max_waiting_time_in_seconds=model.max_waiting_time_in_seconds
        if model.max_waiting_time_in_seconds is not None
        else None,
    )
@staticmethod
create_wait_until_time_from_header( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.WaitUntilTimeFromHeader, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.WaitUntilTimeFromHeaderBackoffStrategy:
@staticmethod
def create_wait_until_time_from_header(
    model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
) -> WaitUntilTimeFromHeaderBackoffStrategy:
    return WaitUntilTimeFromHeaderBackoffStrategy(
        header=model.header,
        parameters=model.parameters or {},
        config=config,
        min_wait=model.min_wait,
        regex=model.regex,
    )
@staticmethod
create_components_mapping_definition( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ComponentMappingDefinition, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.ComponentMappingDefinition:
@staticmethod
def create_components_mapping_definition(
    model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
) -> ComponentMappingDefinition:
    interpolated_value = InterpolatedString.create(
        model.value, parameters=model.parameters or {}
    )
    field_path = [
        InterpolatedString.create(path, parameters=model.parameters or {})
        for path in model.field_path
    ]
    return ComponentMappingDefinition(
        field_path=field_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
        value=interpolated_value,
        value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
        parameters=model.parameters or {},
    )
create_http_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpComponentsResolver, config: Mapping[str, Any]) -> Any:
def create_http_components_resolver(
    self, model: HttpComponentsResolverModel, config: Config
) -> Any:
    stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
    combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)

    retriever = self._create_component_from_model(
        model=model.retriever,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=stream_slicer if stream_slicer else combined_slicers,
        transformations=[],
    )

    components_mapping = [
        self._create_component_from_model(
            model=components_mapping_definition_model,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                components_mapping_definition_model.value_type
            ),
            config=config,
        )
        for components_mapping_definition_model in model.components_mapping
    ]

    return HttpComponentsResolver(
        retriever=retriever,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
@staticmethod
create_stream_config( model: airbyte_cdk.sources.declarative.models.declarative_component_schema.StreamConfig, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.declarative.resolvers.StreamConfig:
@staticmethod
def create_stream_config(
    model: StreamConfigModel, config: Config, **kwargs: Any
) -> StreamConfig:
    model_configs_pointer: List[Union[InterpolatedString, str]] = (
        [x for x in model.configs_pointer] if model.configs_pointer else []
    )

    return StreamConfig(
        configs_pointer=model_configs_pointer,
        parameters=model.parameters or {},
    )
create_config_components_resolver( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.ConfigComponentsResolver, config: Mapping[str, Any]) -> Any:
def create_config_components_resolver(
    self, model: ConfigComponentsResolverModel, config: Config
) -> Any:
    stream_config = self._create_component_from_model(
        model.stream_config, config=config, parameters=model.parameters or {}
    )

    components_mapping = [
        self._create_component_from_model(
            model=components_mapping_definition_model,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                components_mapping_definition_model.value_type
            ),
            config=config,
        )
        for components_mapping_definition_model in model.components_mapping
    ]

    return ConfigComponentsResolver(
        stream_config=stream_config,
        config=config,
        components_mapping=components_mapping,
        parameters=model.parameters or {},
    )
create_http_api_budget( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HTTPAPIBudget, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.HttpAPIBudget:
def create_http_api_budget(
    self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
) -> HttpAPIBudget:
    policies = [
        self._create_component_from_model(model=policy, config=config)
        for policy in model.policies
    ]

    return HttpAPIBudget(
        policies=policies,
        ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
        ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
        status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
    )
create_fixed_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.FixedWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.FixedWindowCallRatePolicy:
def create_fixed_window_call_rate_policy(
    self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> FixedWindowCallRatePolicy:
    matchers = [
        self._create_component_from_model(model=matcher, config=config)
        for matcher in model.matchers
    ]

    # Set the initial reset timestamp to 10 days from now.
    # This value will be updated by the first request.
    return FixedWindowCallRatePolicy(
        next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
        period=parse_duration(model.period),
        call_limit=model.call_limit,
        matchers=matchers,
    )
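A hedged sketch (the one-minute period, the limit, and the matcher pattern are placeholders; the period must be an ISO-8601 duration string because it is parsed with isodate.parse_duration):

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
policy = factory.create_fixed_window_call_rate_policy(
    model=FixedWindowCallRatePolicyModel(
        type="FixedWindowCallRatePolicy",
        period="PT1M",  # one-minute window
        call_limit=60,
        matchers=[
            HttpRequestRegexMatcherModel(
                type="HttpRequestRegexMatcher", url_path_pattern="/users"
            )
        ],
    ),
    config={},
)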
create_moving_window_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.MovingWindowCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.MovingWindowCallRatePolicy:
def create_moving_window_call_rate_policy(
    self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
) -> MovingWindowCallRatePolicy:
    rates = [
        self._create_component_from_model(model=rate, config=config) for rate in model.rates
    ]
    matchers = [
        self._create_component_from_model(model=matcher, config=config)
        for matcher in model.matchers
    ]
    return MovingWindowCallRatePolicy(
        rates=rates,
        matchers=matchers,
    )
create_unlimited_call_rate_policy( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.UnlimitedCallRatePolicy, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.UnlimitedCallRatePolicy:
def create_unlimited_call_rate_policy(
    self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
) -> UnlimitedCallRatePolicy:
    matchers = [
        self._create_component_from_model(model=matcher, config=config)
        for matcher in model.matchers
    ]

    return UnlimitedCallRatePolicy(
        matchers=matchers,
    )
create_rate( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.Rate, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.Rate:
create_http_request_matcher( self, model: airbyte_cdk.sources.declarative.models.declarative_component_schema.HttpRequestRegexMatcher, config: Mapping[str, Any], **kwargs: Any) -> airbyte_cdk.sources.streams.call_rate.HttpRequestRegexMatcher:
def create_http_request_matcher(
    self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
) -> HttpRequestRegexMatcher:
    return HttpRequestRegexMatcher(
        method=model.method,
        url_base=model.url_base,
        url_path_pattern=model.url_path_pattern,
        params=model.params,
        headers=model.headers,
    )