airbyte_cdk.sources.declarative.parsers.model_to_component_factory
#
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
#

from __future__ import annotations

import datetime
import importlib
import inspect
import logging
import re
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Mapping,
    MutableMapping,
    Optional,
    Tuple,
    Type,
    Union,
    cast,
    get_args,
    get_origin,
    get_type_hints,
)

from airbyte_protocol_dataclasses.models import ConfiguredAirbyteStream
from isodate import parse_duration
from pydantic.v1 import BaseModel
from requests import Response

from airbyte_cdk.connector_builder.models import (
    LogMessage as ConnectorBuilderLogMessage,
)
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    ConfiguredAirbyteCatalog,
    FailureType,
    Level,
    StreamDescriptor,
)
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
    DeclarativeAuthenticator,
    NoAuth,
)
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
from airbyte_cdk.sources.declarative.auth.oauth import (
    DeclarativeSingleUseRefreshTokenOauth2Authenticator,
)
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
from airbyte_cdk.sources.declarative.auth.token import (
    ApiKeyAuthenticator,
    BasicHttpAuthenticator,
    BearerAuthenticator,
    LegacySessionTokenAuthenticator,
)
from airbyte_cdk.sources.declarative.auth.token_provider import (
    InterpolatedStringTokenProvider,
    SessionTokenProvider,
    TokenProvider,
)
from airbyte_cdk.sources.declarative.checks import (
    CheckDynamicStream,
    CheckStream,
    DynamicStreamCheckConfig,
)
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.decoders import (
    Decoder,
    IterableDecoder,
    JsonDecoder,
    PaginationDecoderDecorator,
    XmlDecoder,
    ZipfileDecoder,
)
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
    CompositeRawDecoder,
    CsvParser,
    GzipParser,
    JsonLineParser,
    JsonParser,
    Parser,
)
from airbyte_cdk.sources.declarative.extractors import (
    DpathExtractor,
    RecordFilter,
    RecordSelector,
    ResponseToFileExtractor,
)
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
from airbyte_cdk.sources.declarative.extractors.record_filter import (
    ClientSideIncrementalRecordFilterDecorator,
)
from airbyte_cdk.sources.declarative.incremental import (
    ConcurrentCursorFactory,
    ConcurrentPerPartitionCursor,
)
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
    LegacyToPerPartitionStateMigration,
)
from airbyte_cdk.sources.declarative.models import (
    CustomStateMigration,
    PaginationResetLimits,
)
from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (
    DEPRECATION_LOGS_TAG,
    BaseModelWithDeprecations,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    Action1 as PaginationResetActionModel,
    AddedFieldDefinition as AddedFieldDefinitionModel,
    AddFields as AddFieldsModel,
    ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
    AsyncJobStatusMap as AsyncJobStatusMapModel,
    AsyncRetriever as AsyncRetrieverModel,
    BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
    BearerAuthenticator as BearerAuthenticatorModel,
    CheckDynamicStream as CheckDynamicStreamModel,
    CheckStream as CheckStreamModel,
    ComplexFieldType as ComplexFieldTypeModel,
    ComponentMappingDefinition as ComponentMappingDefinitionModel,
    CompositeErrorHandler as CompositeErrorHandlerModel,
    ConcurrencyLevel as ConcurrencyLevelModel,
    ConfigAddFields as ConfigAddFieldsModel,
    ConfigComponentsResolver as ConfigComponentsResolverModel,
    ConfigMigration as ConfigMigrationModel,
    ConfigRemapField as ConfigRemapFieldModel,
    ConfigRemoveFields as ConfigRemoveFieldsModel,
    ConstantBackoffStrategy as ConstantBackoffStrategyModel,
    CsvDecoder as CsvDecoderModel,
    CursorPagination as CursorPaginationModel,
    CustomAuthenticator as CustomAuthenticatorModel,
    CustomBackoffStrategy as CustomBackoffStrategyModel,
    CustomConfigTransformation as CustomConfigTransformationModel,
    CustomDecoder as CustomDecoderModel,
    CustomErrorHandler as CustomErrorHandlerModel,
    CustomPaginationStrategy as CustomPaginationStrategyModel,
    CustomPartitionRouter as CustomPartitionRouterModel,
    CustomRecordExtractor as CustomRecordExtractorModel,
    CustomRecordFilter as CustomRecordFilterModel,
    CustomRequester as CustomRequesterModel,
    CustomRetriever as CustomRetrieverModel,
    CustomSchemaLoader as CustomSchemaLoader,
    CustomSchemaNormalization as CustomSchemaNormalizationModel,
    CustomTransformation as CustomTransformationModel,
    CustomValidationStrategy as CustomValidationStrategyModel,
    DatetimeBasedCursor as DatetimeBasedCursorModel,
    DeclarativeStream as DeclarativeStreamModel,
    DefaultErrorHandler as DefaultErrorHandlerModel,
    DefaultPaginator as DefaultPaginatorModel,
    DpathExtractor as DpathExtractorModel,
    DpathFlattenFields as DpathFlattenFieldsModel,
    DpathValidator as DpathValidatorModel,
    DynamicSchemaLoader as DynamicSchemaLoaderModel,
    DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
    ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
    FileUploader as FileUploaderModel,
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
    FlattenFields as FlattenFieldsModel,
    GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
    GroupingPartitionRouter as GroupingPartitionRouterModel,
    GzipDecoder as GzipDecoderModel,
    HTTPAPIBudget as HTTPAPIBudgetModel,
    HttpComponentsResolver as HttpComponentsResolverModel,
    HttpRequester as HttpRequesterModel,
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
    HttpResponseFilter as HttpResponseFilterModel,
    IncrementingCountCursor as IncrementingCountCursorModel,
    InlineSchemaLoader as InlineSchemaLoaderModel,
    IterableDecoder as IterableDecoderModel,
    JsonDecoder as JsonDecoderModel,
    JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
    JsonlDecoder as JsonlDecoderModel,
    JsonSchemaPropertySelector as JsonSchemaPropertySelectorModel,
    JwtAuthenticator as JwtAuthenticatorModel,
    JwtHeaders as JwtHeadersModel,
    JwtPayload as JwtPayloadModel,
    KeysReplace as KeysReplaceModel,
    KeysToLower as KeysToLowerModel,
    KeysToSnakeCase as KeysToSnakeCaseModel,
    LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
    LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
    ListPartitionRouter as ListPartitionRouterModel,
    MinMaxDatetime as MinMaxDatetimeModel,
    MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
    NoAuth as NoAuthModel,
    NoPagination as NoPaginationModel,
    OAuthAuthenticator as OAuthAuthenticatorModel,
    OffsetIncrement as OffsetIncrementModel,
    PageIncrement as PageIncrementModel,
    PaginationReset as PaginationResetModel,
    ParametrizedComponentsResolver as ParametrizedComponentsResolverModel,
    ParentStreamConfig as ParentStreamConfigModel,
    PredicateValidator as PredicateValidatorModel,
    PropertiesFromEndpoint as PropertiesFromEndpointModel,
    PropertyChunking as PropertyChunkingModel,
    PropertyLimitType as PropertyLimitTypeModel,
    QueryProperties as QueryPropertiesModel,
    Rate as RateModel,
    RecordFilter as RecordFilterModel,
    RecordSelector as RecordSelectorModel,
    RefreshTokenUpdater as RefreshTokenUpdaterModel,
    RemoveFields as RemoveFieldsModel,
    RequestOption as RequestOptionModel,
    RequestPath as RequestPathModel,
    ResponseToFileExtractor as ResponseToFileExtractorModel,
    SchemaNormalization as SchemaNormalizationModel,
    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
    SelectiveAuthenticator as SelectiveAuthenticatorModel,
    SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
    SimpleRetriever as SimpleRetrieverModel,
    Spec as SpecModel,
    StateDelegatingStream as StateDelegatingStreamModel,
    StreamConfig as StreamConfigModel,
    SubstreamPartitionRouter as SubstreamPartitionRouterModel,
    TypesMap as TypesMapModel,
    UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
    ValidateAdheresToSchema as ValidateAdheresToSchemaModel,
    ValueType,
    WaitTimeFromHeader as WaitTimeFromHeaderModel,
    WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
    XmlDecoder as XmlDecoderModel,
    ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.partition_routers import (
    CartesianProductStreamSlicer,
    GroupingPartitionRouter,
    ListPartitionRouter,
    PartitionRouter,
    SinglePartitionRouter,
    SubstreamPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
    AsyncJobPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
    ParentStreamConfig,
)
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
    CompositeErrorHandler,
    DefaultErrorHandler,
    HttpResponseFilter,
)
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
    ConstantBackoffStrategy,
    ExponentialBackoffStrategy,
    WaitTimeFromHeaderBackoffStrategy,
    WaitUntilTimeFromHeaderBackoffStrategy,
)
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
from airbyte_cdk.sources.declarative.requesters.paginators import (
    DefaultPaginator,
    NoPagination,
    PaginatorTestReadDecorator,
)
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
    CursorPaginationStrategy,
    CursorStopCondition,
    OffsetIncrement,
    PageIncrement,
    StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.query_properties import (
    PropertiesFromEndpoint,
    PropertyChunking,
    QueryProperties,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
    PropertyLimitType,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.property_selector import (
    JsonSchemaPropertySelector,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
    GroupByKey,
)
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
    DatetimeBasedRequestOptionsProvider,
    DefaultRequestOptionsProvider,
    InterpolatedRequestOptionsProvider,
    RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_options.per_partition_request_option_provider import (
    PerPartitionRequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
from airbyte_cdk.sources.declarative.resolvers import (
    ComponentMappingDefinition,
    ConfigComponentsResolver,
    HttpComponentsResolver,
    ParametrizedComponentsResolver,
    StreamConfig,
    StreamParametersDefinition,
)
from airbyte_cdk.sources.declarative.retrievers import (
    AsyncRetriever,
    LazySimpleRetriever,
    SimpleRetriever,
)
from airbyte_cdk.sources.declarative.retrievers.file_uploader import (
    ConnectorBuilderFileUploader,
    DefaultFileUploader,
    FileUploader,
    LocalFileSystemFileWriter,
    NoopFileWriter,
)
from airbyte_cdk.sources.declarative.retrievers.pagination_tracker import PaginationTracker
from airbyte_cdk.sources.declarative.schema import (
    ComplexFieldType,
    DefaultSchemaLoader,
    DynamicSchemaLoader,
    InlineSchemaLoader,
    JsonFileSchemaLoader,
    SchemaLoader,
    SchemaTypeIdentifier,
    TypesMap,
)
from airbyte_cdk.sources.declarative.schema.caching_schema_loader_decorator import (
    CachingSchemaLoaderDecorator,
)
from airbyte_cdk.sources.declarative.schema.composite_schema_loader import CompositeSchemaLoader
from airbyte_cdk.sources.declarative.spec import ConfigMigration, Spec
from airbyte_cdk.sources.declarative.stream_slicers import (
    StreamSlicer,
    StreamSlicerTestReadDecorator,
)
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
    DeclarativePartitionFactory,
    StreamSlicerPartitionGenerator,
)
from airbyte_cdk.sources.declarative.transformations import (
    AddFields,
    RecordTransformation,
    RemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
from airbyte_cdk.sources.declarative.transformations.config_transformations import (
    ConfigAddFields,
    ConfigRemapField,
    ConfigRemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
    ConfigTransformation,
)
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
    DpathFlattenFields,
    KeyTransformation,
)
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
    FlattenFields,
)
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
    KeysReplaceTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
    KeysToLowerTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)
from airbyte_cdk.sources.declarative.validators import (
    DpathValidator,
    PredicateValidator,
    ValidateAdheresToSchema,
)
from airbyte_cdk.sources.http_logger import format_http_message
from airbyte_cdk.sources.message import (
    InMemoryMessageRepository,
    LogAppenderMessageRepositoryDecorator,
    MessageRepository,
    NoopMessageRepository,
)
from airbyte_cdk.sources.message.repository import StateFilteringMessageRepository
from airbyte_cdk.sources.streams.call_rate import (
    APIBudget,
    FixedWindowCallRatePolicy,
    HttpAPIBudget,
    HttpRequestRegexMatcher,
    MovingWindowCallRatePolicy,
    Rate,
    UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
from airbyte_cdk.sources.streams.concurrent.clamping import (
    ClampingEndProvider,
    ClampingStrategy,
    DayClampingStrategy,
    MonthClampingStrategy,
    NoClamping,
    WeekClampingStrategy,
    Weekday,
)
from airbyte_cdk.sources.streams.concurrent.cursor import (
    ConcurrentCursor,
    Cursor,
    CursorField,
    FinalStateCursor,
)
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import (
    StreamSlicer as ConcurrentStreamSlicer,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
    CustomFormatConcurrentStreamStateConverter,
    DateTimeStreamStateConverter,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

ComponentDefinition = Mapping[str, Any]

SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}
_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})

# Ideally this should use the value defined in ConcurrentDeclarativeSource, but
# this would be a circular import
MAX_SLICES = 5

LOGGER = logging.getLogger(f"airbyte.model_to_component_factory")

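# Illustrative sketch (not part of the CDK source): SCHEMA_TRANSFORMER_TYPE_MAPPING bridges the
# manifest-level `schema_normalization` enum and the TransformConfig flags consumed by TypeTransformer.
# Roughly, a factory method that honours schema normalization could do something like:
#
#     transform_config = SCHEMA_TRANSFORMER_TYPE_MAPPING[SchemaNormalizationModel.Default]
#     transformer = TypeTransformer(transform_config)  # -> TransformConfig.DefaultSchemaNormalization
#
# The names used above all come from the imports in this module; the snippet itself is hypothetical.
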

class ModelToComponentFactory:
    EPOCH_DATETIME_FORMAT = "%s"

    def __init__(
        self,
        limit_pages_fetched_per_slice: Optional[int] = None,
        limit_slices_fetched: Optional[int] = None,
        emit_connector_builder_messages: bool = False,
        disable_retries: bool = False,
        disable_cache: bool = False,
        message_repository: Optional[MessageRepository] = None,
        connector_state_manager: Optional[ConnectorStateManager] = None,
        max_concurrent_async_job_count: Optional[int] = None,
        configured_catalog: Optional[ConfiguredAirbyteCatalog] = None,
        api_budget: Optional[APIBudget] = None,
    ):
        self._init_mappings()
        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
        self._limit_slices_fetched = limit_slices_fetched
        self._emit_connector_builder_messages = emit_connector_builder_messages
        self._disable_retries = disable_retries
        self._disable_cache = disable_cache
        self._message_repository = message_repository or InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
        self._stream_name_to_configured_stream = self._create_stream_name_to_configured_stream(
            configured_catalog
        )
        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
        self._api_budget: Optional[Union[APIBudget]] = api_budget
        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
        # placeholder for deprecation warnings
        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []

    def _init_mappings(self) -> None:
        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
            AddedFieldDefinitionModel: self.create_added_field_definition,
            AddFieldsModel: self.create_add_fields,
            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
            BearerAuthenticatorModel: self.create_bearer_authenticator,
            CheckStreamModel: self.create_check_stream,
            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
            CheckDynamicStreamModel: self.create_check_dynamic_stream,
            CompositeErrorHandlerModel: self.create_composite_error_handler,
            ConcurrencyLevelModel: self.create_concurrency_level,
            ConfigMigrationModel: self.create_config_migration,
            ConfigAddFieldsModel: self.create_config_add_fields,
            ConfigRemapFieldModel: self.create_config_remap_field,
            ConfigRemoveFieldsModel: self.create_config_remove_fields,
            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
            CsvDecoderModel: self.create_csv_decoder,
            CursorPaginationModel: self.create_cursor_pagination,
            CustomAuthenticatorModel: self.create_custom_component,
            CustomBackoffStrategyModel: self.create_custom_component,
            CustomDecoderModel: self.create_custom_component,
            CustomErrorHandlerModel: self.create_custom_component,
            CustomRecordExtractorModel: self.create_custom_component,
            CustomRecordFilterModel: self.create_custom_component,
            CustomRequesterModel: self.create_custom_component,
            CustomRetrieverModel: self.create_custom_component,
            CustomSchemaLoader: self.create_custom_component,
            CustomSchemaNormalizationModel: self.create_custom_component,
            CustomStateMigration: self.create_custom_component,
            CustomPaginationStrategyModel: self.create_custom_component,
            CustomPartitionRouterModel: self.create_custom_component,
            CustomTransformationModel: self.create_custom_component,
            CustomValidationStrategyModel: self.create_custom_component,
            CustomConfigTransformationModel: self.create_custom_component,
            DeclarativeStreamModel: self.create_default_stream,
            DefaultErrorHandlerModel: self.create_default_error_handler,
            DefaultPaginatorModel: self.create_default_paginator,
            DpathExtractorModel: self.create_dpath_extractor,
            DpathValidatorModel: self.create_dpath_validator,
            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
            GroupByKeyMergeStrategyModel: self.create_group_by_key,
            HttpRequesterModel: self.create_http_requester,
            HttpResponseFilterModel: self.create_http_response_filter,
            InlineSchemaLoaderModel: self.create_inline_schema_loader,
            JsonDecoderModel: self.create_json_decoder,
            JsonlDecoderModel: self.create_jsonl_decoder,
            JsonSchemaPropertySelectorModel: self.create_json_schema_property_selector,
            GzipDecoderModel: self.create_gzip_decoder,
            KeysToLowerModel: self.create_keys_to_lower_transformation,
            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
            KeysReplaceModel: self.create_keys_replace_transformation,
            FlattenFieldsModel: self.create_flatten_fields,
            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
            IterableDecoderModel: self.create_iterable_decoder,
            XmlDecoderModel: self.create_xml_decoder,
            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
            TypesMapModel: self.create_types_map,
            ComplexFieldTypeModel: self.create_complex_field_type,
            JwtAuthenticatorModel: self.create_jwt_authenticator,
            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
            ListPartitionRouterModel: self.create_list_partition_router,
            MinMaxDatetimeModel: self.create_min_max_datetime,
            NoAuthModel: self.create_no_auth,
            NoPaginationModel: self.create_no_pagination,
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
            ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper,
            PredicateValidatorModel: self.create_predicate_validator,
            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
            PropertyChunkingModel: self.create_property_chunking,
            QueryPropertiesModel: self.create_query_properties,
            RecordFilterModel: self.create_record_filter,
            RecordSelectorModel: self.create_record_selector,
            RemoveFieldsModel: self.create_remove_fields,
            RequestPathModel: self.create_request_path,
            RequestOptionModel: self.create_request_option,
            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
            SelectiveAuthenticatorModel: self.create_selective_authenticator,
            SimpleRetrieverModel: self.create_simple_retriever,
            StateDelegatingStreamModel: self.create_state_delegating_stream,
            SpecModel: self.create_spec,
            SubstreamPartitionRouterModel: self.create_substream_partition_router,
            ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema,
            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
            AsyncRetrieverModel: self.create_async_retriever,
            HttpComponentsResolverModel: self.create_http_components_resolver,
            ConfigComponentsResolverModel: self.create_config_components_resolver,
            ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver,
            StreamConfigModel: self.create_stream_config,
            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
            ZipfileDecoderModel: self.create_zipfile_decoder,
            HTTPAPIBudgetModel: self.create_http_api_budget,
            FileUploaderModel: self.create_file_uploader,
            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
            GroupingPartitionRouterModel: self.create_grouping_partition_router,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}

    @staticmethod
    def _create_stream_name_to_configured_stream(
        configured_catalog: Optional[ConfiguredAirbyteCatalog],
    ) -> Mapping[str, ConfiguredAirbyteStream]:
        return (
            {stream.stream.name: stream for stream in configured_catalog.streams}
            if configured_catalog
            else {}
        )

    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
        creating declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )

    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")

        # collect deprecation warnings for supported models.
        if isinstance(model, BaseModelWithDeprecations):
            self._collect_model_deprecations(model)

        return component_constructor(model=model, config=config, **kwargs)

    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
        """
        Returns the deprecation warnings that were collected during the creation of components.
        """
        return self._collected_deprecation_logs

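    # Usage sketch (illustrative, not part of the factory): building a single runtime component from a
    # manifest snippet. The component definition, config and extra kwargs below are hypothetical.
    #
    #     factory = ModelToComponentFactory()
    #     requester = factory.create_component(
    #         model_type=HttpRequesterModel,
    #         component_definition={
    #             "type": "HttpRequester",
    #             "url_base": "https://api.example.com",
    #             "path": "/v1/items",
    #             "http_method": "GET",
    #         },
    #         config={},
    #         name="items",  # extra kwargs are forwarded to the underlying create_* method
    #     )
    #     deprecations = factory.get_model_deprecations()  # warnings collected while building components
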
    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
        """
        Collects deprecation logs from the given model and appends any new logs to the internal collection.

        This method checks if the provided model has deprecation logs (identified by the presence of the
        DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through
        each deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it
        has not already been collected, ensuring that duplicate logs are avoided.

        Args:
            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
        """
        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
            for log in model._deprecation_logs:
                # avoid duplicates for deprecation logs observed.
                if log not in self._collected_deprecation_logs:
                    self._collected_deprecation_logs.append(log)

    def create_config_migration(
        self, model: ConfigMigrationModel, config: Config
    ) -> ConfigMigration:
        transformations: List[ConfigTransformation] = [
            self._create_component_from_model(transformation, config)
            for transformation in model.transformations
        ]

        return ConfigMigration(
            description=model.description,
            transformations=transformations,
        )

    def create_config_add_fields(
        self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigAddFields:
        fields = [self._create_component_from_model(field, config) for field in model.fields]
        return ConfigAddFields(
            fields=fields,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remove_fields(
        model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigRemoveFields:
        return ConfigRemoveFields(
            field_pointers=model.field_pointers,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remap_field(
        model: ConfigRemapFieldModel, config: Config, **kwargs: Any
    ) -> ConfigRemapField:
        mapping = cast(Mapping[str, Any], model.map)
        return ConfigRemapField(
            map=mapping,
            field_path=model.field_path,
            config=config,
        )

    def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return DpathValidator(
            field_path=model.field_path,
            strategy=strategy,
        )

    def create_predicate_validator(
        self, model: PredicateValidatorModel, config: Config
    ) -> PredicateValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return PredicateValidator(
            value=model.value,
            strategy=strategy,
        )

    @staticmethod
    def create_validate_adheres_to_schema(
        model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any
    ) -> ValidateAdheresToSchema:
        base_schema = cast(Mapping[str, Any], model.base_schema)
        return ValidateAdheresToSchema(
            schema=base_schema,
        )

    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )

    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]

    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )

    def create_legacy_to_per_partition_state_migration(
        self,
        model: LegacyToPerPartitionStateMigrationModel,
        config: Mapping[str, Any],
        declarative_stream: DeclarativeStreamModel,
    ) -> LegacyToPerPartitionStateMigration:
        retriever = declarative_stream.retriever
        if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}"
            )
        partition_router = retriever.partition_router
        if not isinstance(
            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
        ):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
            )
        if not hasattr(partition_router, "parent_stream_configs"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
            )

        if not hasattr(declarative_stream, "incremental_sync"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
            )

        return LegacyToPerPartitionStateMigration(
            partition_router,  # type: ignore # was already checked above
            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
            config,
            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=token_provider,
            )

    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_dynamic_stream_check_config(
        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
    ) -> DynamicStreamCheckConfig:
        return DynamicStreamCheckConfig(
            dynamic_stream_name=model.dynamic_stream_name,
            stream_count=model.stream_count or 0,
        )

    def create_check_stream(
        self, model: CheckStreamModel, config: Config, **kwargs: Any
    ) -> CheckStream:
        if model.dynamic_streams_check_configs is None and model.stream_names is None:
            raise ValueError(
                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
            )

        dynamic_streams_check_configs = (
            [
                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
                for dynamic_stream_check_config in model.dynamic_streams_check_configs
            ]
            if model.dynamic_streams_check_configs
            else []
        )

        return CheckStream(
            stream_names=model.stream_names or [],
            dynamic_streams_check_configs=dynamic_streams_check_configs,
            parameters={},
        )

    @staticmethod
    def create_check_dynamic_stream(
        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
    ) -> CheckDynamicStream:
        assert model.use_check_availability is not None  # for mypy

        use_check_availability = model.use_check_availability

        return CheckDynamicStream(
            stream_count=model.stream_count,
            use_check_availability=use_check_availability,
            parameters={},
        )

    def create_composite_error_handler(
        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
    ) -> CompositeErrorHandler:
        error_handlers = [
            self._create_component_from_model(model=error_handler_model, config=config)
            for error_handler_model in model.error_handlers
        ]
        return CompositeErrorHandler(
            error_handlers=error_handlers, parameters=model.parameters or {}
        )

    @staticmethod
    def create_concurrency_level(
        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
    ) -> ConcurrencyLevel:
        return ConcurrencyLevel(
            default_concurrency=model.default_concurrency,
            max_concurrency=model.max_concurrency,
            config=config,
            parameters={},
        )

    @staticmethod
    def apply_stream_state_migrations(
        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        if stream_state_migrations:
            for state_migration in stream_state_migrations:
                if state_migration.should_migrate(stream_state):
                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
                    stream_state = dict(state_migration.migrate(stream_state))
        return stream_state

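    # Behaviour sketch for apply_stream_state_migrations (illustrative, not part of the factory): each
    # migration is asked whether it applies via should_migrate() and, if so, rewrites the state; migrations
    # run in order, so later ones see the output of earlier ones. The migration instance below is
    # hypothetical/elided.
    #
    #     state = {"created_at": "2024-01-01T00:00:00Z"}
    #     state = ModelToComponentFactory.apply_stream_state_migrations(
    #         stream_state_migrations=[some_legacy_to_per_partition_migration],
    #         stream_state=state,
    #     )
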
    def create_concurrent_cursor_from_datetime_based_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: MutableMapping[str, Any],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        runtime_lookback_window: Optional[datetime.timedelta] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition`
        # instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to
        # the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91).
        # So now we have two cases:
        # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
        # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
        # We should change those interfaces to use the model once we clean up the code in CDS at which point the
        # parameter propagation should happen as part of the ModelToComponentFactory.
        if "$parameters" not in component_definition and "parameters" in component_definition:
            component_definition["$parameters"] = component_definition.get("parameters")  # type: ignore # This is a dict
        datetime_based_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
            )

        model_parameters = datetime_based_cursor_model.parameters or {}

        cursor_field = self._get_catalog_defined_cursor_field(
            stream_name=stream_name,
            allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field
            or False,
        )

        if not cursor_field:
            interpolated_cursor_field = InterpolatedString.create(
                datetime_based_cursor_model.cursor_field,
                parameters=model_parameters,
            )
            cursor_field = CursorField(
                cursor_field_key=interpolated_cursor_field.eval(config=config),
                supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field
                or False,
            )

        interpolated_partition_field_start = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_start or "start_time",
            parameters=model_parameters,
        )
        interpolated_partition_field_end = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_end or "end_time",
            parameters=model_parameters,
        )

        slice_boundary_fields = (
            interpolated_partition_field_start.eval(config=config),
            interpolated_partition_field_end.eval(config=config),
        )

        datetime_format = datetime_based_cursor_model.datetime_format

        cursor_granularity = (
            parse_duration(datetime_based_cursor_model.cursor_granularity)
            if datetime_based_cursor_model.cursor_granularity
            else None
        )

        lookback_window = None
        interpolated_lookback_window = (
            InterpolatedString.create(
                datetime_based_cursor_model.lookback_window,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.lookback_window
            else None
        )
        if interpolated_lookback_window:
            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
            if evaluated_lookback_window:
                lookback_window = parse_duration(evaluated_lookback_window)

        connector_state_converter: DateTimeStreamStateConverter
        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )

        # Adjusts the stream state by applying the runtime lookback window.
        # This is used to ensure correct state handling in case of failed partitions.
        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
        if runtime_lookback_window and stream_state_value:
            new_stream_state = (
                connector_state_converter.parse_timestamp(stream_state_value)
                - runtime_lookback_window
            )
            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
                new_stream_state
            )

        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
            start_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.start_datetime, config=config
            )
        else:
            start_date_runtime_value = datetime_based_cursor_model.start_datetime

        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
            end_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.end_datetime, config=config
            )
        else:
            end_date_runtime_value = datetime_based_cursor_model.end_datetime

        interpolated_start_date = MinMaxDatetime.create(
            interpolated_string_or_min_max_datetime=start_date_runtime_value,
            parameters=datetime_based_cursor_model.parameters,
        )
        interpolated_end_date = (
            None
            if not end_date_runtime_value
            else MinMaxDatetime.create(
                end_date_runtime_value, datetime_based_cursor_model.parameters
            )
        )

        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
        if not interpolated_start_date.datetime_format:
            interpolated_start_date.datetime_format = datetime_format
        if interpolated_end_date and not interpolated_end_date.datetime_format:
            interpolated_end_date.datetime_format = datetime_format

        start_date = interpolated_start_date.get_datetime(config=config)
        end_date_provider = (
            partial(interpolated_end_date.get_datetime, config)
            if interpolated_end_date
            else connector_state_converter.get_end_provider()
        )

        if (
            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
        ) or (
            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
        ):
            raise ValueError(
                f"If step is defined, cursor_granularity should be as well and vice-versa. "
                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
            )

        # When step is not defined, default to a step size from the starting date to the present moment
        step_length = datetime.timedelta.max
        interpolated_step = (
            InterpolatedString.create(
                datetime_based_cursor_model.step,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.step
            else None
        )
        if interpolated_step:
            evaluated_step = interpolated_step.eval(config)
            if evaluated_step:
                step_length = parse_duration(evaluated_step)

        clamping_strategy: ClampingStrategy = NoClamping()
        if datetime_based_cursor_model.clamping:
            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
            # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
            # object which we want to keep agnostic of being low-code
            target = InterpolatedString(
                string=datetime_based_cursor_model.clamping.target,
                parameters=model_parameters,
            )
            evaluated_target = target.eval(config=config)
            match evaluated_target:
                case "DAY":
                    clamping_strategy = DayClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        DayClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
                    )
                case "WEEK":
                    if (
                        not datetime_based_cursor_model.clamping.target_details
                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
                    ):
                        raise ValueError(
                            "Given WEEK clamping, weekday needs to be provided as target_details"
                        )
                    weekday = self._assemble_weekday(
                        datetime_based_cursor_model.clamping.target_details["weekday"]
                    )
                    clamping_strategy = WeekClampingStrategy(weekday)
                    end_date_provider = ClampingEndProvider(
                        WeekClampingStrategy(weekday, is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case "MONTH":
                    clamping_strategy = MonthClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        MonthClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests.
Confirmed functionality is working in practice 1509 granularity=cursor_granularity or datetime.timedelta(days=1), 1510 ) 1511 case _: 1512 raise ValueError( 1513 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1514 ) 1515 1516 return ConcurrentCursor( 1517 stream_name=stream_name, 1518 stream_namespace=stream_namespace, 1519 stream_state=stream_state, 1520 message_repository=message_repository or self._message_repository, 1521 connector_state_manager=self._connector_state_manager, 1522 connector_state_converter=connector_state_converter, 1523 cursor_field=cursor_field, 1524 slice_boundary_fields=slice_boundary_fields, 1525 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1526 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1527 lookback_window=lookback_window, 1528 slice_range=step_length, 1529 cursor_granularity=cursor_granularity, 1530 clamping_strategy=clamping_strategy, 1531 ) 1532 1533 def create_concurrent_cursor_from_incrementing_count_cursor( 1534 self, 1535 model_type: Type[BaseModel], 1536 component_definition: ComponentDefinition, 1537 stream_name: str, 1538 stream_namespace: Optional[str], 1539 stream_state: MutableMapping[str, Any], 1540 config: Config, 1541 message_repository: Optional[MessageRepository] = None, 1542 **kwargs: Any, 1543 ) -> ConcurrentCursor: 1544 component_type = component_definition.get("type") 1545 if component_definition.get("type") != model_type.__name__: 1546 raise ValueError( 1547 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1548 ) 1549 1550 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1551 1552 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1553 raise ValueError( 1554 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1555 ) 1556 1557 interpolated_start_value = ( 1558 InterpolatedString.create( 1559 incrementing_count_cursor_model.start_value, # type: ignore 1560 parameters=incrementing_count_cursor_model.parameters or {}, 1561 ) 1562 if incrementing_count_cursor_model.start_value 1563 else 0 1564 ) 1565 1566 cursor_field = self._get_catalog_defined_cursor_field( 1567 stream_name=stream_name, 1568 allow_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1569 or False, 1570 ) 1571 1572 if not cursor_field: 1573 interpolated_cursor_field = InterpolatedString.create( 1574 incrementing_count_cursor_model.cursor_field, 1575 parameters=incrementing_count_cursor_model.parameters or {}, 1576 ) 1577 cursor_field = CursorField( 1578 cursor_field_key=interpolated_cursor_field.eval(config=config), 1579 supports_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1580 or False, 1581 ) 1582 1583 connector_state_converter = IncrementingCountStreamStateConverter( 1584 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1585 ) 1586 1587 return ConcurrentCursor( 1588 stream_name=stream_name, 1589 stream_namespace=stream_namespace, 1590 stream_state=stream_state, 1591 message_repository=message_repository or self._message_repository, 1592 
connector_state_manager=self._connector_state_manager, 1593 connector_state_converter=connector_state_converter, 1594 cursor_field=cursor_field, 1595 slice_boundary_fields=None, 1596 start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1597 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1598 ) 1599 1600 def _assemble_weekday(self, weekday: str) -> Weekday: 1601 match weekday: 1602 case "MONDAY": 1603 return Weekday.MONDAY 1604 case "TUESDAY": 1605 return Weekday.TUESDAY 1606 case "WEDNESDAY": 1607 return Weekday.WEDNESDAY 1608 case "THURSDAY": 1609 return Weekday.THURSDAY 1610 case "FRIDAY": 1611 return Weekday.FRIDAY 1612 case "SATURDAY": 1613 return Weekday.SATURDAY 1614 case "SUNDAY": 1615 return Weekday.SUNDAY 1616 case _: 1617 raise ValueError(f"Unknown weekday {weekday}") 1618 1619 def create_concurrent_cursor_from_perpartition_cursor( 1620 self, 1621 state_manager: ConnectorStateManager, 1622 model_type: Type[BaseModel], 1623 component_definition: ComponentDefinition, 1624 stream_name: str, 1625 stream_namespace: Optional[str], 1626 config: Config, 1627 stream_state: MutableMapping[str, Any], 1628 partition_router: PartitionRouter, 1629 attempt_to_create_cursor_if_not_provided: bool = False, 1630 **kwargs: Any, 1631 ) -> ConcurrentPerPartitionCursor: 1632 component_type = component_definition.get("type") 1633 if component_definition.get("type") != model_type.__name__: 1634 raise ValueError( 1635 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1636 ) 1637 1638 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1639 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1640 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1641 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 
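# Illustrative (hypothetical) shapes for the two cases described above:
#   from model.__dict__:  {"type": "DatetimeBasedCursor", "cursor_field": "updated_at", "parameters": {...}}
#   from the manifest:    {"type": "DatetimeBasedCursor", "cursor_field": "updated_at", "$parameters": {...}}
# The normalization below copies "parameters" into "$parameters" so both cases parse the same way.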
1642 if "$parameters" not in component_definition and "parameters" in component_definition: 1643 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1644 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1645 1646 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1647 raise ValueError( 1648 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1649 ) 1650 1651 cursor_field = self._get_catalog_defined_cursor_field( 1652 stream_name=stream_name, 1653 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1654 or False, 1655 ) 1656 1657 if not cursor_field: 1658 interpolated_cursor_field = InterpolatedString.create( 1659 datetime_based_cursor_model.cursor_field, 1660 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1661 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1662 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1663 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1664 parameters=datetime_based_cursor_model.parameters or {}, 1665 ) 1666 cursor_field = CursorField( 1667 cursor_field_key=interpolated_cursor_field.eval(config=config), 1668 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1669 or False, 1670 ) 1671 1672 datetime_format = datetime_based_cursor_model.datetime_format 1673 1674 cursor_granularity = ( 1675 parse_duration(datetime_based_cursor_model.cursor_granularity) 1676 if datetime_based_cursor_model.cursor_granularity 1677 else None 1678 ) 1679 1680 connector_state_converter: DateTimeStreamStateConverter 1681 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1682 datetime_format=datetime_format, 1683 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1684 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1685 cursor_granularity=cursor_granularity, 1686 ) 1687 1688 # Create the cursor factory 1689 cursor_factory = ConcurrentCursorFactory( 1690 partial( 1691 self.create_concurrent_cursor_from_datetime_based_cursor, 1692 state_manager=state_manager, 1693 model_type=model_type, 1694 component_definition=component_definition, 1695 stream_name=stream_name, 1696 stream_namespace=stream_namespace, 1697 config=config, 1698 message_repository=NoopMessageRepository(), 1699 ) 1700 ) 1701 1702 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1703 use_global_cursor = isinstance( 1704 partition_router, GroupingPartitionRouter 1705 ) or component_definition.get("global_substream_cursor", False) 1706 1707 # Return the concurrent cursor and state converter 1708 return ConcurrentPerPartitionCursor( 1709 cursor_factory=cursor_factory, 1710 partition_router=partition_router, 1711 stream_name=stream_name, 1712 
stream_namespace=stream_namespace, 1713 stream_state=stream_state, 1714 message_repository=self._message_repository, # type: ignore 1715 connector_state_manager=state_manager, 1716 connector_state_converter=connector_state_converter, 1717 cursor_field=cursor_field, 1718 use_global_cursor=use_global_cursor, 1719 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1720 ) 1721 1722 @staticmethod 1723 def create_constant_backoff_strategy( 1724 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1725 ) -> ConstantBackoffStrategy: 1726 return ConstantBackoffStrategy( 1727 backoff_time_in_seconds=model.backoff_time_in_seconds, 1728 config=config, 1729 parameters=model.parameters or {}, 1730 ) 1731 1732 def create_cursor_pagination( 1733 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1734 ) -> CursorPaginationStrategy: 1735 if isinstance(decoder, PaginationDecoderDecorator): 1736 inner_decoder = decoder.decoder 1737 else: 1738 inner_decoder = decoder 1739 decoder = PaginationDecoderDecorator(decoder=decoder) 1740 1741 if self._is_supported_decoder_for_pagination(inner_decoder): 1742 decoder_to_use = decoder 1743 else: 1744 raise ValueError( 1745 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1746 ) 1747 1748 return CursorPaginationStrategy( 1749 cursor_value=model.cursor_value, 1750 decoder=decoder_to_use, 1751 page_size=model.page_size, 1752 stop_condition=model.stop_condition, 1753 config=config, 1754 parameters=model.parameters or {}, 1755 ) 1756 1757 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1758 """ 1759 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1760 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1761 :param model: The Pydantic model of the custom component being created 1762 :param config: The custom defined connector config 1763 :return: The declarative component built from the Pydantic model to be used at runtime 1764 """ 1765 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1766 component_fields = get_type_hints(custom_component_class) 1767 model_args = model.dict() 1768 model_args["config"] = config 1769 1770 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1771 # we defer to these arguments over the component's definition 1772 for key, arg in kwargs.items(): 1773 model_args[key] = arg 1774 1775 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1776 # defined in the schema. The fields and types are defined within the Python class implementation. 
Pydantic can only parse down to 1777 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1778 for model_field, model_value in model_args.items(): 1779 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1780 if ( 1781 isinstance(model_value, dict) 1782 and "type" not in model_value 1783 and model_field in component_fields 1784 ): 1785 derived_type = self._derive_component_type_from_type_hints( 1786 component_fields.get(model_field) 1787 ) 1788 if derived_type: 1789 model_value["type"] = derived_type 1790 1791 if self._is_component(model_value): 1792 model_args[model_field] = self._create_nested_component( 1793 model, 1794 model_field, 1795 model_value, 1796 config, 1797 **kwargs, 1798 ) 1799 elif isinstance(model_value, list): 1800 vals = [] 1801 for v in model_value: 1802 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1803 derived_type = self._derive_component_type_from_type_hints( 1804 component_fields.get(model_field) 1805 ) 1806 if derived_type: 1807 v["type"] = derived_type 1808 if self._is_component(v): 1809 vals.append( 1810 self._create_nested_component( 1811 model, 1812 model_field, 1813 v, 1814 config, 1815 **kwargs, 1816 ) 1817 ) 1818 else: 1819 vals.append(v) 1820 model_args[model_field] = vals 1821 1822 kwargs = { 1823 class_field: model_args[class_field] 1824 for class_field in component_fields.keys() 1825 if class_field in model_args 1826 } 1827 return custom_component_class(**kwargs) 1828 1829 @staticmethod 1830 def _get_class_from_fully_qualified_class_name( 1831 full_qualified_class_name: str, 1832 ) -> Any: 1833 """Get a class from its fully qualified name. 1834 1835 If a custom components module is needed, we assume it is already registered - probably 1836 as `source_declarative_manifest.components` or `components`. 1837 1838 Args: 1839 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1840 1841 Returns: 1842 Any: The class object. 1843 1844 Raises: 1845 ValueError: If the class cannot be loaded. 1846 """ 1847 split = full_qualified_class_name.split(".") 1848 module_name_full = ".".join(split[:-1]) 1849 class_name = split[-1] 1850 1851 try: 1852 module_ref = importlib.import_module(module_name_full) 1853 except ModuleNotFoundError as e: 1854 if split[0] == "source_declarative_manifest": 1855 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. 
In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append 1856 try: 1857 import os 1858 1859 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1860 module_ref = importlib.import_module( 1861 module_name_with_source_declarative_manifest 1862 ) 1863 except ModuleNotFoundError: 1864 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1865 else: 1866 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1867 1868 try: 1869 return getattr(module_ref, class_name) 1870 except AttributeError as e: 1871 raise ValueError( 1872 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1873 ) from e 1874 1875 @staticmethod 1876 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1877 interface = field_type 1878 while True: 1879 origin = get_origin(interface) 1880 if origin: 1881 # Unnest types until we reach the raw type 1882 # List[T] -> T 1883 # Optional[List[T]] -> T 1884 args = get_args(interface) 1885 interface = args[0] 1886 else: 1887 break 1888 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1889 return interface.__name__ 1890 return None 1891 1892 @staticmethod 1893 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1894 if not cls: 1895 return False 1896 return cls.__module__ == "builtins" 1897 1898 @staticmethod 1899 def _extract_missing_parameters(error: TypeError) -> List[str]: 1900 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1901 if parameter_search: 1902 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1903 else: 1904 return [] 1905 1906 def _create_nested_component( 1907 self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any 1908 ) -> Any: 1909 type_name = model_value.get("type", None) 1910 if not type_name: 1911 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1912 return model_value 1913 1914 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1915 if model_type: 1916 parsed_model = model_type.parse_obj(model_value) 1917 try: 1918 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1919 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1920 # components and passing it to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1921 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1922 # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that 1923 # are needed by a component and could not be shared.
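# Illustrative (hypothetical) manifest snippet: a paginator nested under a custom retriever can be given
# the url_base it would otherwise inherit from the HttpRequester via $parameters, e.g.
#   paginator:
#     type: DefaultPaginator
#     $parameters:
#       url_base: "https://api.example.com/v1"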
1924 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1925 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1926 model_parameters = model_value.get("$parameters", {}) 1927 matching_parameters = { 1928 kwarg: model_parameters[kwarg] 1929 for kwarg in constructor_kwargs 1930 if kwarg in model_parameters 1931 } 1932 matching_kwargs = { 1933 kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs 1934 } 1935 return self._create_component_from_model( 1936 model=parsed_model, config=config, **(matching_parameters | matching_kwargs) 1937 ) 1938 except TypeError as error: 1939 missing_parameters = self._extract_missing_parameters(error) 1940 if missing_parameters: 1941 raise ValueError( 1942 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1943 + ", ".join( 1944 ( 1945 f"{type_name}.$parameters.{parameter}" 1946 for parameter in missing_parameters 1947 ) 1948 ) 1949 ) 1950 raise TypeError( 1951 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1952 ) 1953 else: 1954 raise ValueError( 1955 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1956 ) 1957 1958 @staticmethod 1959 def _is_component(model_value: Any) -> bool: 1960 return isinstance(model_value, dict) and model_value.get("type") is not None 1961 1962 def create_default_stream( 1963 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1964 ) -> AbstractStream: 1965 primary_key = model.primary_key.__root__ if model.primary_key else None 1966 self._migrate_state(model, config) 1967 1968 partition_router = self._build_stream_slicer_from_partition_router( 1969 model.retriever, 1970 config, 1971 stream_name=model.name, 1972 **kwargs, 1973 ) 1974 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1975 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1976 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1977 1978 end_time_option = ( 1979 self._create_component_from_model( 1980 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1981 ) 1982 if cursor_model.end_time_option 1983 else None 1984 ) 1985 start_time_option = ( 1986 self._create_component_from_model( 1987 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1988 ) 1989 if cursor_model.start_time_option 1990 else None 1991 ) 1992 1993 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 1994 start_time_option=start_time_option, 1995 end_time_option=end_time_option, 1996 partition_field_start=cursor_model.partition_field_start, 1997 partition_field_end=cursor_model.partition_field_end, 1998 config=config, 1999 parameters=model.parameters or {}, 2000 ) 2001 request_options_provider = ( 2002 datetime_request_options_provider 2003 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 2004 else PerPartitionRequestOptionsProvider( 2005 partition_router, datetime_request_options_provider 2006 ) 2007 ) 2008 elif model.incremental_sync and isinstance( 2009 model.incremental_sync, IncrementingCountCursorModel 2010 ): 2011 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2012 raise ValueError( 2013 "PerPartition does not support per partition states because switching to global state is time based" 2014 ) 2015 2016 cursor_model: 
IncrementingCountCursorModel = model.incremental_sync # type: ignore 2017 2018 start_time_option = ( 2019 self._create_component_from_model( 2020 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2021 config, 2022 parameters=cursor_model.parameters or {}, 2023 ) 2024 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2025 else None 2026 ) 2027 2028 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2029 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2030 partition_field_start = "start" 2031 2032 request_options_provider = DatetimeBasedRequestOptionsProvider( 2033 start_time_option=start_time_option, 2034 partition_field_start=partition_field_start, 2035 config=config, 2036 parameters=model.parameters or {}, 2037 ) 2038 else: 2039 request_options_provider = None 2040 2041 transformations = [] 2042 if model.transformations: 2043 for transformation_model in model.transformations: 2044 transformations.append( 2045 self._create_component_from_model(model=transformation_model, config=config) 2046 ) 2047 file_uploader = None 2048 if model.file_uploader: 2049 file_uploader = self._create_component_from_model( 2050 model=model.file_uploader, config=config 2051 ) 2052 2053 stream_slicer: ConcurrentStreamSlicer = ( 2054 partition_router 2055 if isinstance(concurrent_cursor, FinalStateCursor) 2056 else concurrent_cursor 2057 ) 2058 2059 retriever = self._create_component_from_model( 2060 model=model.retriever, 2061 config=config, 2062 name=model.name, 2063 primary_key=primary_key, 2064 request_options_provider=request_options_provider, 2065 stream_slicer=stream_slicer, 2066 partition_router=partition_router, 2067 has_stop_condition_cursor=self._is_stop_condition_on_cursor(model), 2068 is_client_side_incremental_sync=self._is_client_side_filtering_enabled(model), 2069 cursor=concurrent_cursor, 2070 transformations=transformations, 2071 file_uploader=file_uploader, 2072 incremental_sync=model.incremental_sync, 2073 ) 2074 if isinstance(retriever, AsyncRetriever): 2075 stream_slicer = retriever.stream_slicer 2076 2077 schema_loader: SchemaLoader 2078 if model.schema_loader and isinstance(model.schema_loader, list): 2079 nested_schema_loaders = [ 2080 self._create_component_from_model(model=nested_schema_loader, config=config) 2081 for nested_schema_loader in model.schema_loader 2082 ] 2083 schema_loader = CompositeSchemaLoader( 2084 schema_loaders=nested_schema_loaders, parameters={} 2085 ) 2086 elif model.schema_loader: 2087 schema_loader = self._create_component_from_model( 2088 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2089 config=config, 2090 ) 2091 else: 2092 options = model.parameters or {} 2093 if "name" not in options: 2094 options["name"] = model.name 2095 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2096 schema_loader = CachingSchemaLoaderDecorator(schema_loader) 2097 2098 stream_name = model.name or "" 2099 return DefaultStream( 2100 partition_generator=StreamSlicerPartitionGenerator( 2101 DeclarativePartitionFactory( 2102 stream_name, 2103 schema_loader, 2104 retriever, 2105 self._message_repository, 2106 ), 2107 stream_slicer, 2108 slice_limit=self._limit_slices_fetched, 2109 ), 2110 name=stream_name, 2111 json_schema=schema_loader.get_json_schema, 2112 
primary_key=get_primary_key_from_stream(primary_key), 2113 cursor_field=concurrent_cursor.cursor_field 2114 if hasattr(concurrent_cursor, "cursor_field") 2115 else CursorField( 2116 cursor_field_key="" 2117 ), # FIXME we should have the cursor field as part of the interface of the cursor 2118 logger=logging.getLogger(f"airbyte.{stream_name}"), 2119 cursor=concurrent_cursor, 2120 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2121 ) 2122 2123 def _migrate_state(self, model: DeclarativeStreamModel, config: Config) -> None: 2124 stream_name = model.name or "" 2125 stream_state = self._connector_state_manager.get_stream_state( 2126 stream_name=stream_name, namespace=None 2127 ) 2128 if model.state_migrations: 2129 state_transformations = [ 2130 self._create_component_from_model(state_migration, config, declarative_stream=model) 2131 for state_migration in model.state_migrations 2132 ] 2133 else: 2134 state_transformations = [] 2135 stream_state = self.apply_stream_state_migrations(state_transformations, stream_state) 2136 self._connector_state_manager.update_state_for_stream( 2137 stream_name=stream_name, namespace=None, value=stream_state 2138 ) 2139 2140 def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool: 2141 return bool( 2142 model.incremental_sync 2143 and hasattr(model.incremental_sync, "is_data_feed") 2144 and model.incremental_sync.is_data_feed 2145 ) 2146 2147 def _is_client_side_filtering_enabled(self, model: DeclarativeStreamModel) -> bool: 2148 return bool( 2149 model.incremental_sync 2150 and hasattr(model.incremental_sync, "is_client_side_incremental") 2151 and model.incremental_sync.is_client_side_incremental 2152 ) 2153 2154 def _build_stream_slicer_from_partition_router( 2155 self, 2156 model: Union[ 2157 AsyncRetrieverModel, 2158 CustomRetrieverModel, 2159 SimpleRetrieverModel, 2160 ], 2161 config: Config, 2162 stream_name: Optional[str] = None, 2163 **kwargs: Any, 2164 ) -> PartitionRouter: 2165 if ( 2166 hasattr(model, "partition_router") 2167 and isinstance(model, (SimpleRetrieverModel, AsyncRetrieverModel, CustomRetrieverModel)) 2168 and model.partition_router 2169 ): 2170 stream_slicer_model = model.partition_router 2171 if isinstance(stream_slicer_model, list): 2172 return CartesianProductStreamSlicer( 2173 [ 2174 self._create_component_from_model( 2175 model=slicer, config=config, stream_name=stream_name or "" 2176 ) 2177 for slicer in stream_slicer_model 2178 ], 2179 parameters={}, 2180 ) 2181 elif isinstance(stream_slicer_model, dict): 2182 # partition router comes from CustomRetrieverModel and therefore has not been parsed as a model 2183 params = stream_slicer_model.get("$parameters") 2184 if not isinstance(params, dict): 2185 params = {} 2186 stream_slicer_model["$parameters"] = params 2187 2188 if stream_name is not None: 2189 params["stream_name"] = stream_name 2190 2191 return self._create_nested_component( # type: ignore[no-any-return] # There is no guarantee that this will return a stream slicer.
If not, we expect an AttributeError during the call to `stream_slices` 2192 model, 2193 "partition_router", 2194 stream_slicer_model, 2195 config, 2196 **kwargs, 2197 ) 2198 else: 2199 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 2200 model=stream_slicer_model, config=config, stream_name=stream_name or "" 2201 ) 2202 return SinglePartitionRouter(parameters={}) 2203 2204 def _build_concurrent_cursor( 2205 self, 2206 model: DeclarativeStreamModel, 2207 stream_slicer: Optional[PartitionRouter], 2208 config: Config, 2209 ) -> Cursor: 2210 stream_name = model.name or "" 2211 stream_state = self._connector_state_manager.get_stream_state(stream_name, None) 2212 2213 if ( 2214 model.incremental_sync 2215 and stream_slicer 2216 and not isinstance(stream_slicer, SinglePartitionRouter) 2217 ): 2218 if isinstance(model.incremental_sync, IncrementingCountCursorModel): 2219 # We don't currently support usage of partition routing and IncrementingCountCursor at the 2220 # same time because we didn't solve for design questions like what the lookback window would 2221 # be as well as global cursor fall backs. We have not seen customers that have needed both 2222 # at the same time yet and are currently punting on this until we need to solve it. 2223 raise ValueError( 2224 f"The low-code framework does not currently support usage of a PartitionRouter and an IncrementingCountCursor at the same time. Please specify only one of these options for stream {stream_name}." 2225 ) 2226 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2227 state_manager=self._connector_state_manager, 2228 model_type=DatetimeBasedCursorModel, 2229 component_definition=model.incremental_sync.__dict__, 2230 stream_name=stream_name, 2231 stream_state=stream_state, 2232 stream_namespace=None, 2233 config=config or {}, 2234 partition_router=stream_slicer, 2235 attempt_to_create_cursor_if_not_provided=True, # FIXME can we remove that now? 2236 ) 2237 elif model.incremental_sync: 2238 if type(model.incremental_sync) == IncrementingCountCursorModel: 2239 return self.create_concurrent_cursor_from_incrementing_count_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2240 model_type=IncrementingCountCursorModel, 2241 component_definition=model.incremental_sync.__dict__, 2242 stream_name=stream_name, 2243 stream_namespace=None, 2244 stream_state=stream_state, 2245 config=config or {}, 2246 ) 2247 elif type(model.incremental_sync) == DatetimeBasedCursorModel: 2248 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2249 model_type=type(model.incremental_sync), 2250 component_definition=model.incremental_sync.__dict__, 2251 stream_name=stream_name, 2252 stream_namespace=None, 2253 stream_state=stream_state, 2254 config=config or {}, 2255 attempt_to_create_cursor_if_not_provided=True, 2256 ) 2257 else: 2258 raise ValueError( 2259 f"Incremental sync of type {type(model.incremental_sync)} is not supported" 2260 ) 2261 return FinalStateCursor(stream_name, None, self._message_repository) 2262 2263 def create_default_error_handler( 2264 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2265 ) -> DefaultErrorHandler: 2266 backoff_strategies = [] 2267 if model.backoff_strategies: 2268 for backoff_strategy_model in model.backoff_strategies: 2269 backoff_strategies.append( 2270 self._create_component_from_model(model=backoff_strategy_model, config=config) 2271 ) 2272 2273 response_filters = [] 2274 if model.response_filters: 2275 for response_filter_model in model.response_filters: 2276 response_filters.append( 2277 self._create_component_from_model(model=response_filter_model, config=config) 2278 ) 2279 response_filters.append( 2280 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2281 ) 2282 2283 return DefaultErrorHandler( 2284 backoff_strategies=backoff_strategies, 2285 max_retries=model.max_retries, 2286 response_filters=response_filters, 2287 config=config, 2288 parameters=model.parameters or {}, 2289 ) 2290 2291 def create_default_paginator( 2292 self, 2293 model: DefaultPaginatorModel, 2294 config: Config, 2295 *, 2296 url_base: str, 2297 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2298 decoder: Optional[Decoder] = None, 2299 cursor_used_for_stop_condition: Optional[Cursor] = None, 2300 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2301 if decoder: 2302 if self._is_supported_decoder_for_pagination(decoder): 2303 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2304 else: 2305 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2306 else: 2307 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2308 page_size_option = ( 2309 self._create_component_from_model(model=model.page_size_option, config=config) 2310 if model.page_size_option 2311 else None 2312 ) 2313 page_token_option = ( 2314 self._create_component_from_model(model=model.page_token_option, config=config) 2315 if model.page_token_option 2316 else None 2317 ) 2318 pagination_strategy = self._create_component_from_model( 2319 model=model.pagination_strategy, 2320 config=config, 2321 decoder=decoder_to_use, 2322 extractor_model=extractor_model, 2323 ) 2324 if cursor_used_for_stop_condition: 2325 pagination_strategy = StopConditionPaginationStrategyDecorator( 2326 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2327 ) 2328 paginator = DefaultPaginator( 2329 decoder=decoder_to_use, 2330 page_size_option=page_size_option, 2331 page_token_option=page_token_option, 2332 pagination_strategy=pagination_strategy, 2333 url_base=url_base, 2334 config=config, 2335 parameters=model.parameters or {}, 2336 ) 2337 if self._limit_pages_fetched_per_slice: 2338 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2339 return paginator 2340 2341 def create_dpath_extractor( 2342 self, 2343 model: 
DpathExtractorModel, 2344 config: Config, 2345 decoder: Optional[Decoder] = None, 2346 **kwargs: Any, 2347 ) -> DpathExtractor: 2348 if decoder: 2349 decoder_to_use = decoder 2350 else: 2351 decoder_to_use = JsonDecoder(parameters={}) 2352 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2353 return DpathExtractor( 2354 decoder=decoder_to_use, 2355 field_path=model_field_path, 2356 config=config, 2357 parameters=model.parameters or {}, 2358 ) 2359 2360 @staticmethod 2361 def create_response_to_file_extractor( 2362 model: ResponseToFileExtractorModel, 2363 **kwargs: Any, 2364 ) -> ResponseToFileExtractor: 2365 return ResponseToFileExtractor(parameters=model.parameters or {}) 2366 2367 @staticmethod 2368 def create_exponential_backoff_strategy( 2369 model: ExponentialBackoffStrategyModel, config: Config 2370 ) -> ExponentialBackoffStrategy: 2371 return ExponentialBackoffStrategy( 2372 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2373 ) 2374 2375 @staticmethod 2376 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2377 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2378 2379 def create_http_requester( 2380 self, 2381 model: HttpRequesterModel, 2382 config: Config, 2383 decoder: Decoder = JsonDecoder(parameters={}), 2384 query_properties_key: Optional[str] = None, 2385 use_cache: Optional[bool] = None, 2386 *, 2387 name: str, 2388 ) -> HttpRequester: 2389 authenticator = ( 2390 self._create_component_from_model( 2391 model=model.authenticator, 2392 config=config, 2393 url_base=model.url or model.url_base, 2394 name=name, 2395 decoder=decoder, 2396 ) 2397 if model.authenticator 2398 else None 2399 ) 2400 error_handler = ( 2401 self._create_component_from_model(model=model.error_handler, config=config) 2402 if model.error_handler 2403 else DefaultErrorHandler( 2404 backoff_strategies=[], 2405 response_filters=[], 2406 config=config, 2407 parameters=model.parameters or {}, 2408 ) 2409 ) 2410 2411 api_budget = self._api_budget 2412 2413 request_options_provider = InterpolatedRequestOptionsProvider( 2414 request_body=model.request_body, 2415 request_body_data=model.request_body_data, 2416 request_body_json=model.request_body_json, 2417 request_headers=model.request_headers, 2418 request_parameters=model.request_parameters, # type: ignore # QueryProperties have been removed in `create_simple_retriever` 2419 query_properties_key=query_properties_key, 2420 config=config, 2421 parameters=model.parameters or {}, 2422 ) 2423 2424 assert model.use_cache is not None # for mypy 2425 assert model.http_method is not None # for mypy 2426 2427 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2428 2429 return HttpRequester( 2430 name=name, 2431 url=model.url, 2432 url_base=model.url_base, 2433 path=model.path, 2434 authenticator=authenticator, 2435 error_handler=error_handler, 2436 api_budget=api_budget, 2437 http_method=HttpMethod[model.http_method.value], 2438 request_options_provider=request_options_provider, 2439 config=config, 2440 disable_retries=self._disable_retries, 2441 parameters=model.parameters or {}, 2442 message_repository=self._message_repository, 2443 use_cache=should_use_cache, 2444 decoder=decoder, 2445 stream_response=decoder.is_stream_response() if decoder else False, 2446 ) 2447 2448 @staticmethod 2449 def create_http_response_filter( 2450 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2451 ) -> 
HttpResponseFilter: 2452 if model.action: 2453 action = ResponseAction(model.action.value) 2454 else: 2455 action = None 2456 2457 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2458 2459 http_codes = ( 2460 set(model.http_codes) if model.http_codes else set() 2461 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2462 2463 return HttpResponseFilter( 2464 action=action, 2465 failure_type=failure_type, 2466 error_message=model.error_message or "", 2467 error_message_contains=model.error_message_contains or "", 2468 http_codes=http_codes, 2469 predicate=model.predicate or "", 2470 config=config, 2471 parameters=model.parameters or {}, 2472 ) 2473 2474 @staticmethod 2475 def create_inline_schema_loader( 2476 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2477 ) -> InlineSchemaLoader: 2478 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2479 2480 def create_complex_field_type( 2481 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2482 ) -> ComplexFieldType: 2483 items = ( 2484 self._create_component_from_model(model=model.items, config=config) 2485 if isinstance(model.items, ComplexFieldTypeModel) 2486 else model.items 2487 ) 2488 2489 return ComplexFieldType(field_type=model.field_type, items=items) 2490 2491 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2492 target_type = ( 2493 self._create_component_from_model(model=model.target_type, config=config) 2494 if isinstance(model.target_type, ComplexFieldTypeModel) 2495 else model.target_type 2496 ) 2497 2498 return TypesMap( 2499 target_type=target_type, 2500 current_type=model.current_type, 2501 condition=model.condition if model.condition is not None else "True", 2502 ) 2503 2504 def create_schema_type_identifier( 2505 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2506 ) -> SchemaTypeIdentifier: 2507 types_mapping = [] 2508 if model.types_mapping: 2509 types_mapping.extend( 2510 [ 2511 self._create_component_from_model(types_map, config=config) 2512 for types_map in model.types_mapping 2513 ] 2514 ) 2515 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2516 [x for x in model.schema_pointer] if model.schema_pointer else [] 2517 ) 2518 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2519 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2520 [x for x in model.type_pointer] if model.type_pointer else None 2521 ) 2522 2523 return SchemaTypeIdentifier( 2524 schema_pointer=model_schema_pointer, 2525 key_pointer=model_key_pointer, 2526 type_pointer=model_type_pointer, 2527 types_mapping=types_mapping, 2528 parameters=model.parameters or {}, 2529 ) 2530 2531 def create_dynamic_schema_loader( 2532 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2533 ) -> DynamicSchemaLoader: 2534 schema_transformations = [] 2535 if model.schema_transformations: 2536 for transformation_model in model.schema_transformations: 2537 schema_transformations.append( 2538 self._create_component_from_model(model=transformation_model, config=config) 2539 ) 2540 name = "dynamic_properties" 2541 retriever = self._create_component_from_model( 2542 model=model.retriever, 2543 config=config, 2544 name=name, 2545 primary_key=None, 2546 partition_router=self._build_stream_slicer_from_partition_router( 2547 model.retriever, config 2548 ), 2549 transformations=[], 2550 use_cache=True, 2551 
log_formatter=( 2552 lambda response: format_http_message( 2553 response, 2554 f"Schema loader '{name}' request", 2555 f"Request performed in order to extract schema.", 2556 name, 2557 is_auxiliary=True, 2558 ) 2559 ), 2560 ) 2561 schema_type_identifier = self._create_component_from_model( 2562 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2563 ) 2564 schema_filter = ( 2565 self._create_component_from_model( 2566 model.schema_filter, config=config, parameters=model.parameters or {} 2567 ) 2568 if model.schema_filter is not None 2569 else None 2570 ) 2571 2572 return DynamicSchemaLoader( 2573 retriever=retriever, 2574 config=config, 2575 schema_transformations=schema_transformations, 2576 schema_filter=schema_filter, 2577 schema_type_identifier=schema_type_identifier, 2578 parameters=model.parameters or {}, 2579 ) 2580 2581 @staticmethod 2582 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2583 return JsonDecoder(parameters={}) 2584 2585 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2586 return CompositeRawDecoder( 2587 parser=ModelToComponentFactory._get_parser(model, config), 2588 stream_response=False if self._emit_connector_builder_messages else True, 2589 ) 2590 2591 def create_jsonl_decoder( 2592 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2593 ) -> Decoder: 2594 return CompositeRawDecoder( 2595 parser=ModelToComponentFactory._get_parser(model, config), 2596 stream_response=False if self._emit_connector_builder_messages else True, 2597 ) 2598 2599 def create_gzip_decoder( 2600 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2601 ) -> Decoder: 2602 _compressed_response_types = { 2603 "gzip", 2604 "x-gzip", 2605 "gzip, deflate", 2606 "x-gzip, deflate", 2607 "application/zip", 2608 "application/gzip", 2609 "application/x-gzip", 2610 "application/x-zip-compressed", 2611 } 2612 2613 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2614 2615 if self._emit_connector_builder_messages: 2616 # This is very surprising but if the response is not streamed, 2617 # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw, 2618 # which uses urllib3 directly and does not uncompress the data. 
2619 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2620 2621 return CompositeRawDecoder.by_headers( 2622 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2623 stream_response=True, 2624 fallback_parser=gzip_parser.inner_parser, 2625 ) 2626 2627 @staticmethod 2628 def create_iterable_decoder( 2629 model: IterableDecoderModel, config: Config, **kwargs: Any 2630 ) -> IterableDecoder: 2631 return IterableDecoder(parameters={}) 2632 2633 @staticmethod 2634 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2635 return XmlDecoder(parameters={}) 2636 2637 def create_zipfile_decoder( 2638 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2639 ) -> ZipfileDecoder: 2640 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2641 2642 @staticmethod 2643 def _get_parser(model: BaseModel, config: Config) -> Parser: 2644 if isinstance(model, JsonDecoderModel): 2645 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2646 return JsonParser() 2647 elif isinstance(model, JsonlDecoderModel): 2648 return JsonLineParser() 2649 elif isinstance(model, CsvDecoderModel): 2650 return CsvParser( 2651 encoding=model.encoding, 2652 delimiter=model.delimiter, 2653 set_values_to_none=model.set_values_to_none, 2654 ) 2655 elif isinstance(model, GzipDecoderModel): 2656 return GzipParser( 2657 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2658 ) 2659 elif isinstance( 2660 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2661 ): 2662 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2663 2664 raise ValueError(f"Unknown decoder type {model}") 2665 2666 @staticmethod 2667 def create_json_file_schema_loader( 2668 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2669 ) -> JsonFileSchemaLoader: 2670 return JsonFileSchemaLoader( 2671 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2672 ) 2673 2674 def create_jwt_authenticator( 2675 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2676 ) -> JwtAuthenticator: 2677 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2678 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2679 request_option = ( 2680 self._create_component_from_model(model.request_option, config) 2681 if model.request_option 2682 else None 2683 ) 2684 return JwtAuthenticator( 2685 config=config, 2686 parameters=model.parameters or {}, 2687 algorithm=JwtAlgorithm(model.algorithm.value), 2688 secret_key=model.secret_key, 2689 base64_encode_secret_key=model.base64_encode_secret_key, 2690 token_duration=model.token_duration, 2691 header_prefix=model.header_prefix, 2692 kid=jwt_headers.kid, 2693 typ=jwt_headers.typ, 2694 cty=jwt_headers.cty, 2695 iss=jwt_payload.iss, 2696 sub=jwt_payload.sub, 2697 aud=jwt_payload.aud, 2698 additional_jwt_headers=model.additional_jwt_headers, 2699 additional_jwt_payload=model.additional_jwt_payload, 2700 passphrase=model.passphrase, 2701 request_option=request_option, 2702 ) 2703 2704 def create_list_partition_router( 2705 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2706 ) -> ListPartitionRouter: 2707 request_option = ( 2708 self._create_component_from_model(model.request_option, config) 2709 if model.request_option 2710 else None 
2711 ) 2712 return ListPartitionRouter( 2713 cursor_field=model.cursor_field, 2714 request_option=request_option, 2715 values=model.values, 2716 config=config, 2717 parameters=model.parameters or {}, 2718 ) 2719 2720 @staticmethod 2721 def create_min_max_datetime( 2722 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2723 ) -> MinMaxDatetime: 2724 return MinMaxDatetime( 2725 datetime=model.datetime, 2726 datetime_format=model.datetime_format or "", 2727 max_datetime=model.max_datetime or "", 2728 min_datetime=model.min_datetime or "", 2729 parameters=model.parameters or {}, 2730 ) 2731 2732 @staticmethod 2733 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2734 return NoAuth(parameters=model.parameters or {}) 2735 2736 @staticmethod 2737 def create_no_pagination( 2738 model: NoPaginationModel, config: Config, **kwargs: Any 2739 ) -> NoPagination: 2740 return NoPagination(parameters={}) 2741 2742 def create_oauth_authenticator( 2743 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2744 ) -> DeclarativeOauth2Authenticator: 2745 profile_assertion = ( 2746 self._create_component_from_model(model.profile_assertion, config=config) 2747 if model.profile_assertion 2748 else None 2749 ) 2750 2751 refresh_token_error_status_codes, refresh_token_error_key, refresh_token_error_values = ( 2752 self._get_refresh_token_error_information(model) 2753 ) 2754 if model.refresh_token_updater: 2755 # ignore type error because fixing it would have a lot of dependencies, revisit later 2756 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2757 config, 2758 InterpolatedString.create( 2759 model.token_refresh_endpoint, # type: ignore 2760 parameters=model.parameters or {}, 2761 ).eval(config), 2762 access_token_name=InterpolatedString.create( 2763 model.access_token_name or "access_token", parameters=model.parameters or {} 2764 ).eval(config), 2765 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2766 expires_in_name=InterpolatedString.create( 2767 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2768 ).eval(config), 2769 client_id_name=InterpolatedString.create( 2770 model.client_id_name or "client_id", parameters=model.parameters or {} 2771 ).eval(config), 2772 client_id=InterpolatedString.create( 2773 model.client_id, parameters=model.parameters or {} 2774 ).eval(config) 2775 if model.client_id 2776 else model.client_id, 2777 client_secret_name=InterpolatedString.create( 2778 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2779 ).eval(config), 2780 client_secret=InterpolatedString.create( 2781 model.client_secret, parameters=model.parameters or {} 2782 ).eval(config) 2783 if model.client_secret 2784 else model.client_secret, 2785 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2786 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2787 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2788 grant_type_name=InterpolatedString.create( 2789 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2790 ).eval(config), 2791 grant_type=InterpolatedString.create( 2792 model.grant_type or "refresh_token", parameters=model.parameters or {} 2793 ).eval(config), 2794 refresh_request_body=InterpolatedMapping( 2795 model.refresh_request_body or {}, parameters=model.parameters or {} 2796 ).eval(config), 2797 refresh_request_headers=InterpolatedMapping( 
2798 model.refresh_request_headers or {}, parameters=model.parameters or {} 2799 ).eval(config), 2800 scopes=model.scopes, 2801 token_expiry_date_format=model.token_expiry_date_format, 2802 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2803 message_repository=self._message_repository, 2804 refresh_token_error_status_codes=refresh_token_error_status_codes, 2805 refresh_token_error_key=refresh_token_error_key, 2806 refresh_token_error_values=refresh_token_error_values, 2807 ) 2808 # ignore type error because fixing it would have a lot of dependencies, revisit later 2809 return DeclarativeOauth2Authenticator( # type: ignore 2810 access_token_name=model.access_token_name or "access_token", 2811 access_token_value=model.access_token_value, 2812 client_id_name=model.client_id_name or "client_id", 2813 client_id=model.client_id, 2814 client_secret_name=model.client_secret_name or "client_secret", 2815 client_secret=model.client_secret, 2816 expires_in_name=model.expires_in_name or "expires_in", 2817 grant_type_name=model.grant_type_name or "grant_type", 2818 grant_type=model.grant_type or "refresh_token", 2819 refresh_request_body=model.refresh_request_body, 2820 refresh_request_headers=model.refresh_request_headers, 2821 refresh_token_name=model.refresh_token_name or "refresh_token", 2822 refresh_token=model.refresh_token, 2823 scopes=model.scopes, 2824 token_expiry_date=model.token_expiry_date, 2825 token_expiry_date_format=model.token_expiry_date_format, 2826 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2827 token_refresh_endpoint=model.token_refresh_endpoint, 2828 config=config, 2829 parameters=model.parameters or {}, 2830 message_repository=self._message_repository, 2831 profile_assertion=profile_assertion, 2832 use_profile_assertion=model.use_profile_assertion, 2833 refresh_token_error_status_codes=refresh_token_error_status_codes, 2834 refresh_token_error_key=refresh_token_error_key, 2835 refresh_token_error_values=refresh_token_error_values, 2836 ) 2837 2838 @staticmethod 2839 def _get_refresh_token_error_information( 2840 model: OAuthAuthenticatorModel, 2841 ) -> Tuple[Tuple[int, ...], str, Tuple[str, ...]]: 2842 """ 2843 In a previous version of the CDK, raising the auth error as a config_error was only done if a refresh token updater was 2844 defined. As a transition, we added those fields on the OAuthAuthenticatorModel. This method ensures that the 2845 information is defined only once and returns the right fields.
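Illustrative note: when neither the OAuthAuthenticatorModel nor its refresh token updater defines these fields,
the defaults returned below are ((400,), "error", ("invalid_grant", "invalid_permissions")), i.e. a 400 response
whose "error" value is "invalid_grant" or "invalid_permissions" is surfaced as a config error.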
2846 """ 2847 refresh_token_updater = model.refresh_token_updater 2848 is_defined_on_refresh_token_updated = refresh_token_updater and ( 2849 refresh_token_updater.refresh_token_error_status_codes 2850 or refresh_token_updater.refresh_token_error_key 2851 or refresh_token_updater.refresh_token_error_values 2852 ) 2853 is_defined_on_oauth_authenticator = ( 2854 model.refresh_token_error_status_codes 2855 or model.refresh_token_error_key 2856 or model.refresh_token_error_values 2857 ) 2858 if is_defined_on_refresh_token_updated and is_defined_on_oauth_authenticator: 2859 raise ValueError( 2860 "refresh_token_error should either be defined on the OAuthAuthenticatorModel or the RefreshTokenUpdaterModel, not both" 2861 ) 2862 2863 if is_defined_on_refresh_token_updated: 2864 not_optional_refresh_token_updater: RefreshTokenUpdaterModel = refresh_token_updater # type: ignore # we know from the condition that this is not None 2865 return ( 2866 tuple(not_optional_refresh_token_updater.refresh_token_error_status_codes) 2867 if not_optional_refresh_token_updater.refresh_token_error_status_codes 2868 else (), 2869 not_optional_refresh_token_updater.refresh_token_error_key or "", 2870 tuple(not_optional_refresh_token_updater.refresh_token_error_values) 2871 if not_optional_refresh_token_updater.refresh_token_error_values 2872 else (), 2873 ) 2874 elif is_defined_on_oauth_authenticator: 2875 return ( 2876 tuple(model.refresh_token_error_status_codes) 2877 if model.refresh_token_error_status_codes 2878 else (), 2879 model.refresh_token_error_key or "", 2880 tuple(model.refresh_token_error_values) if model.refresh_token_error_values else (), 2881 ) 2882 2883 # returning default values we think cover most cases 2884 return (400,), "error", ("invalid_grant", "invalid_permissions") 2885 2886 def create_offset_increment( 2887 self, 2888 model: OffsetIncrementModel, 2889 config: Config, 2890 decoder: Decoder, 2891 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2892 **kwargs: Any, 2893 ) -> OffsetIncrement: 2894 if isinstance(decoder, PaginationDecoderDecorator): 2895 inner_decoder = decoder.decoder 2896 else: 2897 inner_decoder = decoder 2898 decoder = PaginationDecoderDecorator(decoder=decoder) 2899 2900 if self._is_supported_decoder_for_pagination(inner_decoder): 2901 decoder_to_use = decoder 2902 else: 2903 raise ValueError( 2904 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2905 ) 2906 2907 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2908 # so that it can be shared by OffSetIncrement and RecordSelector. However, due to how we instantiate the 2909 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2910 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2911 # When we have more time to investigate we can look into reusing the same component. 
2912 extractor = ( 2913 self._create_component_from_model( 2914 model=extractor_model, config=config, decoder=decoder_to_use 2915 ) 2916 if extractor_model 2917 else None 2918 ) 2919 2920 return OffsetIncrement( 2921 page_size=model.page_size, 2922 config=config, 2923 decoder=decoder_to_use, 2924 extractor=extractor, 2925 inject_on_first_request=model.inject_on_first_request or False, 2926 parameters=model.parameters or {}, 2927 ) 2928 2929 @staticmethod 2930 def create_page_increment( 2931 model: PageIncrementModel, config: Config, **kwargs: Any 2932 ) -> PageIncrement: 2933 return PageIncrement( 2934 page_size=model.page_size, 2935 config=config, 2936 start_from_page=model.start_from_page or 0, 2937 inject_on_first_request=model.inject_on_first_request or False, 2938 parameters=model.parameters or {}, 2939 ) 2940 2941 def create_parent_stream_config( 2942 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2943 ) -> ParentStreamConfig: 2944 declarative_stream = self._create_component_from_model( 2945 model.stream, 2946 config=config, 2947 is_parent=True, 2948 **kwargs, 2949 ) 2950 request_option = ( 2951 self._create_component_from_model(model.request_option, config=config) 2952 if model.request_option 2953 else None 2954 ) 2955 2956 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2957 raise ValueError( 2958 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 2959 ) 2960 2961 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2962 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2963 ) 2964 2965 return ParentStreamConfig( 2966 parent_key=model.parent_key, 2967 request_option=request_option, 2968 stream=declarative_stream, 2969 partition_field=model.partition_field, 2970 config=config, 2971 incremental_dependency=model.incremental_dependency or False, 2972 parameters=model.parameters or {}, 2973 extra_fields=model.extra_fields, 2974 lazy_read_pointer=model_lazy_read_pointer, 2975 ) 2976 2977 def create_properties_from_endpoint( 2978 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 2979 ) -> PropertiesFromEndpoint: 2980 retriever = self._create_component_from_model( 2981 model=model.retriever, 2982 config=config, 2983 name="dynamic_properties", 2984 primary_key=None, 2985 stream_slicer=None, 2986 transformations=[], 2987 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ 2988 ) 2989 return PropertiesFromEndpoint( 2990 property_field_path=model.property_field_path, 2991 retriever=retriever, 2992 config=config, 2993 parameters=model.parameters or {}, 2994 ) 2995 2996 def create_property_chunking( 2997 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 2998 ) -> PropertyChunking: 2999 record_merge_strategy = ( 3000 self._create_component_from_model( 3001 model=model.record_merge_strategy, config=config, **kwargs 3002 ) 3003 if model.record_merge_strategy 3004 else None 3005 ) 3006 3007 property_limit_type: PropertyLimitType 3008 match model.property_limit_type: 3009 case PropertyLimitTypeModel.property_count: 3010 property_limit_type = PropertyLimitType.property_count 3011 case PropertyLimitTypeModel.characters: 3012 property_limit_type = PropertyLimitType.characters 3013 case _: 3014 raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")
3015 3016 return PropertyChunking( 3017 property_limit_type=property_limit_type, 3018 property_limit=model.property_limit, 3019 record_merge_strategy=record_merge_strategy, 3020 config=config, 3021 parameters=model.parameters or {}, 3022 ) 3023 3024 def create_query_properties( 3025 self, model: QueryPropertiesModel, config: Config, *, stream_name: str, **kwargs: Any 3026 ) -> QueryProperties: 3027 if isinstance(model.property_list, list): 3028 property_list = model.property_list 3029 else: 3030 property_list = self._create_component_from_model( 3031 model=model.property_list, config=config, **kwargs 3032 ) 3033 3034 property_chunking = ( 3035 self._create_component_from_model( 3036 model=model.property_chunking, config=config, **kwargs 3037 ) 3038 if model.property_chunking 3039 else None 3040 ) 3041 3042 property_selector = ( 3043 self._create_component_from_model( 3044 model=model.property_selector, config=config, stream_name=stream_name, **kwargs 3045 ) 3046 if model.property_selector 3047 else None 3048 ) 3049 3050 return QueryProperties( 3051 property_list=property_list, 3052 always_include_properties=model.always_include_properties, 3053 property_chunking=property_chunking, 3054 property_selector=property_selector, 3055 config=config, 3056 parameters=model.parameters or {}, 3057 ) 3058 3059 def create_json_schema_property_selector( 3060 self, 3061 model: JsonSchemaPropertySelectorModel, 3062 config: Config, 3063 *, 3064 stream_name: str, 3065 **kwargs: Any, 3066 ) -> JsonSchemaPropertySelector: 3067 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 3068 3069 transformations = [] 3070 if model.transformations: 3071 for transformation_model in model.transformations: 3072 transformations.append( 3073 self._create_component_from_model(model=transformation_model, config=config) 3074 ) 3075 3076 return JsonSchemaPropertySelector( 3077 configured_stream=configured_stream, 3078 properties_transformations=transformations, 3079 config=config, 3080 parameters=model.parameters or {}, 3081 ) 3082 3083 @staticmethod 3084 def create_record_filter( 3085 model: RecordFilterModel, config: Config, **kwargs: Any 3086 ) -> RecordFilter: 3087 return RecordFilter( 3088 condition=model.condition or "", config=config, parameters=model.parameters or {} 3089 ) 3090 3091 @staticmethod 3092 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 3093 return RequestPath(parameters={}) 3094 3095 @staticmethod 3096 def create_request_option( 3097 model: RequestOptionModel, config: Config, **kwargs: Any 3098 ) -> RequestOption: 3099 inject_into = RequestOptionType(model.inject_into.value) 3100 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3101 [ 3102 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3103 for segment in model.field_path 3104 ] 3105 if model.field_path 3106 else None 3107 ) 3108 field_name = ( 3109 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3110 if model.field_name 3111 else None 3112 ) 3113 return RequestOption( 3114 field_name=field_name, 3115 field_path=field_path, 3116 inject_into=inject_into, 3117 parameters=kwargs.get("parameters", {}), 3118 ) 3119 3120 def create_record_selector( 3121 self, 3122 model: RecordSelectorModel, 3123 config: Config, 3124 *, 3125 name: str, 3126 transformations: List[RecordTransformation] | None = None, 3127 decoder: Decoder | None = None, 3128 client_side_incremental_sync_cursor: Optional[Cursor] = None, 3129 
file_uploader: Optional[DefaultFileUploader] = None, 3130 **kwargs: Any, 3131 ) -> RecordSelector: 3132 extractor = self._create_component_from_model( 3133 model=model.extractor, decoder=decoder, config=config 3134 ) 3135 record_filter = ( 3136 self._create_component_from_model(model.record_filter, config=config) 3137 if model.record_filter 3138 else None 3139 ) 3140 3141 transform_before_filtering = ( 3142 False if model.transform_before_filtering is None else model.transform_before_filtering 3143 ) 3144 if client_side_incremental_sync_cursor: 3145 record_filter = ClientSideIncrementalRecordFilterDecorator( 3146 config=config, 3147 parameters=model.parameters, 3148 condition=model.record_filter.condition 3149 if (model.record_filter and hasattr(model.record_filter, "condition")) 3150 else None, 3151 cursor=client_side_incremental_sync_cursor, 3152 ) 3153 transform_before_filtering = ( 3154 True 3155 if model.transform_before_filtering is None 3156 else model.transform_before_filtering 3157 ) 3158 3159 if model.schema_normalization is None: 3160 # default to no schema normalization if not set 3161 model.schema_normalization = SchemaNormalizationModel.None_ 3162 3163 schema_normalization = ( 3164 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3165 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3166 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3167 ) 3168 3169 return RecordSelector( 3170 extractor=extractor, 3171 name=name, 3172 config=config, 3173 record_filter=record_filter, 3174 transformations=transformations or [], 3175 file_uploader=file_uploader, 3176 schema_normalization=schema_normalization, 3177 parameters=model.parameters or {}, 3178 transform_before_filtering=transform_before_filtering, 3179 ) 3180 3181 @staticmethod 3182 def create_remove_fields( 3183 model: RemoveFieldsModel, config: Config, **kwargs: Any 3184 ) -> RemoveFields: 3185 return RemoveFields( 3186 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 3187 ) 3188 3189 def create_selective_authenticator( 3190 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3191 ) -> DeclarativeAuthenticator: 3192 authenticators = { 3193 name: self._create_component_from_model(model=auth, config=config) 3194 for name, auth in model.authenticators.items() 3195 } 3196 # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error 3197 return SelectiveAuthenticator( # type: ignore[abstract] 3198 config=config, 3199 authenticators=authenticators, 3200 authenticator_selection_path=model.authenticator_selection_path, 3201 **kwargs, 3202 ) 3203 3204 @staticmethod 3205 def create_legacy_session_token_authenticator( 3206 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3207 ) -> LegacySessionTokenAuthenticator: 3208 return LegacySessionTokenAuthenticator( 3209 api_url=url_base, 3210 header=model.header, 3211 login_url=model.login_url, 3212 password=model.password or "", 3213 session_token=model.session_token or "", 3214 session_token_response_key=model.session_token_response_key or "", 3215 username=model.username or "", 3216 validate_session_url=model.validate_session_url, 3217 config=config, 3218 parameters=model.parameters or {}, 3219 ) 3220 3221 def create_simple_retriever( 3222 self, 3223 model: SimpleRetrieverModel, 3224 config: Config, 3225 *, 3226 
name: str, 3227 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3228 request_options_provider: Optional[RequestOptionsProvider] = None, 3229 cursor: Optional[Cursor] = None, 3230 has_stop_condition_cursor: bool = False, 3231 is_client_side_incremental_sync: bool = False, 3232 transformations: List[RecordTransformation], 3233 file_uploader: Optional[DefaultFileUploader] = None, 3234 incremental_sync: Optional[ 3235 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3236 ] = None, 3237 use_cache: Optional[bool] = None, 3238 log_formatter: Optional[Callable[[Response], Any]] = None, 3239 partition_router: Optional[PartitionRouter] = None, 3240 **kwargs: Any, 3241 ) -> SimpleRetriever: 3242 def _get_url(req: Requester) -> str: 3243 """ 3244 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3245 This is needed because the URL is not set until the requester is created. 3246 """ 3247 3248 _url: str = ( 3249 model.requester.url 3250 if hasattr(model.requester, "url") and model.requester.url is not None 3251 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3252 ) 3253 _url_base: str = ( 3254 model.requester.url_base 3255 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3256 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3257 ) 3258 3259 return _url or _url_base 3260 3261 if cursor is None: 3262 cursor = FinalStateCursor(name, None, self._message_repository) 3263 3264 decoder = ( 3265 self._create_component_from_model(model=model.decoder, config=config) 3266 if model.decoder 3267 else JsonDecoder(parameters={}) 3268 ) 3269 record_selector = self._create_component_from_model( 3270 model=model.record_selector, 3271 name=name, 3272 config=config, 3273 decoder=decoder, 3274 transformations=transformations, 3275 client_side_incremental_sync_cursor=cursor if is_client_side_incremental_sync else None, 3276 file_uploader=file_uploader, 3277 ) 3278 3279 query_properties: Optional[QueryProperties] = None 3280 query_properties_key: Optional[str] = None 3281 self._ensure_query_properties_to_model(model.requester) 3282 if self._has_query_properties_in_request_parameters(model.requester): 3283 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3284 # places instead of default to request_parameters which isn't clearly documented 3285 if ( 3286 hasattr(model.requester, "fetch_properties_from_endpoint") 3287 and model.requester.fetch_properties_from_endpoint 3288 ): 3289 raise ValueError( 3290 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3291 ) 3292 3293 query_properties_definitions = [] 3294 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _has_query_properties_in_request_parameters() 3295 if isinstance(request_parameter, QueryPropertiesModel): 3296 query_properties_key = key 3297 query_properties_definitions.append(request_parameter) 3298 3299 if len(query_properties_definitions) > 1: 3300 raise ValueError( 3301 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3302 ) 3303 3304 if len(query_properties_definitions) == 1: 3305 query_properties = self._create_component_from_model( 3306 
model=query_properties_definitions[0], stream_name=name, config=config 3307 ) 3308 3309 # Removes QueryProperties components from the interpolated mappings because it has been designed 3310 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 3311 # instead of through jinja interpolation 3312 if hasattr(model.requester, "request_parameters") and isinstance( 3313 model.requester.request_parameters, Mapping 3314 ): 3315 model.requester.request_parameters = self._remove_query_properties( 3316 model.requester.request_parameters 3317 ) 3318 elif ( 3319 hasattr(model.requester, "fetch_properties_from_endpoint") 3320 and model.requester.fetch_properties_from_endpoint 3321 ): 3322 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3323 query_properties_definition = QueryPropertiesModel( 3324 type="QueryProperties", 3325 property_list=model.requester.fetch_properties_from_endpoint, 3326 always_include_properties=None, 3327 property_chunking=None, 3328 ) # type: ignore # $parameters has a default value 3329 3330 query_properties = self.create_query_properties( 3331 model=query_properties_definition, 3332 stream_name=name, 3333 config=config, 3334 ) 3335 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3336 query_properties = self.create_query_properties( 3337 model=model.requester.query_properties, 3338 stream_name=name, 3339 config=config, 3340 ) 3341 3342 requester = self._create_component_from_model( 3343 model=model.requester, 3344 decoder=decoder, 3345 name=name, 3346 query_properties_key=query_properties_key, 3347 use_cache=use_cache, 3348 config=config, 3349 ) 3350 3351 if not request_options_provider: 3352 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3353 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3354 partition_router, PartitionRouter 3355 ): 3356 request_options_provider = partition_router 3357 3358 paginator = ( 3359 self._create_component_from_model( 3360 model=model.paginator, 3361 config=config, 3362 url_base=_get_url(requester), 3363 extractor_model=model.record_selector.extractor, 3364 decoder=decoder, 3365 cursor_used_for_stop_condition=cursor if has_stop_condition_cursor else None, 3366 ) 3367 if model.paginator 3368 else NoPagination(parameters={}) 3369 ) 3370 3371 ignore_stream_slicer_parameters_on_paginated_requests = ( 3372 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3373 ) 3374 3375 if ( 3376 model.partition_router 3377 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3378 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3379 and any( 3380 parent_stream_config.lazy_read_pointer 3381 for parent_stream_config in model.partition_router.parent_stream_configs 3382 ) 3383 ): 3384 if incremental_sync: 3385 if incremental_sync.type != "DatetimeBasedCursor": 3386 raise ValueError( 3387 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3388 ) 3389 3390 elif incremental_sync.step or incremental_sync.cursor_granularity: 3391 raise ValueError( 3392 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 3393 ) 3394 3395 if model.decoder and model.decoder.type != "JsonDecoder": 3396 raise ValueError( 3397 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 
3398 ) 3399 3400 return LazySimpleRetriever( 3401 name=name, 3402 paginator=paginator, 3403 primary_key=primary_key, 3404 requester=requester, 3405 record_selector=record_selector, 3406 stream_slicer=_NO_STREAM_SLICING, 3407 request_option_provider=request_options_provider, 3408 config=config, 3409 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3410 parameters=model.parameters or {}, 3411 ) 3412 3413 if ( 3414 model.record_selector.record_filter 3415 and model.pagination_reset 3416 and model.pagination_reset.limits 3417 ): 3418 raise ValueError("PaginationResetLimits are not supported while having record filter.") 3419 3420 return SimpleRetriever( 3421 name=name, 3422 paginator=paginator, 3423 primary_key=primary_key, 3424 requester=requester, 3425 record_selector=record_selector, 3426 stream_slicer=_NO_STREAM_SLICING, 3427 request_option_provider=request_options_provider, 3428 config=config, 3429 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3430 additional_query_properties=query_properties, 3431 log_formatter=self._get_log_formatter(log_formatter, name), 3432 pagination_tracker_factory=self._create_pagination_tracker_factory( 3433 model.pagination_reset, cursor 3434 ), 3435 parameters=model.parameters or {}, 3436 ) 3437 3438 def _create_pagination_tracker_factory( 3439 self, model: Optional[PaginationResetModel], cursor: Cursor 3440 ) -> Callable[[], PaginationTracker]: 3441 if model is None: 3442 return lambda: PaginationTracker() 3443 3444 # Until we figure out a way to use any cursor for PaginationTracker, we will have to have this cursor selector logic 3445 cursor_factory: Callable[[], Optional[ConcurrentCursor]] = lambda: None 3446 if model.action == PaginationResetActionModel.RESET: 3447 # in that case, we will let cursor_factory to return None even if the stream has a cursor 3448 pass 3449 elif model.action == PaginationResetActionModel.SPLIT_USING_CURSOR: 3450 if isinstance(cursor, ConcurrentCursor): 3451 cursor_factory = lambda: cursor.copy_without_state() # type: ignore # the if condition validates that it is a ConcurrentCursor 3452 elif isinstance(cursor, ConcurrentPerPartitionCursor): 3453 cursor_factory = lambda: cursor._cursor_factory.create( # type: ignore # if this becomes a problem, we would need to extract the cursor_factory instantiation logic and make it accessible here 3454 {}, datetime.timedelta(0) 3455 ) 3456 elif not isinstance(cursor, FinalStateCursor): 3457 LOGGER.warning( 3458 "Unknown cursor for PaginationTracker. Pagination resets might not work properly" 3459 ) 3460 else: 3461 raise ValueError(f"Unknown PaginationReset action: {model.action}") 3462 3463 limit = model.limits.number_of_records if model and model.limits else None 3464 return lambda: PaginationTracker(cursor_factory(), limit) 3465 3466 def _get_log_formatter( 3467 self, log_formatter: Callable[[Response], Any] | None, name: str 3468 ) -> Callable[[Response], Any] | None: 3469 if self._should_limit_slices_fetched(): 3470 return ( 3471 ( 3472 lambda response: format_http_message( 3473 response, 3474 f"Stream '{name}' request", 3475 f"Request performed in order to extract records for stream '{name}'", 3476 name, 3477 ) 3478 ) 3479 if not log_formatter 3480 else log_formatter 3481 ) 3482 return None 3483 3484 def _should_limit_slices_fetched(self) -> bool: 3485 """ 3486 Returns True if the number of slices fetched should be limited, False otherwise. 
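In practice this is True when `_limit_slices_fetched` is set or when connector builder messages are emitted;
for example, create_async_retriever then wraps the stream slicer in a StreamSlicerTestReadDecorator capped at
`self._limit_slices_fetched or 5` slices.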
3487 This is used to limit the number of slices fetched during tests. 3488 """ 3489 return bool(self._limit_slices_fetched or self._emit_connector_builder_messages) 3490 3491 @staticmethod 3492 def _has_query_properties_in_request_parameters( 3493 requester: Union[HttpRequesterModel, CustomRequesterModel], 3494 ) -> bool: 3495 if not hasattr(requester, "request_parameters"): 3496 return False 3497 request_parameters = requester.request_parameters 3498 if request_parameters and isinstance(request_parameters, Mapping): 3499 for request_parameter in request_parameters.values(): 3500 if isinstance(request_parameter, QueryPropertiesModel): 3501 return True 3502 return False 3503 3504 @staticmethod 3505 def _remove_query_properties( 3506 request_parameters: Mapping[str, Union[str, QueryPropertiesModel]], 3507 ) -> Mapping[str, str]: 3508 return { 3509 parameter_field: request_parameter 3510 for parameter_field, request_parameter in request_parameters.items() 3511 if not isinstance(request_parameter, QueryPropertiesModel) 3512 } 3513 3514 def create_state_delegating_stream( 3515 self, 3516 model: StateDelegatingStreamModel, 3517 config: Config, 3518 has_parent_state: Optional[bool] = None, 3519 **kwargs: Any, 3520 ) -> DefaultStream: 3521 if ( 3522 model.full_refresh_stream.name != model.name 3523 or model.name != model.incremental_stream.name 3524 ): 3525 raise ValueError( 3526 f"The state_delegating_stream, its full_refresh_stream and its incremental_stream must all have the same name. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3527 ) 3528 3529 stream_model = self._get_state_delegating_stream_model( 3530 False if has_parent_state is None else has_parent_state, model 3531 ) 3532 3533 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is always DeclarativeStreamModel 3534 3535 def _get_state_delegating_stream_model( 3536 self, has_parent_state: bool, model: StateDelegatingStreamModel 3537 ) -> DeclarativeStreamModel: 3538 return ( 3539 model.incremental_stream 3540 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3541 else model.full_refresh_stream 3542 ) 3543 3544 def _create_async_job_status_mapping( 3545 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3546 ) -> Mapping[str, AsyncJobStatus]: 3547 api_status_to_cdk_status = {} 3548 for cdk_status, api_statuses in model.dict().items(): 3549 if cdk_status == "type": 3550 # This is an element of the dict because of the typing of the CDK but it is not a CDK status 3551 continue 3552 3553 for status in api_statuses: 3554 if status in api_status_to_cdk_status: 3555 raise ValueError( 3556 f"API status {status} is already set for CDK status {cdk_status}. 
Please ensure API statuses are only provided once" 3557 ) 3558 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3559 return api_status_to_cdk_status 3560 3561 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3562 match status: 3563 case "running": 3564 return AsyncJobStatus.RUNNING 3565 case "completed": 3566 return AsyncJobStatus.COMPLETED 3567 case "failed": 3568 return AsyncJobStatus.FAILED 3569 case "timeout": 3570 return AsyncJobStatus.TIMED_OUT 3571 case _: 3572 raise ValueError(f"Unsupported CDK status {status}") 3573 3574 def create_async_retriever( 3575 self, 3576 model: AsyncRetrieverModel, 3577 config: Config, 3578 *, 3579 name: str, 3580 primary_key: Optional[ 3581 Union[str, List[str], List[List[str]]] 3582 ], # this seems to be needed to match create_simple_retriever 3583 stream_slicer: Optional[StreamSlicer], 3584 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3585 transformations: List[RecordTransformation], 3586 **kwargs: Any, 3587 ) -> AsyncRetriever: 3588 if model.download_target_requester and not model.download_target_extractor: 3589 raise ValueError( 3590 f"`download_target_extractor` required if using a `download_target_requester`" 3591 ) 3592 3593 def _get_download_retriever( 3594 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3595 ) -> SimpleRetriever: 3596 # We create a record selector for the download retriever 3597 # with no schema normalization and no transformations, neither record filter 3598 # as all this occurs in the record_selector of the AsyncRetriever 3599 record_selector = RecordSelector( 3600 extractor=extractor, 3601 name=name, 3602 record_filter=None, 3603 transformations=[], 3604 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3605 config=config, 3606 parameters={}, 3607 ) 3608 paginator = ( 3609 self._create_component_from_model( 3610 model=model.download_paginator, 3611 decoder=_decoder, 3612 config=config, 3613 url_base="", 3614 ) 3615 if model.download_paginator 3616 else NoPagination(parameters={}) 3617 ) 3618 3619 return SimpleRetriever( 3620 requester=requester, 3621 record_selector=record_selector, 3622 primary_key=None, 3623 name=name, 3624 paginator=paginator, 3625 config=config, 3626 parameters={}, 3627 log_formatter=self._get_log_formatter(None, name), 3628 ) 3629 3630 def _get_job_timeout() -> datetime.timedelta: 3631 user_defined_timeout: Optional[int] = ( 3632 int( 3633 InterpolatedString.create( 3634 str(model.polling_job_timeout), 3635 parameters={}, 3636 ).eval(config) 3637 ) 3638 if model.polling_job_timeout 3639 else None 3640 ) 3641 3642 # check for user defined timeout during the test read or 15 minutes 3643 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3644 # default value for non-connector builder is 60 minutes. 
3645 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3646 3647 return ( 3648 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3649 ) 3650 3651 decoder = ( 3652 self._create_component_from_model(model=model.decoder, config=config) 3653 if model.decoder 3654 else JsonDecoder(parameters={}) 3655 ) 3656 record_selector = self._create_component_from_model( 3657 model=model.record_selector, 3658 config=config, 3659 decoder=decoder, 3660 name=name, 3661 transformations=transformations, 3662 client_side_incremental_sync=client_side_incremental_sync, 3663 ) 3664 3665 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3666 if self._should_limit_slices_fetched(): 3667 stream_slicer = cast( 3668 StreamSlicer, 3669 StreamSlicerTestReadDecorator( 3670 wrapped_slicer=stream_slicer, 3671 maximum_number_of_slices=self._limit_slices_fetched or 5, 3672 ), 3673 ) 3674 3675 creation_requester = self._create_component_from_model( 3676 model=model.creation_requester, 3677 decoder=decoder, 3678 config=config, 3679 name=f"job creation - {name}", 3680 ) 3681 polling_requester = self._create_component_from_model( 3682 model=model.polling_requester, 3683 decoder=decoder, 3684 config=config, 3685 name=f"job polling - {name}", 3686 ) 3687 job_download_components_name = f"job download - {name}" 3688 download_decoder = ( 3689 self._create_component_from_model(model=model.download_decoder, config=config) 3690 if model.download_decoder 3691 else JsonDecoder(parameters={}) 3692 ) 3693 download_extractor = ( 3694 self._create_component_from_model( 3695 model=model.download_extractor, 3696 config=config, 3697 decoder=download_decoder, 3698 parameters=model.parameters, 3699 ) 3700 if model.download_extractor 3701 else DpathExtractor( 3702 [], 3703 config=config, 3704 decoder=download_decoder, 3705 parameters=model.parameters or {}, 3706 ) 3707 ) 3708 download_requester = self._create_component_from_model( 3709 model=model.download_requester, 3710 decoder=download_decoder, 3711 config=config, 3712 name=job_download_components_name, 3713 ) 3714 download_retriever = _get_download_retriever( 3715 download_requester, download_extractor, download_decoder 3716 ) 3717 abort_requester = ( 3718 self._create_component_from_model( 3719 model=model.abort_requester, 3720 decoder=decoder, 3721 config=config, 3722 name=f"job abort - {name}", 3723 ) 3724 if model.abort_requester 3725 else None 3726 ) 3727 delete_requester = ( 3728 self._create_component_from_model( 3729 model=model.delete_requester, 3730 decoder=decoder, 3731 config=config, 3732 name=f"job delete - {name}", 3733 ) 3734 if model.delete_requester 3735 else None 3736 ) 3737 download_target_requester = ( 3738 self._create_component_from_model( 3739 model=model.download_target_requester, 3740 decoder=decoder, 3741 config=config, 3742 name=f"job extract_url - {name}", 3743 ) 3744 if model.download_target_requester 3745 else None 3746 ) 3747 status_extractor = self._create_component_from_model( 3748 model=model.status_extractor, decoder=decoder, config=config, name=name 3749 ) 3750 download_target_extractor = ( 3751 self._create_component_from_model( 3752 model=model.download_target_extractor, 3753 decoder=decoder, 3754 config=config, 3755 name=name, 3756 ) 3757 if model.download_target_extractor 3758 else None 3759 ) 3760 3761 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3762 creation_requester=creation_requester, 3763 polling_requester=polling_requester, 3764 
download_retriever=download_retriever, 3765 download_target_requester=download_target_requester, 3766 abort_requester=abort_requester, 3767 delete_requester=delete_requester, 3768 status_extractor=status_extractor, 3769 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3770 download_target_extractor=download_target_extractor, 3771 job_timeout=_get_job_timeout(), 3772 ) 3773 3774 async_job_partition_router = AsyncJobPartitionRouter( 3775 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3776 job_repository, 3777 stream_slices, 3778 self._job_tracker, 3779 self._message_repository, 3780 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk 3781 has_bulk_parent=False, 3782 # set the `job_max_retry` to 1 for the `Connector Builder`` use-case. 3783 # `None` == default retry is set to 3 attempts, under the hood. 3784 job_max_retry=1 if self._emit_connector_builder_messages else None, 3785 ), 3786 stream_slicer=stream_slicer, 3787 config=config, 3788 parameters=model.parameters or {}, 3789 ) 3790 3791 return AsyncRetriever( 3792 record_selector=record_selector, 3793 stream_slicer=async_job_partition_router, 3794 config=config, 3795 parameters=model.parameters or {}, 3796 ) 3797 3798 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3799 config_migrations = [ 3800 self._create_component_from_model(migration, config) 3801 for migration in ( 3802 model.config_normalization_rules.config_migrations 3803 if ( 3804 model.config_normalization_rules 3805 and model.config_normalization_rules.config_migrations 3806 ) 3807 else [] 3808 ) 3809 ] 3810 config_transformations = [ 3811 self._create_component_from_model(transformation, config) 3812 for transformation in ( 3813 model.config_normalization_rules.transformations 3814 if ( 3815 model.config_normalization_rules 3816 and model.config_normalization_rules.transformations 3817 ) 3818 else [] 3819 ) 3820 ] 3821 config_validations = [ 3822 self._create_component_from_model(validation, config) 3823 for validation in ( 3824 model.config_normalization_rules.validations 3825 if ( 3826 model.config_normalization_rules 3827 and model.config_normalization_rules.validations 3828 ) 3829 else [] 3830 ) 3831 ] 3832 3833 return Spec( 3834 connection_specification=model.connection_specification, 3835 documentation_url=model.documentation_url, 3836 advanced_auth=model.advanced_auth, 3837 parameters={}, 3838 config_migrations=config_migrations, 3839 config_transformations=config_transformations, 3840 config_validations=config_validations, 3841 ) 3842 3843 def create_substream_partition_router( 3844 self, 3845 model: SubstreamPartitionRouterModel, 3846 config: Config, 3847 *, 3848 stream_name: str, 3849 **kwargs: Any, 3850 ) -> SubstreamPartitionRouter: 3851 parent_stream_configs = [] 3852 if model.parent_stream_configs: 3853 parent_stream_configs.extend( 3854 [ 3855 self.create_parent_stream_config_with_substream_wrapper( 3856 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3857 ) 3858 for parent_stream_config in model.parent_stream_configs 3859 ] 3860 ) 3861 3862 return SubstreamPartitionRouter( 3863 parent_stream_configs=parent_stream_configs, 3864 parameters=model.parameters or {}, 3865 config=config, 3866 ) 3867 3868 def create_parent_stream_config_with_substream_wrapper( 3869 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 3870 ) -> Any: 3871 # getting the parent state 
3872 child_state = self._connector_state_manager.get_stream_state(stream_name, None) 3873 3874 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3875 has_parent_state = bool( 3876 self._connector_state_manager.get_stream_state(stream_name, None) 3877 if model.incremental_dependency 3878 else False 3879 ) 3880 connector_state_manager = self._instantiate_parent_stream_state_manager( 3881 child_state, config, model, has_parent_state 3882 ) 3883 3884 substream_factory = ModelToComponentFactory( 3885 connector_state_manager=connector_state_manager, 3886 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3887 limit_slices_fetched=self._limit_slices_fetched, 3888 emit_connector_builder_messages=self._emit_connector_builder_messages, 3889 disable_retries=self._disable_retries, 3890 disable_cache=self._disable_cache, 3891 message_repository=StateFilteringMessageRepository( 3892 LogAppenderMessageRepositoryDecorator( 3893 { 3894 "airbyte_cdk": {"stream": {"is_substream": True}}, 3895 "http": {"is_auxiliary": True}, 3896 }, 3897 self._message_repository, 3898 self._evaluate_log_level(self._emit_connector_builder_messages), 3899 ), 3900 ), 3901 api_budget=self._api_budget, 3902 ) 3903 3904 return substream_factory.create_parent_stream_config( 3905 model=model, config=config, stream_name=stream_name, **kwargs 3906 ) 3907 3908 def _instantiate_parent_stream_state_manager( 3909 self, 3910 child_state: MutableMapping[str, Any], 3911 config: Config, 3912 model: ParentStreamConfigModel, 3913 has_parent_state: bool, 3914 ) -> ConnectorStateManager: 3915 """ 3916 With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the 3917 `set_initial_state` flow that existed for the declarative cursors. This state is taken from 3918 self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account 3919 for the MessageRepository being different). So we need to pass a ConnectorStateManager to the 3920 ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if 3921 incremental_dependency is set. 3922 """ 3923 if model.incremental_dependency and child_state: 3924 parent_stream_name = model.stream.name or "" 3925 parent_state = ConcurrentPerPartitionCursor.get_parent_state( 3926 child_state, parent_stream_name 3927 ) 3928 3929 if not parent_state: 3930 # there are two migration cases: state value from child stream or from global state 3931 parent_state = ConcurrentPerPartitionCursor.get_global_state( 3932 child_state, parent_stream_name 3933 ) 3934 3935 if not parent_state and not isinstance(parent_state, dict): 3936 cursor_values = child_state.values() 3937 if cursor_values and len(cursor_values) == 1: 3938 # We assume the child state is a pair `{<cursor_field>: <cursor_value>}` and we will use the 3939 # cursor value as a parent state. 
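# Illustrative example (hypothetical values): a child state of {"updated_at": "2024-05-01T00:00:00Z"}
# would be turned into a parent AirbyteStateMessage whose stream_state is
# {<parent cursor_field>: "2024-05-01T00:00:00Z"}.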
3940 incremental_sync_model: Union[ 3941 DatetimeBasedCursorModel, 3942 IncrementingCountCursorModel, 3943 ] = ( 3944 model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream 3945 if isinstance(model.stream, DeclarativeStreamModel) 3946 else self._get_state_delegating_stream_model( 3947 has_parent_state, model.stream 3948 ).incremental_sync 3949 ) 3950 cursor_field = InterpolatedString.create( 3951 incremental_sync_model.cursor_field, 3952 parameters=incremental_sync_model.parameters or {}, 3953 ).eval(config) 3954 parent_state = AirbyteStateMessage( 3955 type=AirbyteStateType.STREAM, 3956 stream=AirbyteStreamState( 3957 stream_descriptor=StreamDescriptor( 3958 name=parent_stream_name, namespace=None 3959 ), 3960 stream_state=AirbyteStateBlob( 3961 {cursor_field: list(cursor_values)[0]} 3962 ), 3963 ), 3964 ) 3965 return ConnectorStateManager([parent_state] if parent_state else []) 3966 3967 return ConnectorStateManager([]) 3968 3969 @staticmethod 3970 def create_wait_time_from_header( 3971 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 3972 ) -> WaitTimeFromHeaderBackoffStrategy: 3973 return WaitTimeFromHeaderBackoffStrategy( 3974 header=model.header, 3975 parameters=model.parameters or {}, 3976 config=config, 3977 regex=model.regex, 3978 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 3979 if model.max_waiting_time_in_seconds is not None 3980 else None, 3981 ) 3982 3983 @staticmethod 3984 def create_wait_until_time_from_header( 3985 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3986 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3987 return WaitUntilTimeFromHeaderBackoffStrategy( 3988 header=model.header, 3989 parameters=model.parameters or {}, 3990 config=config, 3991 min_wait=model.min_wait, 3992 regex=model.regex, 3993 ) 3994 3995 def get_message_repository(self) -> MessageRepository: 3996 return self._message_repository 3997 3998 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 3999 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 4000 4001 @staticmethod 4002 def create_components_mapping_definition( 4003 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 4004 ) -> ComponentMappingDefinition: 4005 interpolated_value = InterpolatedString.create( 4006 model.value, parameters=model.parameters or {} 4007 ) 4008 field_path = [ 4009 InterpolatedString.create(path, parameters=model.parameters or {}) 4010 for path in model.field_path 4011 ] 4012 return ComponentMappingDefinition( 4013 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 4014 value=interpolated_value, 4015 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 4016 create_or_update=model.create_or_update, 4017 condition=model.condition, 4018 parameters=model.parameters or {}, 4019 ) 4020 4021 def create_http_components_resolver( 4022 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 4023 ) -> Any: 4024 retriever = self._create_component_from_model( 4025 model=model.retriever, 4026 config=config, 4027 name=f"{stream_name if stream_name else '__http_components_resolver'}", 4028 primary_key=None, 4029 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4030 transformations=[], 4031 ) 4032 4033 components_mapping = [] 4034 for 
component_mapping_definition_model in model.components_mapping: 4035 if component_mapping_definition_model.condition: 4036 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4037 components_mapping.append( 4038 self._create_component_from_model( 4039 model=component_mapping_definition_model, 4040 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4041 component_mapping_definition_model.value_type 4042 ), 4043 config=config, 4044 ) 4045 ) 4046 4047 return HttpComponentsResolver( 4048 retriever=retriever, 4049 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4050 config=config, 4051 components_mapping=components_mapping, 4052 parameters=model.parameters or {}, 4053 ) 4054 4055 @staticmethod 4056 def create_stream_config( 4057 model: StreamConfigModel, config: Config, **kwargs: Any 4058 ) -> StreamConfig: 4059 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 4060 [x for x in model.configs_pointer] if model.configs_pointer else [] 4061 ) 4062 4063 return StreamConfig( 4064 configs_pointer=model_configs_pointer, 4065 default_values=model.default_values, 4066 parameters=model.parameters or {}, 4067 ) 4068 4069 def create_config_components_resolver( 4070 self, 4071 model: ConfigComponentsResolverModel, 4072 config: Config, 4073 ) -> Any: 4074 model_stream_configs = ( 4075 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 4076 ) 4077 4078 stream_configs = [ 4079 self._create_component_from_model( 4080 stream_config, config=config, parameters=model.parameters or {} 4081 ) 4082 for stream_config in model_stream_configs 4083 ] 4084 4085 components_mapping = [ 4086 self._create_component_from_model( 4087 model=components_mapping_definition_model, 4088 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4089 components_mapping_definition_model.value_type 4090 ), 4091 config=config, 4092 parameters=model.parameters, 4093 ) 4094 for components_mapping_definition_model in model.components_mapping 4095 ] 4096 4097 return ConfigComponentsResolver( 4098 stream_configs=stream_configs, 4099 config=config, 4100 components_mapping=components_mapping, 4101 parameters=model.parameters or {}, 4102 ) 4103 4104 def create_parametrized_components_resolver( 4105 self, 4106 model: ParametrizedComponentsResolverModel, 4107 config: Config, 4108 ) -> ParametrizedComponentsResolver: 4109 stream_parameters = StreamParametersDefinition( 4110 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 4111 ) 4112 4113 components_mapping = [] 4114 for components_mapping_definition_model in model.components_mapping: 4115 if components_mapping_definition_model.condition: 4116 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4117 components_mapping.append( 4118 self._create_component_from_model( 4119 model=components_mapping_definition_model, 4120 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4121 components_mapping_definition_model.value_type 4122 ), 4123 config=config, 4124 ) 4125 ) 4126 return ParametrizedComponentsResolver( 4127 stream_parameters=stream_parameters, 4128 config=config, 4129 components_mapping=components_mapping, 4130 parameters=model.parameters or {}, 4131 ) 4132 4133 _UNSUPPORTED_DECODER_ERROR = ( 4134 "Specified decoder of {decoder_type} is not supported for pagination." 
4135 "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead." 4136 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 4137 ) 4138 4139 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 4140 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 4141 return True 4142 elif isinstance(decoder, CompositeRawDecoder): 4143 return self._is_supported_parser_for_pagination(decoder.parser) 4144 else: 4145 return False 4146 4147 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 4148 if isinstance(parser, JsonParser): 4149 return True 4150 elif isinstance(parser, GzipParser): 4151 return isinstance(parser.inner_parser, JsonParser) 4152 else: 4153 return False 4154 4155 def create_http_api_budget( 4156 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4157 ) -> HttpAPIBudget: 4158 policies = [ 4159 self._create_component_from_model(model=policy, config=config) 4160 for policy in model.policies 4161 ] 4162 4163 return HttpAPIBudget( 4164 policies=policies, 4165 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4166 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4167 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4168 ) 4169 4170 def create_fixed_window_call_rate_policy( 4171 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4172 ) -> FixedWindowCallRatePolicy: 4173 matchers = [ 4174 self._create_component_from_model(model=matcher, config=config) 4175 for matcher in model.matchers 4176 ] 4177 4178 # Set the initial reset timestamp to 10 days from now. 4179 # This value will be updated by the first request. 
4180 return FixedWindowCallRatePolicy( 4181 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4182 period=parse_duration(model.period), 4183 call_limit=model.call_limit, 4184 matchers=matchers, 4185 ) 4186 4187 def create_file_uploader( 4188 self, model: FileUploaderModel, config: Config, **kwargs: Any 4189 ) -> FileUploader: 4190 name = "File Uploader" 4191 requester = self._create_component_from_model( 4192 model=model.requester, 4193 config=config, 4194 name=name, 4195 **kwargs, 4196 ) 4197 download_target_extractor = self._create_component_from_model( 4198 model=model.download_target_extractor, 4199 config=config, 4200 name=name, 4201 **kwargs, 4202 ) 4203 emit_connector_builder_messages = self._emit_connector_builder_messages 4204 file_uploader = DefaultFileUploader( 4205 requester=requester, 4206 download_target_extractor=download_target_extractor, 4207 config=config, 4208 file_writer=NoopFileWriter() 4209 if emit_connector_builder_messages 4210 else LocalFileSystemFileWriter(), 4211 parameters=model.parameters or {}, 4212 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4213 ) 4214 4215 return ( 4216 ConnectorBuilderFileUploader(file_uploader) 4217 if emit_connector_builder_messages 4218 else file_uploader 4219 ) 4220 4221 def create_moving_window_call_rate_policy( 4222 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4223 ) -> MovingWindowCallRatePolicy: 4224 rates = [ 4225 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4226 ] 4227 matchers = [ 4228 self._create_component_from_model(model=matcher, config=config) 4229 for matcher in model.matchers 4230 ] 4231 return MovingWindowCallRatePolicy( 4232 rates=rates, 4233 matchers=matchers, 4234 ) 4235 4236 def create_unlimited_call_rate_policy( 4237 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4238 ) -> UnlimitedCallRatePolicy: 4239 matchers = [ 4240 self._create_component_from_model(model=matcher, config=config) 4241 for matcher in model.matchers 4242 ] 4243 4244 return UnlimitedCallRatePolicy( 4245 matchers=matchers, 4246 ) 4247 4248 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 4249 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 4250 return Rate( 4251 limit=int(interpolated_limit.eval(config=config)), 4252 interval=parse_duration(model.interval), 4253 ) 4254 4255 def create_http_request_matcher( 4256 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4257 ) -> HttpRequestRegexMatcher: 4258 return HttpRequestRegexMatcher( 4259 method=model.method, 4260 url_base=model.url_base, 4261 url_path_pattern=model.url_path_pattern, 4262 params=model.params, 4263 headers=model.headers, 4264 ) 4265 4266 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 4267 self._api_budget = self.create_component( 4268 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 4269 ) 4270 4271 def create_grouping_partition_router( 4272 self, 4273 model: GroupingPartitionRouterModel, 4274 config: Config, 4275 *, 4276 stream_name: str, 4277 **kwargs: Any, 4278 ) -> GroupingPartitionRouter: 4279 underlying_router = self._create_component_from_model( 4280 model=model.underlying_partition_router, 4281 config=config, 4282 stream_name=stream_name, 4283 **kwargs, 4284 ) 4285 if model.group_size < 1: 4286 raise ValueError(f"Group size must be greater than 0, got 
{model.group_size}") 4287 4288 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4289 # because they are specific to individual partitions and cannot be aggregated or handled 4290 # when grouping, potentially leading to incorrect API calls. Any request customization 4291 # should be managed at the stream level through the requester's configuration. 4292 if isinstance(underlying_router, SubstreamPartitionRouter): 4293 if any( 4294 parent_config.request_option 4295 for parent_config in underlying_router.parent_stream_configs 4296 ): 4297 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4298 4299 if isinstance(underlying_router, ListPartitionRouter): 4300 if underlying_router.request_option: 4301 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4302 4303 return GroupingPartitionRouter( 4304 group_size=model.group_size, 4305 underlying_partition_router=underlying_router, 4306 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4307 config=config, 4308 ) 4309 4310 def _ensure_query_properties_to_model( 4311 self, requester: Union[HttpRequesterModel, CustomRequesterModel] 4312 ) -> None: 4313 """ 4314 For some reason, it seems like CustomRequesterModel request_parameters stays as dictionaries which means that 4315 the other conditions relying on it being QueryPropertiesModel instead of a dict fail. Here, we migrate them to 4316 proper model. 4317 """ 4318 if not hasattr(requester, "request_parameters"): 4319 return 4320 4321 request_parameters = requester.request_parameters 4322 if request_parameters and isinstance(request_parameters, Dict): 4323 for request_parameter_key in request_parameters.keys(): 4324 request_parameter = request_parameters[request_parameter_key] 4325 if ( 4326 isinstance(request_parameter, Dict) 4327 and request_parameter.get("type") == "QueryProperties" 4328 ): 4329 request_parameters[request_parameter_key] = QueryPropertiesModel.parse_obj( 4330 request_parameter 4331 ) 4332 4333 def _get_catalog_defined_cursor_field( 4334 self, stream_name: str, allow_catalog_defined_cursor_field: bool 4335 ) -> Optional[CursorField]: 4336 if not allow_catalog_defined_cursor_field: 4337 return None 4338 4339 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 4340 4341 # Depending on the operation is being performed, there may not be a configured stream yet. In this 4342 # case we return None which will then use the default cursor field defined on the cursor model 4343 if not configured_stream or not configured_stream.cursor_field: 4344 return None 4345 elif len(configured_stream.cursor_field) > 1: 4346 raise ValueError( 4347 f"The `{stream_name}` stream does not support nested cursor_field. Please specify only a single cursor_field for the stream in the configured catalog." 4348 ) 4349 else: 4350 return CursorField( 4351 cursor_field_key=configured_stream.cursor_field[0], 4352 supports_catalog_defined_cursor_field=allow_catalog_defined_cursor_field, 4353 )
671class ModelToComponentFactory: 672 EPOCH_DATETIME_FORMAT = "%s" 673 674 def __init__( 675 self, 676 limit_pages_fetched_per_slice: Optional[int] = None, 677 limit_slices_fetched: Optional[int] = None, 678 emit_connector_builder_messages: bool = False, 679 disable_retries: bool = False, 680 disable_cache: bool = False, 681 message_repository: Optional[MessageRepository] = None, 682 connector_state_manager: Optional[ConnectorStateManager] = None, 683 max_concurrent_async_job_count: Optional[int] = None, 684 configured_catalog: Optional[ConfiguredAirbyteCatalog] = None, 685 api_budget: Optional[APIBudget] = None, 686 ): 687 self._init_mappings() 688 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 689 self._limit_slices_fetched = limit_slices_fetched 690 self._emit_connector_builder_messages = emit_connector_builder_messages 691 self._disable_retries = disable_retries 692 self._disable_cache = disable_cache 693 self._message_repository = message_repository or InMemoryMessageRepository( 694 self._evaluate_log_level(emit_connector_builder_messages) 695 ) 696 self._stream_name_to_configured_stream = self._create_stream_name_to_configured_stream( 697 configured_catalog 698 ) 699 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 700 self._api_budget: Optional[Union[APIBudget]] = api_budget 701 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 702 # placeholder for deprecation warnings 703 self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = [] 704 705 def _init_mappings(self) -> None: 706 self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { 707 AddedFieldDefinitionModel: self.create_added_field_definition, 708 AddFieldsModel: self.create_add_fields, 709 ApiKeyAuthenticatorModel: self.create_api_key_authenticator, 710 BasicHttpAuthenticatorModel: self.create_basic_http_authenticator, 711 BearerAuthenticatorModel: self.create_bearer_authenticator, 712 CheckStreamModel: self.create_check_stream, 713 DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config, 714 CheckDynamicStreamModel: self.create_check_dynamic_stream, 715 CompositeErrorHandlerModel: self.create_composite_error_handler, 716 ConcurrencyLevelModel: self.create_concurrency_level, 717 ConfigMigrationModel: self.create_config_migration, 718 ConfigAddFieldsModel: self.create_config_add_fields, 719 ConfigRemapFieldModel: self.create_config_remap_field, 720 ConfigRemoveFieldsModel: self.create_config_remove_fields, 721 ConstantBackoffStrategyModel: self.create_constant_backoff_strategy, 722 CsvDecoderModel: self.create_csv_decoder, 723 CursorPaginationModel: self.create_cursor_pagination, 724 CustomAuthenticatorModel: self.create_custom_component, 725 CustomBackoffStrategyModel: self.create_custom_component, 726 CustomDecoderModel: self.create_custom_component, 727 CustomErrorHandlerModel: self.create_custom_component, 728 CustomRecordExtractorModel: self.create_custom_component, 729 CustomRecordFilterModel: self.create_custom_component, 730 CustomRequesterModel: self.create_custom_component, 731 CustomRetrieverModel: self.create_custom_component, 732 CustomSchemaLoader: self.create_custom_component, 733 CustomSchemaNormalizationModel: self.create_custom_component, 734 CustomStateMigration: self.create_custom_component, 735 CustomPaginationStrategyModel: self.create_custom_component, 736 CustomPartitionRouterModel: self.create_custom_component, 737 CustomTransformationModel: 
self.create_custom_component, 738 CustomValidationStrategyModel: self.create_custom_component, 739 CustomConfigTransformationModel: self.create_custom_component, 740 DeclarativeStreamModel: self.create_default_stream, 741 DefaultErrorHandlerModel: self.create_default_error_handler, 742 DefaultPaginatorModel: self.create_default_paginator, 743 DpathExtractorModel: self.create_dpath_extractor, 744 DpathValidatorModel: self.create_dpath_validator, 745 ResponseToFileExtractorModel: self.create_response_to_file_extractor, 746 ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy, 747 SessionTokenAuthenticatorModel: self.create_session_token_authenticator, 748 GroupByKeyMergeStrategyModel: self.create_group_by_key, 749 HttpRequesterModel: self.create_http_requester, 750 HttpResponseFilterModel: self.create_http_response_filter, 751 InlineSchemaLoaderModel: self.create_inline_schema_loader, 752 JsonDecoderModel: self.create_json_decoder, 753 JsonlDecoderModel: self.create_jsonl_decoder, 754 JsonSchemaPropertySelectorModel: self.create_json_schema_property_selector, 755 GzipDecoderModel: self.create_gzip_decoder, 756 KeysToLowerModel: self.create_keys_to_lower_transformation, 757 KeysToSnakeCaseModel: self.create_keys_to_snake_transformation, 758 KeysReplaceModel: self.create_keys_replace_transformation, 759 FlattenFieldsModel: self.create_flatten_fields, 760 DpathFlattenFieldsModel: self.create_dpath_flatten_fields, 761 IterableDecoderModel: self.create_iterable_decoder, 762 XmlDecoderModel: self.create_xml_decoder, 763 JsonFileSchemaLoaderModel: self.create_json_file_schema_loader, 764 DynamicSchemaLoaderModel: self.create_dynamic_schema_loader, 765 SchemaTypeIdentifierModel: self.create_schema_type_identifier, 766 TypesMapModel: self.create_types_map, 767 ComplexFieldTypeModel: self.create_complex_field_type, 768 JwtAuthenticatorModel: self.create_jwt_authenticator, 769 LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration, 770 ListPartitionRouterModel: self.create_list_partition_router, 771 MinMaxDatetimeModel: self.create_min_max_datetime, 772 NoAuthModel: self.create_no_auth, 773 NoPaginationModel: self.create_no_pagination, 774 OAuthAuthenticatorModel: self.create_oauth_authenticator, 775 OffsetIncrementModel: self.create_offset_increment, 776 PageIncrementModel: self.create_page_increment, 777 ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper, 778 PredicateValidatorModel: self.create_predicate_validator, 779 PropertiesFromEndpointModel: self.create_properties_from_endpoint, 780 PropertyChunkingModel: self.create_property_chunking, 781 QueryPropertiesModel: self.create_query_properties, 782 RecordFilterModel: self.create_record_filter, 783 RecordSelectorModel: self.create_record_selector, 784 RemoveFieldsModel: self.create_remove_fields, 785 RequestPathModel: self.create_request_path, 786 RequestOptionModel: self.create_request_option, 787 LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator, 788 SelectiveAuthenticatorModel: self.create_selective_authenticator, 789 SimpleRetrieverModel: self.create_simple_retriever, 790 StateDelegatingStreamModel: self.create_state_delegating_stream, 791 SpecModel: self.create_spec, 792 SubstreamPartitionRouterModel: self.create_substream_partition_router, 793 ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema, 794 WaitTimeFromHeaderModel: self.create_wait_time_from_header, 795 WaitUntilTimeFromHeaderModel: 
self.create_wait_until_time_from_header, 796 AsyncRetrieverModel: self.create_async_retriever, 797 HttpComponentsResolverModel: self.create_http_components_resolver, 798 ConfigComponentsResolverModel: self.create_config_components_resolver, 799 ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver, 800 StreamConfigModel: self.create_stream_config, 801 ComponentMappingDefinitionModel: self.create_components_mapping_definition, 802 ZipfileDecoderModel: self.create_zipfile_decoder, 803 HTTPAPIBudgetModel: self.create_http_api_budget, 804 FileUploaderModel: self.create_file_uploader, 805 FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, 806 MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, 807 UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, 808 RateModel: self.create_rate, 809 HttpRequestRegexMatcherModel: self.create_http_request_matcher, 810 GroupingPartitionRouterModel: self.create_grouping_partition_router, 811 } 812 813 # Needed for the case where we need to perform a second parse on the fields of a custom component 814 self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR} 815 816 @staticmethod 817 def _create_stream_name_to_configured_stream( 818 configured_catalog: Optional[ConfiguredAirbyteCatalog], 819 ) -> Mapping[str, ConfiguredAirbyteStream]: 820 return ( 821 {stream.stream.name: stream for stream in configured_catalog.streams} 822 if configured_catalog 823 else {} 824 ) 825 826 def create_component( 827 self, 828 model_type: Type[BaseModel], 829 component_definition: ComponentDefinition, 830 config: Config, 831 **kwargs: Any, 832 ) -> Any: 833 """ 834 Takes a given Pydantic model type and a Mapping representing a component definition, and creates a declarative component and 835 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating 836 declarative components from that model. 837 838 :param model_type: The type of declarative component that is being initialized 839 :param component_definition: The mapping that represents a declarative component 840 :param config: The connector config that is provided by the customer 841 :return: The declarative component to be used at runtime 842 """ 843 844 component_type = component_definition.get("type") 845 if component_definition.get("type") != model_type.__name__: 846 raise ValueError( 847 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 848 ) 849 850 declarative_component_model = model_type.parse_obj(component_definition) 851 852 if not isinstance(declarative_component_model, model_type): 853 raise ValueError( 854 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 855 ) 856 857 return self._create_component_from_model( 858 model=declarative_component_model, config=config, **kwargs 859 ) 860 861 def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any: 862 if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR: 863 raise ValueError( 864 f"{model.__class__} with attributes {model} is not a valid component type" 865 ) 866 component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__) 867 if not component_constructor: 868 raise ValueError(f"Could not find constructor for {model.__class__}") 869 870 # collect deprecation warnings for supported models. 
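# Manifest models that still use deprecated fields are generated as BaseModelWithDeprecations
# subclasses and carry a `_deprecation_logs` list; the hook below merges those entries
# (deduplicated) into self._collected_deprecation_logs so that get_model_deprecations() can
# surface them, e.g. in the Connector Builder, once the components have been built.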
871 if isinstance(model, BaseModelWithDeprecations): 872 self._collect_model_deprecations(model) 873 874 return component_constructor(model=model, config=config, **kwargs) 875 876 def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]: 877 """ 878 Returns the deprecation warnings that were collected during the creation of components. 879 """ 880 return self._collected_deprecation_logs 881 882 def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None: 883 """ 884 Collects deprecation logs from the given model and appends any new logs to the internal collection. 885 886 This method checks if the provided model has deprecation logs (identified by the presence of the DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not already been collected, ensuring that duplicate logs are avoided. 887 888 Args: 889 model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs. 890 """ 891 if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None: 892 for log in model._deprecation_logs: 893 # avoid duplicates for deprecation logs observed. 894 if log not in self._collected_deprecation_logs: 895 self._collected_deprecation_logs.append(log) 896 897 def create_config_migration( 898 self, model: ConfigMigrationModel, config: Config 899 ) -> ConfigMigration: 900 transformations: List[ConfigTransformation] = [ 901 self._create_component_from_model(transformation, config) 902 for transformation in model.transformations 903 ] 904 905 return ConfigMigration( 906 description=model.description, 907 transformations=transformations, 908 ) 909 910 def create_config_add_fields( 911 self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any 912 ) -> ConfigAddFields: 913 fields = [self._create_component_from_model(field, config) for field in model.fields] 914 return ConfigAddFields( 915 fields=fields, 916 condition=model.condition or "", 917 ) 918 919 @staticmethod 920 def create_config_remove_fields( 921 model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any 922 ) -> ConfigRemoveFields: 923 return ConfigRemoveFields( 924 field_pointers=model.field_pointers, 925 condition=model.condition or "", 926 ) 927 928 @staticmethod 929 def create_config_remap_field( 930 model: ConfigRemapFieldModel, config: Config, **kwargs: Any 931 ) -> ConfigRemapField: 932 mapping = cast(Mapping[str, Any], model.map) 933 return ConfigRemapField( 934 map=mapping, 935 field_path=model.field_path, 936 config=config, 937 ) 938 939 def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator: 940 strategy = self._create_component_from_model(model.validation_strategy, config) 941 942 return DpathValidator( 943 field_path=model.field_path, 944 strategy=strategy, 945 ) 946 947 def create_predicate_validator( 948 self, model: PredicateValidatorModel, config: Config 949 ) -> PredicateValidator: 950 strategy = self._create_component_from_model(model.validation_strategy, config) 951 952 return PredicateValidator( 953 value=model.value, 954 strategy=strategy, 955 ) 956 957 @staticmethod 958 def create_validate_adheres_to_schema( 959 model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any 960 ) -> ValidateAdheresToSchema: 961 base_schema = cast(Mapping[str, Any], model.base_schema) 962 return ValidateAdheresToSchema( 963 schema=base_schema, 964 ) 965 966 @staticmethod 967 
def create_added_field_definition( 968 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 969 ) -> AddedFieldDefinition: 970 interpolated_value = InterpolatedString.create( 971 model.value, parameters=model.parameters or {} 972 ) 973 return AddedFieldDefinition( 974 path=model.path, 975 value=interpolated_value, 976 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 977 parameters=model.parameters or {}, 978 ) 979 980 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 981 added_field_definitions = [ 982 self._create_component_from_model( 983 model=added_field_definition_model, 984 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 985 added_field_definition_model.value_type 986 ), 987 config=config, 988 ) 989 for added_field_definition_model in model.fields 990 ] 991 return AddFields( 992 fields=added_field_definitions, 993 condition=model.condition or "", 994 parameters=model.parameters or {}, 995 ) 996 997 def create_keys_to_lower_transformation( 998 self, model: KeysToLowerModel, config: Config, **kwargs: Any 999 ) -> KeysToLowerTransformation: 1000 return KeysToLowerTransformation() 1001 1002 def create_keys_to_snake_transformation( 1003 self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any 1004 ) -> KeysToSnakeCaseTransformation: 1005 return KeysToSnakeCaseTransformation() 1006 1007 def create_keys_replace_transformation( 1008 self, model: KeysReplaceModel, config: Config, **kwargs: Any 1009 ) -> KeysReplaceTransformation: 1010 return KeysReplaceTransformation( 1011 old=model.old, new=model.new, parameters=model.parameters or {} 1012 ) 1013 1014 def create_flatten_fields( 1015 self, model: FlattenFieldsModel, config: Config, **kwargs: Any 1016 ) -> FlattenFields: 1017 return FlattenFields( 1018 flatten_lists=model.flatten_lists if model.flatten_lists is not None else True 1019 ) 1020 1021 def create_dpath_flatten_fields( 1022 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 1023 ) -> DpathFlattenFields: 1024 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 1025 key_transformation = ( 1026 KeyTransformation( 1027 config=config, 1028 prefix=model.key_transformation.prefix, 1029 suffix=model.key_transformation.suffix, 1030 parameters=model.parameters or {}, 1031 ) 1032 if model.key_transformation is not None 1033 else None 1034 ) 1035 return DpathFlattenFields( 1036 config=config, 1037 field_path=model_field_path, 1038 delete_origin_value=model.delete_origin_value 1039 if model.delete_origin_value is not None 1040 else False, 1041 replace_record=model.replace_record if model.replace_record is not None else False, 1042 key_transformation=key_transformation, 1043 parameters=model.parameters or {}, 1044 ) 1045 1046 @staticmethod 1047 def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: 1048 if not value_type: 1049 return None 1050 names_to_types = { 1051 ValueType.string: str, 1052 ValueType.number: float, 1053 ValueType.integer: int, 1054 ValueType.boolean: bool, 1055 } 1056 return names_to_types[value_type] 1057 1058 def create_api_key_authenticator( 1059 self, 1060 model: ApiKeyAuthenticatorModel, 1061 config: Config, 1062 token_provider: Optional[TokenProvider] = None, 1063 **kwargs: Any, 1064 ) -> ApiKeyAuthenticator: 1065 if model.inject_into is None and model.header is None: 1066 raise ValueError( 1067 "Expected either inject_into or header to be set for 
ApiKeyAuthenticator" 1068 ) 1069 1070 if model.inject_into is not None and model.header is not None: 1071 raise ValueError( 1072 "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" 1073 ) 1074 1075 if token_provider is not None and model.api_token != "": 1076 raise ValueError( 1077 "If token_provider is set, api_token is ignored and has to be set to empty string." 1078 ) 1079 1080 request_option = ( 1081 self._create_component_from_model( 1082 model.inject_into, config, parameters=model.parameters or {} 1083 ) 1084 if model.inject_into 1085 else RequestOption( 1086 inject_into=RequestOptionType.header, 1087 field_name=model.header or "", 1088 parameters=model.parameters or {}, 1089 ) 1090 ) 1091 1092 return ApiKeyAuthenticator( 1093 token_provider=( 1094 token_provider 1095 if token_provider is not None 1096 else InterpolatedStringTokenProvider( 1097 api_token=model.api_token or "", 1098 config=config, 1099 parameters=model.parameters or {}, 1100 ) 1101 ), 1102 request_option=request_option, 1103 config=config, 1104 parameters=model.parameters or {}, 1105 ) 1106 1107 def create_legacy_to_per_partition_state_migration( 1108 self, 1109 model: LegacyToPerPartitionStateMigrationModel, 1110 config: Mapping[str, Any], 1111 declarative_stream: DeclarativeStreamModel, 1112 ) -> LegacyToPerPartitionStateMigration: 1113 retriever = declarative_stream.retriever 1114 if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)): 1115 raise ValueError( 1116 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}" 1117 ) 1118 partition_router = retriever.partition_router 1119 if not isinstance( 1120 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 1121 ): 1122 raise ValueError( 1123 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 1124 ) 1125 if not hasattr(partition_router, "parent_stream_configs"): 1126 raise ValueError( 1127 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 1128 ) 1129 1130 if not hasattr(declarative_stream, "incremental_sync"): 1131 raise ValueError( 1132 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 1133 ) 1134 1135 return LegacyToPerPartitionStateMigration( 1136 partition_router, # type: ignore # was already checked above 1137 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 
1138 config, 1139 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 1140 ) 1141 1142 def create_session_token_authenticator( 1143 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 1144 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 1145 decoder = ( 1146 self._create_component_from_model(model=model.decoder, config=config) 1147 if model.decoder 1148 else JsonDecoder(parameters={}) 1149 ) 1150 login_requester = self._create_component_from_model( 1151 model=model.login_requester, 1152 config=config, 1153 name=f"{name}_login_requester", 1154 decoder=decoder, 1155 ) 1156 token_provider = SessionTokenProvider( 1157 login_requester=login_requester, 1158 session_token_path=model.session_token_path, 1159 expiration_duration=parse_duration(model.expiration_duration) 1160 if model.expiration_duration 1161 else None, 1162 parameters=model.parameters or {}, 1163 message_repository=self._message_repository, 1164 decoder=decoder, 1165 ) 1166 if model.request_authentication.type == "Bearer": 1167 return ModelToComponentFactory.create_bearer_authenticator( 1168 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 1169 config, 1170 token_provider=token_provider, 1171 ) 1172 else: 1173 return self.create_api_key_authenticator( 1174 ApiKeyAuthenticatorModel( 1175 type="ApiKeyAuthenticator", 1176 api_token="", 1177 inject_into=model.request_authentication.inject_into, 1178 ), # type: ignore # $parameters and headers default to None 1179 config=config, 1180 token_provider=token_provider, 1181 ) 1182 1183 @staticmethod 1184 def create_basic_http_authenticator( 1185 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 1186 ) -> BasicHttpAuthenticator: 1187 return BasicHttpAuthenticator( 1188 password=model.password or "", 1189 username=model.username, 1190 config=config, 1191 parameters=model.parameters or {}, 1192 ) 1193 1194 @staticmethod 1195 def create_bearer_authenticator( 1196 model: BearerAuthenticatorModel, 1197 config: Config, 1198 token_provider: Optional[TokenProvider] = None, 1199 **kwargs: Any, 1200 ) -> BearerAuthenticator: 1201 if token_provider is not None and model.api_token != "": 1202 raise ValueError( 1203 "If token_provider is set, api_token is ignored and has to be set to empty string." 
1204 ) 1205 return BearerAuthenticator( 1206 token_provider=( 1207 token_provider 1208 if token_provider is not None 1209 else InterpolatedStringTokenProvider( 1210 api_token=model.api_token or "", 1211 config=config, 1212 parameters=model.parameters or {}, 1213 ) 1214 ), 1215 config=config, 1216 parameters=model.parameters or {}, 1217 ) 1218 1219 @staticmethod 1220 def create_dynamic_stream_check_config( 1221 model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any 1222 ) -> DynamicStreamCheckConfig: 1223 return DynamicStreamCheckConfig( 1224 dynamic_stream_name=model.dynamic_stream_name, 1225 stream_count=model.stream_count or 0, 1226 ) 1227 1228 def create_check_stream( 1229 self, model: CheckStreamModel, config: Config, **kwargs: Any 1230 ) -> CheckStream: 1231 if model.dynamic_streams_check_configs is None and model.stream_names is None: 1232 raise ValueError( 1233 "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream" 1234 ) 1235 1236 dynamic_streams_check_configs = ( 1237 [ 1238 self._create_component_from_model(model=dynamic_stream_check_config, config=config) 1239 for dynamic_stream_check_config in model.dynamic_streams_check_configs 1240 ] 1241 if model.dynamic_streams_check_configs 1242 else [] 1243 ) 1244 1245 return CheckStream( 1246 stream_names=model.stream_names or [], 1247 dynamic_streams_check_configs=dynamic_streams_check_configs, 1248 parameters={}, 1249 ) 1250 1251 @staticmethod 1252 def create_check_dynamic_stream( 1253 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 1254 ) -> CheckDynamicStream: 1255 assert model.use_check_availability is not None # for mypy 1256 1257 use_check_availability = model.use_check_availability 1258 1259 return CheckDynamicStream( 1260 stream_count=model.stream_count, 1261 use_check_availability=use_check_availability, 1262 parameters={}, 1263 ) 1264 1265 def create_composite_error_handler( 1266 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 1267 ) -> CompositeErrorHandler: 1268 error_handlers = [ 1269 self._create_component_from_model(model=error_handler_model, config=config) 1270 for error_handler_model in model.error_handlers 1271 ] 1272 return CompositeErrorHandler( 1273 error_handlers=error_handlers, parameters=model.parameters or {} 1274 ) 1275 1276 @staticmethod 1277 def create_concurrency_level( 1278 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 1279 ) -> ConcurrencyLevel: 1280 return ConcurrencyLevel( 1281 default_concurrency=model.default_concurrency, 1282 max_concurrency=model.max_concurrency, 1283 config=config, 1284 parameters={}, 1285 ) 1286 1287 @staticmethod 1288 def apply_stream_state_migrations( 1289 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 1290 ) -> MutableMapping[str, Any]: 1291 if stream_state_migrations: 1292 for state_migration in stream_state_migrations: 1293 if state_migration.should_migrate(stream_state): 1294 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 
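# Wrapping the migrated state in dict() keeps the running state mutable, so that any subsequent
# migration in stream_state_migrations (and, later, the cursor) can continue to update it.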
1295 stream_state = dict(state_migration.migrate(stream_state)) 1296 return stream_state 1297 1298 def create_concurrent_cursor_from_datetime_based_cursor( 1299 self, 1300 model_type: Type[BaseModel], 1301 component_definition: ComponentDefinition, 1302 stream_name: str, 1303 stream_namespace: Optional[str], 1304 stream_state: MutableMapping[str, Any], 1305 config: Config, 1306 message_repository: Optional[MessageRepository] = None, 1307 runtime_lookback_window: Optional[datetime.timedelta] = None, 1308 **kwargs: Any, 1309 ) -> ConcurrentCursor: 1310 component_type = component_definition.get("type") 1311 if component_definition.get("type") != model_type.__name__: 1312 raise ValueError( 1313 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1314 ) 1315 1316 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1317 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1318 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1319 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1320 if "$parameters" not in component_definition and "parameters" in component_definition: 1321 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1322 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1323 1324 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1325 raise ValueError( 1326 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1327 ) 1328 1329 model_parameters = datetime_based_cursor_model.parameters or {} 1330 1331 cursor_field = self._get_catalog_defined_cursor_field( 1332 stream_name=stream_name, 1333 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1334 or False, 1335 ) 1336 1337 if not cursor_field: 1338 interpolated_cursor_field = InterpolatedString.create( 1339 datetime_based_cursor_model.cursor_field, 1340 parameters=model_parameters, 1341 ) 1342 cursor_field = CursorField( 1343 cursor_field_key=interpolated_cursor_field.eval(config=config), 1344 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1345 or False, 1346 ) 1347 1348 interpolated_partition_field_start = InterpolatedString.create( 1349 datetime_based_cursor_model.partition_field_start or "start_time", 1350 parameters=model_parameters, 1351 ) 1352 interpolated_partition_field_end = InterpolatedString.create( 1353 datetime_based_cursor_model.partition_field_end or "end_time", 1354 parameters=model_parameters, 1355 ) 1356 1357 slice_boundary_fields = ( 1358 interpolated_partition_field_start.eval(config=config), 1359 interpolated_partition_field_end.eval(config=config), 1360 ) 1361 1362 datetime_format = datetime_based_cursor_model.datetime_format 1363 1364 cursor_granularity = ( 1365 
parse_duration(datetime_based_cursor_model.cursor_granularity) 1366 if datetime_based_cursor_model.cursor_granularity 1367 else None 1368 ) 1369 1370 lookback_window = None 1371 interpolated_lookback_window = ( 1372 InterpolatedString.create( 1373 datetime_based_cursor_model.lookback_window, 1374 parameters=model_parameters, 1375 ) 1376 if datetime_based_cursor_model.lookback_window 1377 else None 1378 ) 1379 if interpolated_lookback_window: 1380 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1381 if evaluated_lookback_window: 1382 lookback_window = parse_duration(evaluated_lookback_window) 1383 1384 connector_state_converter: DateTimeStreamStateConverter 1385 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1386 datetime_format=datetime_format, 1387 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1388 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1389 cursor_granularity=cursor_granularity, 1390 ) 1391 1392 # Adjusts the stream state by applying the runtime lookback window. 1393 # This is used to ensure correct state handling in case of failed partitions. 1394 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1395 if runtime_lookback_window and stream_state_value: 1396 new_stream_state = ( 1397 connector_state_converter.parse_timestamp(stream_state_value) 1398 - runtime_lookback_window 1399 ) 1400 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1401 new_stream_state 1402 ) 1403 1404 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1405 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1406 start_date_runtime_value = self.create_min_max_datetime( 1407 model=datetime_based_cursor_model.start_datetime, config=config 1408 ) 1409 else: 1410 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1411 1412 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1413 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1414 end_date_runtime_value = self.create_min_max_datetime( 1415 model=datetime_based_cursor_model.end_datetime, config=config 1416 ) 1417 else: 1418 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1419 1420 interpolated_start_date = MinMaxDatetime.create( 1421 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1422 parameters=datetime_based_cursor_model.parameters, 1423 ) 1424 interpolated_end_date = ( 1425 None 1426 if not end_date_runtime_value 1427 else MinMaxDatetime.create( 1428 end_date_runtime_value, datetime_based_cursor_model.parameters 1429 ) 1430 ) 1431 1432 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1433 if not interpolated_start_date.datetime_format: 1434 interpolated_start_date.datetime_format = datetime_format 1435 if interpolated_end_date and not interpolated_end_date.datetime_format: 1436 interpolated_end_date.datetime_format = datetime_format 1437 1438 start_date = interpolated_start_date.get_datetime(config=config) 1439 end_date_provider = ( 1440 partial(interpolated_end_date.get_datetime, config) 1441 if interpolated_end_date 1442 else connector_state_converter.get_end_provider() 1443 ) 1444 1445 if ( 1446 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1447 ) or ( 1448 not datetime_based_cursor_model.step and 
datetime_based_cursor_model.cursor_granularity 1449 ): 1450 raise ValueError( 1451 f"If step is defined, cursor_granularity should be as well and vice-versa. " 1452 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1453 ) 1454 1455 # When step is not defined, default to a step size from the starting date to the present moment 1456 step_length = datetime.timedelta.max 1457 interpolated_step = ( 1458 InterpolatedString.create( 1459 datetime_based_cursor_model.step, 1460 parameters=model_parameters, 1461 ) 1462 if datetime_based_cursor_model.step 1463 else None 1464 ) 1465 if interpolated_step: 1466 evaluated_step = interpolated_step.eval(config) 1467 if evaluated_step: 1468 step_length = parse_duration(evaluated_step) 1469 1470 clamping_strategy: ClampingStrategy = NoClamping() 1471 if datetime_based_cursor_model.clamping: 1472 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1473 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1474 # object which we want to keep agnostic of being low-code 1475 target = InterpolatedString( 1476 string=datetime_based_cursor_model.clamping.target, 1477 parameters=model_parameters, 1478 ) 1479 evaluated_target = target.eval(config=config) 1480 match evaluated_target: 1481 case "DAY": 1482 clamping_strategy = DayClampingStrategy() 1483 end_date_provider = ClampingEndProvider( 1484 DayClampingStrategy(is_ceiling=False), 1485 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1486 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1487 ) 1488 case "WEEK": 1489 if ( 1490 not datetime_based_cursor_model.clamping.target_details 1491 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1492 ): 1493 raise ValueError( 1494 "Given WEEK clamping, weekday needs to be provided as target_details" 1495 ) 1496 weekday = self._assemble_weekday( 1497 datetime_based_cursor_model.clamping.target_details["weekday"] 1498 ) 1499 clamping_strategy = WeekClampingStrategy(weekday) 1500 end_date_provider = ClampingEndProvider( 1501 WeekClampingStrategy(weekday, is_ceiling=False), 1502 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1503 granularity=cursor_granularity or datetime.timedelta(days=1), 1504 ) 1505 case "MONTH": 1506 clamping_strategy = MonthClampingStrategy() 1507 end_date_provider = ClampingEndProvider( 1508 MonthClampingStrategy(is_ceiling=False), 1509 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1510 granularity=cursor_granularity or datetime.timedelta(days=1), 1511 ) 1512 case _: 1513 raise ValueError( 1514 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1515 ) 1516 1517 return ConcurrentCursor( 1518 stream_name=stream_name, 1519 stream_namespace=stream_namespace, 1520 stream_state=stream_state, 1521 message_repository=message_repository or self._message_repository, 1522 connector_state_manager=self._connector_state_manager, 1523 connector_state_converter=connector_state_converter, 1524 cursor_field=cursor_field, 1525 slice_boundary_fields=slice_boundary_fields, 1526 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1527 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1528 lookback_window=lookback_window, 1529 slice_range=step_length, 1530 cursor_granularity=cursor_granularity, 1531 clamping_strategy=clamping_strategy, 1532 ) 1533 1534 def create_concurrent_cursor_from_incrementing_count_cursor( 1535 self, 1536 model_type: Type[BaseModel], 1537 component_definition: ComponentDefinition, 1538 stream_name: str, 1539 stream_namespace: Optional[str], 1540 stream_state: MutableMapping[str, Any], 1541 config: Config, 1542 message_repository: Optional[MessageRepository] = None, 1543 **kwargs: Any, 1544 ) -> ConcurrentCursor: 1545 component_type = component_definition.get("type") 1546 if component_definition.get("type") != model_type.__name__: 1547 raise ValueError( 1548 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1549 ) 1550 1551 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1552 1553 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1554 raise ValueError( 1555 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1556 ) 1557 1558 interpolated_start_value = ( 1559 InterpolatedString.create( 1560 incrementing_count_cursor_model.start_value, # type: ignore 1561 parameters=incrementing_count_cursor_model.parameters or {}, 1562 ) 1563 if incrementing_count_cursor_model.start_value 1564 else 0 1565 ) 1566 1567 cursor_field = self._get_catalog_defined_cursor_field( 1568 stream_name=stream_name, 1569 allow_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1570 or False, 1571 ) 1572 1573 if not cursor_field: 1574 interpolated_cursor_field = InterpolatedString.create( 1575 incrementing_count_cursor_model.cursor_field, 1576 parameters=incrementing_count_cursor_model.parameters or {}, 1577 ) 1578 cursor_field = CursorField( 1579 cursor_field_key=interpolated_cursor_field.eval(config=config), 1580 supports_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1581 or False, 1582 ) 1583 1584 connector_state_converter = IncrementingCountStreamStateConverter( 1585 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1586 ) 1587 1588 return ConcurrentCursor( 1589 stream_name=stream_name, 1590 stream_namespace=stream_namespace, 1591 stream_state=stream_state, 1592 message_repository=message_repository or self._message_repository, 1593 
connector_state_manager=self._connector_state_manager, 1594 connector_state_converter=connector_state_converter, 1595 cursor_field=cursor_field, 1596 slice_boundary_fields=None, 1597 start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1598 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1599 ) 1600 1601 def _assemble_weekday(self, weekday: str) -> Weekday: 1602 match weekday: 1603 case "MONDAY": 1604 return Weekday.MONDAY 1605 case "TUESDAY": 1606 return Weekday.TUESDAY 1607 case "WEDNESDAY": 1608 return Weekday.WEDNESDAY 1609 case "THURSDAY": 1610 return Weekday.THURSDAY 1611 case "FRIDAY": 1612 return Weekday.FRIDAY 1613 case "SATURDAY": 1614 return Weekday.SATURDAY 1615 case "SUNDAY": 1616 return Weekday.SUNDAY 1617 case _: 1618 raise ValueError(f"Unknown weekday {weekday}") 1619 1620 def create_concurrent_cursor_from_perpartition_cursor( 1621 self, 1622 state_manager: ConnectorStateManager, 1623 model_type: Type[BaseModel], 1624 component_definition: ComponentDefinition, 1625 stream_name: str, 1626 stream_namespace: Optional[str], 1627 config: Config, 1628 stream_state: MutableMapping[str, Any], 1629 partition_router: PartitionRouter, 1630 attempt_to_create_cursor_if_not_provided: bool = False, 1631 **kwargs: Any, 1632 ) -> ConcurrentPerPartitionCursor: 1633 component_type = component_definition.get("type") 1634 if component_definition.get("type") != model_type.__name__: 1635 raise ValueError( 1636 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1637 ) 1638 1639 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1640 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1641 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1642 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 
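# Illustration of the two shapes described above (values are hypothetical):
#   built from model.__dict__: {"type": "DatetimeBasedCursor", ..., "parameters": {"name": "orders"}}
#   raw manifest dict:         {"type": "DatetimeBasedCursor", ..., "$parameters": {"name": "orders"}}
# The normalization below copies "parameters" into "$parameters" so that parse_obj() can pick the
# parameters up under the alias the generated model expects.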
1643 if "$parameters" not in component_definition and "parameters" in component_definition: 1644 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1645 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1646 1647 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1648 raise ValueError( 1649 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1650 ) 1651 1652 cursor_field = self._get_catalog_defined_cursor_field( 1653 stream_name=stream_name, 1654 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1655 or False, 1656 ) 1657 1658 if not cursor_field: 1659 interpolated_cursor_field = InterpolatedString.create( 1660 datetime_based_cursor_model.cursor_field, 1661 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1662 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1663 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1664 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1665 parameters=datetime_based_cursor_model.parameters or {}, 1666 ) 1667 cursor_field = CursorField( 1668 cursor_field_key=interpolated_cursor_field.eval(config=config), 1669 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1670 or False, 1671 ) 1672 1673 datetime_format = datetime_based_cursor_model.datetime_format 1674 1675 cursor_granularity = ( 1676 parse_duration(datetime_based_cursor_model.cursor_granularity) 1677 if datetime_based_cursor_model.cursor_granularity 1678 else None 1679 ) 1680 1681 connector_state_converter: DateTimeStreamStateConverter 1682 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1683 datetime_format=datetime_format, 1684 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1685 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1686 cursor_granularity=cursor_granularity, 1687 ) 1688 1689 # Create the cursor factory 1690 cursor_factory = ConcurrentCursorFactory( 1691 partial( 1692 self.create_concurrent_cursor_from_datetime_based_cursor, 1693 state_manager=state_manager, 1694 model_type=model_type, 1695 component_definition=component_definition, 1696 stream_name=stream_name, 1697 stream_namespace=stream_namespace, 1698 config=config, 1699 message_repository=NoopMessageRepository(), 1700 ) 1701 ) 1702 1703 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1704 use_global_cursor = isinstance( 1705 partition_router, GroupingPartitionRouter 1706 ) or component_definition.get("global_substream_cursor", False) 1707 1708 # Return the concurrent cursor and state converter 1709 return ConcurrentPerPartitionCursor( 1710 cursor_factory=cursor_factory, 1711 partition_router=partition_router, 1712 stream_name=stream_name, 1713 
stream_namespace=stream_namespace, 1714 stream_state=stream_state, 1715 message_repository=self._message_repository, # type: ignore 1716 connector_state_manager=state_manager, 1717 connector_state_converter=connector_state_converter, 1718 cursor_field=cursor_field, 1719 use_global_cursor=use_global_cursor, 1720 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1721 ) 1722 1723 @staticmethod 1724 def create_constant_backoff_strategy( 1725 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1726 ) -> ConstantBackoffStrategy: 1727 return ConstantBackoffStrategy( 1728 backoff_time_in_seconds=model.backoff_time_in_seconds, 1729 config=config, 1730 parameters=model.parameters or {}, 1731 ) 1732 1733 def create_cursor_pagination( 1734 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1735 ) -> CursorPaginationStrategy: 1736 if isinstance(decoder, PaginationDecoderDecorator): 1737 inner_decoder = decoder.decoder 1738 else: 1739 inner_decoder = decoder 1740 decoder = PaginationDecoderDecorator(decoder=decoder) 1741 1742 if self._is_supported_decoder_for_pagination(inner_decoder): 1743 decoder_to_use = decoder 1744 else: 1745 raise ValueError( 1746 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1747 ) 1748 1749 return CursorPaginationStrategy( 1750 cursor_value=model.cursor_value, 1751 decoder=decoder_to_use, 1752 page_size=model.page_size, 1753 stop_condition=model.stop_condition, 1754 config=config, 1755 parameters=model.parameters or {}, 1756 ) 1757 1758 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1759 """ 1760 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1761 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1762 :param model: The Pydantic model of the custom component being created 1763 :param config: The custom defined connector config 1764 :return: The declarative component built from the Pydantic model to be used at runtime 1765 """ 1766 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1767 component_fields = get_type_hints(custom_component_class) 1768 model_args = model.dict() 1769 model_args["config"] = config 1770 1771 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1772 # we defer to these arguments over the component's definition 1773 for key, arg in kwargs.items(): 1774 model_args[key] = arg 1775 1776 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1777 # defined in the schema. The fields and types are defined within the Python class implementation. 
Pydantic can only parse down to 1778 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1779 for model_field, model_value in model_args.items(): 1780 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1781 if ( 1782 isinstance(model_value, dict) 1783 and "type" not in model_value 1784 and model_field in component_fields 1785 ): 1786 derived_type = self._derive_component_type_from_type_hints( 1787 component_fields.get(model_field) 1788 ) 1789 if derived_type: 1790 model_value["type"] = derived_type 1791 1792 if self._is_component(model_value): 1793 model_args[model_field] = self._create_nested_component( 1794 model, 1795 model_field, 1796 model_value, 1797 config, 1798 **kwargs, 1799 ) 1800 elif isinstance(model_value, list): 1801 vals = [] 1802 for v in model_value: 1803 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1804 derived_type = self._derive_component_type_from_type_hints( 1805 component_fields.get(model_field) 1806 ) 1807 if derived_type: 1808 v["type"] = derived_type 1809 if self._is_component(v): 1810 vals.append( 1811 self._create_nested_component( 1812 model, 1813 model_field, 1814 v, 1815 config, 1816 **kwargs, 1817 ) 1818 ) 1819 else: 1820 vals.append(v) 1821 model_args[model_field] = vals 1822 1823 kwargs = { 1824 class_field: model_args[class_field] 1825 for class_field in component_fields.keys() 1826 if class_field in model_args 1827 } 1828 return custom_component_class(**kwargs) 1829 1830 @staticmethod 1831 def _get_class_from_fully_qualified_class_name( 1832 full_qualified_class_name: str, 1833 ) -> Any: 1834 """Get a class from its fully qualified name. 1835 1836 If a custom components module is needed, we assume it is already registered - probably 1837 as `source_declarative_manifest.components` or `components`. 1838 1839 Args: 1840 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1841 1842 Returns: 1843 Any: The class object. 1844 1845 Raises: 1846 ValueError: If the class cannot be loaded. 1847 """ 1848 split = full_qualified_class_name.split(".") 1849 module_name_full = ".".join(split[:-1]) 1850 class_name = split[-1] 1851 1852 try: 1853 module_ref = importlib.import_module(module_name_full) 1854 except ModuleNotFoundError as e: 1855 if split[0] == "source_declarative_manifest": 1856 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. 
In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append 1857 try: 1858 import os 1859 1860 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1861 module_ref = importlib.import_module( 1862 module_name_with_source_declarative_manifest 1863 ) 1864 except ModuleNotFoundError: 1865 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1866 else: 1867 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1868 1869 try: 1870 return getattr(module_ref, class_name) 1871 except AttributeError as e: 1872 raise ValueError( 1873 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1874 ) from e 1875 1876 @staticmethod 1877 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1878 interface = field_type 1879 while True: 1880 origin = get_origin(interface) 1881 if origin: 1882 # Unnest types until we reach the raw type 1883 # List[T] -> T 1884 # Optional[List[T]] -> T 1885 args = get_args(interface) 1886 interface = args[0] 1887 else: 1888 break 1889 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1890 return interface.__name__ 1891 return None 1892 1893 @staticmethod 1894 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1895 if not cls: 1896 return False 1897 return cls.__module__ == "builtins" 1898 1899 @staticmethod 1900 def _extract_missing_parameters(error: TypeError) -> List[str]: 1901 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1902 if parameter_search: 1903 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1904 else: 1905 return [] 1906 1907 def _create_nested_component( 1908 self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any 1909 ) -> Any: 1910 type_name = model_value.get("type", None) 1911 if not type_name: 1912 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1913 return model_value 1914 1915 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1916 if model_type: 1917 parsed_model = model_type.parse_obj(model_value) 1918 try: 1919 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1920 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1921 # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1922 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1923 # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that 1924 are needed by a component and could not be shared. 
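# For example (hypothetical manifest snippet): a custom retriever may declare a DefaultPaginator
# subcomponent whose constructor needs `url_base`; since this generic path cannot propagate it
# from a sibling HttpRequester, the manifest can supply it explicitly as
# DefaultPaginator.$parameters.url_base. The code below forwards only those $parameters entries
# that match the constructor's keyword-only arguments, with values passed via **kwargs taking
# precedence on collisions.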
1925 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1926 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1927 model_parameters = model_value.get("$parameters", {}) 1928 matching_parameters = { 1929 kwarg: model_parameters[kwarg] 1930 for kwarg in constructor_kwargs 1931 if kwarg in model_parameters 1932 } 1933 matching_kwargs = { 1934 kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs 1935 } 1936 return self._create_component_from_model( 1937 model=parsed_model, config=config, **(matching_parameters | matching_kwargs) 1938 ) 1939 except TypeError as error: 1940 missing_parameters = self._extract_missing_parameters(error) 1941 if missing_parameters: 1942 raise ValueError( 1943 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1944 + ", ".join( 1945 ( 1946 f"{type_name}.$parameters.{parameter}" 1947 for parameter in missing_parameters 1948 ) 1949 ) 1950 ) 1951 raise TypeError( 1952 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1953 ) 1954 else: 1955 raise ValueError( 1956 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1957 ) 1958 1959 @staticmethod 1960 def _is_component(model_value: Any) -> bool: 1961 return isinstance(model_value, dict) and model_value.get("type") is not None 1962 1963 def create_default_stream( 1964 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1965 ) -> AbstractStream: 1966 primary_key = model.primary_key.__root__ if model.primary_key else None 1967 self._migrate_state(model, config) 1968 1969 partition_router = self._build_stream_slicer_from_partition_router( 1970 model.retriever, 1971 config, 1972 stream_name=model.name, 1973 **kwargs, 1974 ) 1975 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1976 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1977 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1978 1979 end_time_option = ( 1980 self._create_component_from_model( 1981 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1982 ) 1983 if cursor_model.end_time_option 1984 else None 1985 ) 1986 start_time_option = ( 1987 self._create_component_from_model( 1988 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1989 ) 1990 if cursor_model.start_time_option 1991 else None 1992 ) 1993 1994 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 1995 start_time_option=start_time_option, 1996 end_time_option=end_time_option, 1997 partition_field_start=cursor_model.partition_field_start, 1998 partition_field_end=cursor_model.partition_field_end, 1999 config=config, 2000 parameters=model.parameters or {}, 2001 ) 2002 request_options_provider = ( 2003 datetime_request_options_provider 2004 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 2005 else PerPartitionRequestOptionsProvider( 2006 partition_router, datetime_request_options_provider 2007 ) 2008 ) 2009 elif model.incremental_sync and isinstance( 2010 model.incremental_sync, IncrementingCountCursorModel 2011 ): 2012 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2013 raise ValueError( 2014 "PerPartition does not support per partition states because switching to global state is time based" 2015 ) 2016 2017 cursor_model: 
IncrementingCountCursorModel = model.incremental_sync # type: ignore 2018 2019 start_time_option = ( 2020 self._create_component_from_model( 2021 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2022 config, 2023 parameters=cursor_model.parameters or {}, 2024 ) 2025 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2026 else None 2027 ) 2028 2029 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2030 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2031 partition_field_start = "start" 2032 2033 request_options_provider = DatetimeBasedRequestOptionsProvider( 2034 start_time_option=start_time_option, 2035 partition_field_start=partition_field_start, 2036 config=config, 2037 parameters=model.parameters or {}, 2038 ) 2039 else: 2040 request_options_provider = None 2041 2042 transformations = [] 2043 if model.transformations: 2044 for transformation_model in model.transformations: 2045 transformations.append( 2046 self._create_component_from_model(model=transformation_model, config=config) 2047 ) 2048 file_uploader = None 2049 if model.file_uploader: 2050 file_uploader = self._create_component_from_model( 2051 model=model.file_uploader, config=config 2052 ) 2053 2054 stream_slicer: ConcurrentStreamSlicer = ( 2055 partition_router 2056 if isinstance(concurrent_cursor, FinalStateCursor) 2057 else concurrent_cursor 2058 ) 2059 2060 retriever = self._create_component_from_model( 2061 model=model.retriever, 2062 config=config, 2063 name=model.name, 2064 primary_key=primary_key, 2065 request_options_provider=request_options_provider, 2066 stream_slicer=stream_slicer, 2067 partition_router=partition_router, 2068 has_stop_condition_cursor=self._is_stop_condition_on_cursor(model), 2069 is_client_side_incremental_sync=self._is_client_side_filtering_enabled(model), 2070 cursor=concurrent_cursor, 2071 transformations=transformations, 2072 file_uploader=file_uploader, 2073 incremental_sync=model.incremental_sync, 2074 ) 2075 if isinstance(retriever, AsyncRetriever): 2076 stream_slicer = retriever.stream_slicer 2077 2078 schema_loader: SchemaLoader 2079 if model.schema_loader and isinstance(model.schema_loader, list): 2080 nested_schema_loaders = [ 2081 self._create_component_from_model(model=nested_schema_loader, config=config) 2082 for nested_schema_loader in model.schema_loader 2083 ] 2084 schema_loader = CompositeSchemaLoader( 2085 schema_loaders=nested_schema_loaders, parameters={} 2086 ) 2087 elif model.schema_loader: 2088 schema_loader = self._create_component_from_model( 2089 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2090 config=config, 2091 ) 2092 else: 2093 options = model.parameters or {} 2094 if "name" not in options: 2095 options["name"] = model.name 2096 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2097 schema_loader = CachingSchemaLoaderDecorator(schema_loader) 2098 2099 stream_name = model.name or "" 2100 return DefaultStream( 2101 partition_generator=StreamSlicerPartitionGenerator( 2102 DeclarativePartitionFactory( 2103 stream_name, 2104 schema_loader, 2105 retriever, 2106 self._message_repository, 2107 ), 2108 stream_slicer, 2109 slice_limit=self._limit_slices_fetched, 2110 ), 2111 name=stream_name, 2112 json_schema=schema_loader.get_json_schema, 2113 
primary_key=get_primary_key_from_stream(primary_key), 2114 cursor_field=concurrent_cursor.cursor_field 2115 if hasattr(concurrent_cursor, "cursor_field") 2116 else CursorField( 2117 cursor_field_key="" 2118 ), # FIXME we should have the cursor field has part of the interface of cursor, 2119 logger=logging.getLogger(f"airbyte.{stream_name}"), 2120 cursor=concurrent_cursor, 2121 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2122 ) 2123 2124 def _migrate_state(self, model: DeclarativeStreamModel, config: Config) -> None: 2125 stream_name = model.name or "" 2126 stream_state = self._connector_state_manager.get_stream_state( 2127 stream_name=stream_name, namespace=None 2128 ) 2129 if model.state_migrations: 2130 state_transformations = [ 2131 self._create_component_from_model(state_migration, config, declarative_stream=model) 2132 for state_migration in model.state_migrations 2133 ] 2134 else: 2135 state_transformations = [] 2136 stream_state = self.apply_stream_state_migrations(state_transformations, stream_state) 2137 self._connector_state_manager.update_state_for_stream( 2138 stream_name=stream_name, namespace=None, value=stream_state 2139 ) 2140 2141 def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool: 2142 return bool( 2143 model.incremental_sync 2144 and hasattr(model.incremental_sync, "is_data_feed") 2145 and model.incremental_sync.is_data_feed 2146 ) 2147 2148 def _is_client_side_filtering_enabled(self, model: DeclarativeStreamModel) -> bool: 2149 return bool( 2150 model.incremental_sync 2151 and hasattr(model.incremental_sync, "is_client_side_incremental") 2152 and model.incremental_sync.is_client_side_incremental 2153 ) 2154 2155 def _build_stream_slicer_from_partition_router( 2156 self, 2157 model: Union[ 2158 AsyncRetrieverModel, 2159 CustomRetrieverModel, 2160 SimpleRetrieverModel, 2161 ], 2162 config: Config, 2163 stream_name: Optional[str] = None, 2164 **kwargs: Any, 2165 ) -> PartitionRouter: 2166 if ( 2167 hasattr(model, "partition_router") 2168 and isinstance(model, (SimpleRetrieverModel, AsyncRetrieverModel, CustomRetrieverModel)) 2169 and model.partition_router 2170 ): 2171 stream_slicer_model = model.partition_router 2172 if isinstance(stream_slicer_model, list): 2173 return CartesianProductStreamSlicer( 2174 [ 2175 self._create_component_from_model( 2176 model=slicer, config=config, stream_name=stream_name or "" 2177 ) 2178 for slicer in stream_slicer_model 2179 ], 2180 parameters={}, 2181 ) 2182 elif isinstance(stream_slicer_model, dict): 2183 # partition router comes from CustomRetrieverModel therefore has not been parsed as a model 2184 params = stream_slicer_model.get("$parameters") 2185 if not isinstance(params, dict): 2186 params = {} 2187 stream_slicer_model["$parameters"] = params 2188 2189 if stream_name is not None: 2190 params["stream_name"] = stream_name 2191 2192 return self._create_nested_component( # type: ignore[no-any-return] # There is no guarantee that this will return a stream slicer. 
If not, we expect an AttributeError during the call to `stream_slices` 2193 model, 2194 "partition_router", 2195 stream_slicer_model, 2196 config, 2197 **kwargs, 2198 ) 2199 else: 2200 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 2201 model=stream_slicer_model, config=config, stream_name=stream_name or "" 2202 ) 2203 return SinglePartitionRouter(parameters={}) 2204 2205 def _build_concurrent_cursor( 2206 self, 2207 model: DeclarativeStreamModel, 2208 stream_slicer: Optional[PartitionRouter], 2209 config: Config, 2210 ) -> Cursor: 2211 stream_name = model.name or "" 2212 stream_state = self._connector_state_manager.get_stream_state(stream_name, None) 2213 2214 if ( 2215 model.incremental_sync 2216 and stream_slicer 2217 and not isinstance(stream_slicer, SinglePartitionRouter) 2218 ): 2219 if isinstance(model.incremental_sync, IncrementingCountCursorModel): 2220 # We don't currently support usage of partition routing and IncrementingCountCursor at the 2221 # same time because we didn't solve for design questions like what the lookback window would 2222 # be as well as global cursor fall backs. We have not seen customers that have needed both 2223 # at the same time yet and are currently punting on this until we need to solve it. 2224 raise ValueError( 2225 f"The low-code framework does not currently support usage of a PartitionRouter and an IncrementingCountCursor at the same time. Please specify only one of these options for stream {stream_name}." 2226 ) 2227 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2228 state_manager=self._connector_state_manager, 2229 model_type=DatetimeBasedCursorModel, 2230 component_definition=model.incremental_sync.__dict__, 2231 stream_name=stream_name, 2232 stream_state=stream_state, 2233 stream_namespace=None, 2234 config=config or {}, 2235 partition_router=stream_slicer, 2236 attempt_to_create_cursor_if_not_provided=True, # FIXME can we remove that now? 2237 ) 2238 elif model.incremental_sync: 2239 if type(model.incremental_sync) == IncrementingCountCursorModel: 2240 return self.create_concurrent_cursor_from_incrementing_count_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2241 model_type=IncrementingCountCursorModel, 2242 component_definition=model.incremental_sync.__dict__, 2243 stream_name=stream_name, 2244 stream_namespace=None, 2245 stream_state=stream_state, 2246 config=config or {}, 2247 ) 2248 elif type(model.incremental_sync) == DatetimeBasedCursorModel: 2249 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2250 model_type=type(model.incremental_sync), 2251 component_definition=model.incremental_sync.__dict__, 2252 stream_name=stream_name, 2253 stream_namespace=None, 2254 stream_state=stream_state, 2255 config=config or {}, 2256 attempt_to_create_cursor_if_not_provided=True, 2257 ) 2258 else: 2259 raise ValueError( 2260 f"Incremental sync of type {type(model.incremental_sync)} is not supported" 2261 ) 2262 return FinalStateCursor(stream_name, None, self._message_repository) 2263 2264 def create_default_error_handler( 2265 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2266 ) -> DefaultErrorHandler: 2267 backoff_strategies = [] 2268 if model.backoff_strategies: 2269 for backoff_strategy_model in model.backoff_strategies: 2270 backoff_strategies.append( 2271 self._create_component_from_model(model=backoff_strategy_model, config=config) 2272 ) 2273 2274 response_filters = [] 2275 if model.response_filters: 2276 for response_filter_model in model.response_filters: 2277 response_filters.append( 2278 self._create_component_from_model(model=response_filter_model, config=config) 2279 ) 2280 response_filters.append( 2281 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2282 ) 2283 2284 return DefaultErrorHandler( 2285 backoff_strategies=backoff_strategies, 2286 max_retries=model.max_retries, 2287 response_filters=response_filters, 2288 config=config, 2289 parameters=model.parameters or {}, 2290 ) 2291 2292 def create_default_paginator( 2293 self, 2294 model: DefaultPaginatorModel, 2295 config: Config, 2296 *, 2297 url_base: str, 2298 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2299 decoder: Optional[Decoder] = None, 2300 cursor_used_for_stop_condition: Optional[Cursor] = None, 2301 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2302 if decoder: 2303 if self._is_supported_decoder_for_pagination(decoder): 2304 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2305 else: 2306 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2307 else: 2308 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2309 page_size_option = ( 2310 self._create_component_from_model(model=model.page_size_option, config=config) 2311 if model.page_size_option 2312 else None 2313 ) 2314 page_token_option = ( 2315 self._create_component_from_model(model=model.page_token_option, config=config) 2316 if model.page_token_option 2317 else None 2318 ) 2319 pagination_strategy = self._create_component_from_model( 2320 model=model.pagination_strategy, 2321 config=config, 2322 decoder=decoder_to_use, 2323 extractor_model=extractor_model, 2324 ) 2325 if cursor_used_for_stop_condition: 2326 pagination_strategy = StopConditionPaginationStrategyDecorator( 2327 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2328 ) 2329 paginator = DefaultPaginator( 2330 decoder=decoder_to_use, 2331 page_size_option=page_size_option, 2332 page_token_option=page_token_option, 2333 pagination_strategy=pagination_strategy, 2334 url_base=url_base, 2335 config=config, 2336 parameters=model.parameters or {}, 2337 ) 2338 if self._limit_pages_fetched_per_slice: 2339 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2340 return paginator 2341 2342 def create_dpath_extractor( 2343 self, 2344 model: 
DpathExtractorModel, 2345 config: Config, 2346 decoder: Optional[Decoder] = None, 2347 **kwargs: Any, 2348 ) -> DpathExtractor: 2349 if decoder: 2350 decoder_to_use = decoder 2351 else: 2352 decoder_to_use = JsonDecoder(parameters={}) 2353 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2354 return DpathExtractor( 2355 decoder=decoder_to_use, 2356 field_path=model_field_path, 2357 config=config, 2358 parameters=model.parameters or {}, 2359 ) 2360 2361 @staticmethod 2362 def create_response_to_file_extractor( 2363 model: ResponseToFileExtractorModel, 2364 **kwargs: Any, 2365 ) -> ResponseToFileExtractor: 2366 return ResponseToFileExtractor(parameters=model.parameters or {}) 2367 2368 @staticmethod 2369 def create_exponential_backoff_strategy( 2370 model: ExponentialBackoffStrategyModel, config: Config 2371 ) -> ExponentialBackoffStrategy: 2372 return ExponentialBackoffStrategy( 2373 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2374 ) 2375 2376 @staticmethod 2377 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2378 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2379 2380 def create_http_requester( 2381 self, 2382 model: HttpRequesterModel, 2383 config: Config, 2384 decoder: Decoder = JsonDecoder(parameters={}), 2385 query_properties_key: Optional[str] = None, 2386 use_cache: Optional[bool] = None, 2387 *, 2388 name: str, 2389 ) -> HttpRequester: 2390 authenticator = ( 2391 self._create_component_from_model( 2392 model=model.authenticator, 2393 config=config, 2394 url_base=model.url or model.url_base, 2395 name=name, 2396 decoder=decoder, 2397 ) 2398 if model.authenticator 2399 else None 2400 ) 2401 error_handler = ( 2402 self._create_component_from_model(model=model.error_handler, config=config) 2403 if model.error_handler 2404 else DefaultErrorHandler( 2405 backoff_strategies=[], 2406 response_filters=[], 2407 config=config, 2408 parameters=model.parameters or {}, 2409 ) 2410 ) 2411 2412 api_budget = self._api_budget 2413 2414 request_options_provider = InterpolatedRequestOptionsProvider( 2415 request_body=model.request_body, 2416 request_body_data=model.request_body_data, 2417 request_body_json=model.request_body_json, 2418 request_headers=model.request_headers, 2419 request_parameters=model.request_parameters, # type: ignore # QueryProperties have been removed in `create_simple_retriever` 2420 query_properties_key=query_properties_key, 2421 config=config, 2422 parameters=model.parameters or {}, 2423 ) 2424 2425 assert model.use_cache is not None # for mypy 2426 assert model.http_method is not None # for mypy 2427 2428 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2429 2430 return HttpRequester( 2431 name=name, 2432 url=model.url, 2433 url_base=model.url_base, 2434 path=model.path, 2435 authenticator=authenticator, 2436 error_handler=error_handler, 2437 api_budget=api_budget, 2438 http_method=HttpMethod[model.http_method.value], 2439 request_options_provider=request_options_provider, 2440 config=config, 2441 disable_retries=self._disable_retries, 2442 parameters=model.parameters or {}, 2443 message_repository=self._message_repository, 2444 use_cache=should_use_cache, 2445 decoder=decoder, 2446 stream_response=decoder.is_stream_response() if decoder else False, 2447 ) 2448 2449 @staticmethod 2450 def create_http_response_filter( 2451 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2452 ) -> 
HttpResponseFilter: 2453 if model.action: 2454 action = ResponseAction(model.action.value) 2455 else: 2456 action = None 2457 2458 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2459 2460 http_codes = ( 2461 set(model.http_codes) if model.http_codes else set() 2462 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2463 2464 return HttpResponseFilter( 2465 action=action, 2466 failure_type=failure_type, 2467 error_message=model.error_message or "", 2468 error_message_contains=model.error_message_contains or "", 2469 http_codes=http_codes, 2470 predicate=model.predicate or "", 2471 config=config, 2472 parameters=model.parameters or {}, 2473 ) 2474 2475 @staticmethod 2476 def create_inline_schema_loader( 2477 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2478 ) -> InlineSchemaLoader: 2479 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2480 2481 def create_complex_field_type( 2482 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2483 ) -> ComplexFieldType: 2484 items = ( 2485 self._create_component_from_model(model=model.items, config=config) 2486 if isinstance(model.items, ComplexFieldTypeModel) 2487 else model.items 2488 ) 2489 2490 return ComplexFieldType(field_type=model.field_type, items=items) 2491 2492 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2493 target_type = ( 2494 self._create_component_from_model(model=model.target_type, config=config) 2495 if isinstance(model.target_type, ComplexFieldTypeModel) 2496 else model.target_type 2497 ) 2498 2499 return TypesMap( 2500 target_type=target_type, 2501 current_type=model.current_type, 2502 condition=model.condition if model.condition is not None else "True", 2503 ) 2504 2505 def create_schema_type_identifier( 2506 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2507 ) -> SchemaTypeIdentifier: 2508 types_mapping = [] 2509 if model.types_mapping: 2510 types_mapping.extend( 2511 [ 2512 self._create_component_from_model(types_map, config=config) 2513 for types_map in model.types_mapping 2514 ] 2515 ) 2516 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2517 [x for x in model.schema_pointer] if model.schema_pointer else [] 2518 ) 2519 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2520 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2521 [x for x in model.type_pointer] if model.type_pointer else None 2522 ) 2523 2524 return SchemaTypeIdentifier( 2525 schema_pointer=model_schema_pointer, 2526 key_pointer=model_key_pointer, 2527 type_pointer=model_type_pointer, 2528 types_mapping=types_mapping, 2529 parameters=model.parameters or {}, 2530 ) 2531 2532 def create_dynamic_schema_loader( 2533 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2534 ) -> DynamicSchemaLoader: 2535 schema_transformations = [] 2536 if model.schema_transformations: 2537 for transformation_model in model.schema_transformations: 2538 schema_transformations.append( 2539 self._create_component_from_model(model=transformation_model, config=config) 2540 ) 2541 name = "dynamic_properties" 2542 retriever = self._create_component_from_model( 2543 model=model.retriever, 2544 config=config, 2545 name=name, 2546 primary_key=None, 2547 partition_router=self._build_stream_slicer_from_partition_router( 2548 model.retriever, config 2549 ), 2550 transformations=[], 2551 use_cache=True, 2552 
log_formatter=( 2553 lambda response: format_http_message( 2554 response, 2555 f"Schema loader '{name}' request", 2556 f"Request performed in order to extract schema.", 2557 name, 2558 is_auxiliary=True, 2559 ) 2560 ), 2561 ) 2562 schema_type_identifier = self._create_component_from_model( 2563 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2564 ) 2565 schema_filter = ( 2566 self._create_component_from_model( 2567 model.schema_filter, config=config, parameters=model.parameters or {} 2568 ) 2569 if model.schema_filter is not None 2570 else None 2571 ) 2572 2573 return DynamicSchemaLoader( 2574 retriever=retriever, 2575 config=config, 2576 schema_transformations=schema_transformations, 2577 schema_filter=schema_filter, 2578 schema_type_identifier=schema_type_identifier, 2579 parameters=model.parameters or {}, 2580 ) 2581 2582 @staticmethod 2583 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2584 return JsonDecoder(parameters={}) 2585 2586 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2587 return CompositeRawDecoder( 2588 parser=ModelToComponentFactory._get_parser(model, config), 2589 stream_response=False if self._emit_connector_builder_messages else True, 2590 ) 2591 2592 def create_jsonl_decoder( 2593 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2594 ) -> Decoder: 2595 return CompositeRawDecoder( 2596 parser=ModelToComponentFactory._get_parser(model, config), 2597 stream_response=False if self._emit_connector_builder_messages else True, 2598 ) 2599 2600 def create_gzip_decoder( 2601 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2602 ) -> Decoder: 2603 _compressed_response_types = { 2604 "gzip", 2605 "x-gzip", 2606 "gzip, deflate", 2607 "x-gzip, deflate", 2608 "application/zip", 2609 "application/gzip", 2610 "application/x-gzip", 2611 "application/x-zip-compressed", 2612 } 2613 2614 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2615 2616 if self._emit_connector_builder_messages: 2617 # This is very surprising but if the response is not streamed, 2618 # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw, 2619 # which uses urllib3 directly and does not uncompress the data. 
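            # Illustrative sketch (not part of the module source): the requests/urllib3
            # behaviour described above can be observed directly. `response.content` lets
            # requests decode a `Content-Encoding: gzip` body, while `response.raw` exposes
            # the bytes as sent on the wire. The URL below is a placeholder assumption.
            #
            #   import gzip
            #   import requests
            #
            #   decoded = requests.get("https://example.com/export.gz").content
            #   streamed = requests.get("https://example.com/export.gz", stream=True)
            #   compressed = streamed.raw.read()  # still gzip-compressed bytes
            #   assert gzip.decompress(compressed) == decoded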
2620 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2621 2622 return CompositeRawDecoder.by_headers( 2623 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2624 stream_response=True, 2625 fallback_parser=gzip_parser.inner_parser, 2626 ) 2627 2628 @staticmethod 2629 def create_iterable_decoder( 2630 model: IterableDecoderModel, config: Config, **kwargs: Any 2631 ) -> IterableDecoder: 2632 return IterableDecoder(parameters={}) 2633 2634 @staticmethod 2635 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2636 return XmlDecoder(parameters={}) 2637 2638 def create_zipfile_decoder( 2639 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2640 ) -> ZipfileDecoder: 2641 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2642 2643 @staticmethod 2644 def _get_parser(model: BaseModel, config: Config) -> Parser: 2645 if isinstance(model, JsonDecoderModel): 2646 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2647 return JsonParser() 2648 elif isinstance(model, JsonlDecoderModel): 2649 return JsonLineParser() 2650 elif isinstance(model, CsvDecoderModel): 2651 return CsvParser( 2652 encoding=model.encoding, 2653 delimiter=model.delimiter, 2654 set_values_to_none=model.set_values_to_none, 2655 ) 2656 elif isinstance(model, GzipDecoderModel): 2657 return GzipParser( 2658 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2659 ) 2660 elif isinstance( 2661 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2662 ): 2663 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2664 2665 raise ValueError(f"Unknown decoder type {model}") 2666 2667 @staticmethod 2668 def create_json_file_schema_loader( 2669 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2670 ) -> JsonFileSchemaLoader: 2671 return JsonFileSchemaLoader( 2672 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2673 ) 2674 2675 def create_jwt_authenticator( 2676 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2677 ) -> JwtAuthenticator: 2678 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2679 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2680 request_option = ( 2681 self._create_component_from_model(model.request_option, config) 2682 if model.request_option 2683 else None 2684 ) 2685 return JwtAuthenticator( 2686 config=config, 2687 parameters=model.parameters or {}, 2688 algorithm=JwtAlgorithm(model.algorithm.value), 2689 secret_key=model.secret_key, 2690 base64_encode_secret_key=model.base64_encode_secret_key, 2691 token_duration=model.token_duration, 2692 header_prefix=model.header_prefix, 2693 kid=jwt_headers.kid, 2694 typ=jwt_headers.typ, 2695 cty=jwt_headers.cty, 2696 iss=jwt_payload.iss, 2697 sub=jwt_payload.sub, 2698 aud=jwt_payload.aud, 2699 additional_jwt_headers=model.additional_jwt_headers, 2700 additional_jwt_payload=model.additional_jwt_payload, 2701 passphrase=model.passphrase, 2702 request_option=request_option, 2703 ) 2704 2705 def create_list_partition_router( 2706 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2707 ) -> ListPartitionRouter: 2708 request_option = ( 2709 self._create_component_from_model(model.request_option, config) 2710 if model.request_option 2711 else None 
2712 ) 2713 return ListPartitionRouter( 2714 cursor_field=model.cursor_field, 2715 request_option=request_option, 2716 values=model.values, 2717 config=config, 2718 parameters=model.parameters or {}, 2719 ) 2720 2721 @staticmethod 2722 def create_min_max_datetime( 2723 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2724 ) -> MinMaxDatetime: 2725 return MinMaxDatetime( 2726 datetime=model.datetime, 2727 datetime_format=model.datetime_format or "", 2728 max_datetime=model.max_datetime or "", 2729 min_datetime=model.min_datetime or "", 2730 parameters=model.parameters or {}, 2731 ) 2732 2733 @staticmethod 2734 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2735 return NoAuth(parameters=model.parameters or {}) 2736 2737 @staticmethod 2738 def create_no_pagination( 2739 model: NoPaginationModel, config: Config, **kwargs: Any 2740 ) -> NoPagination: 2741 return NoPagination(parameters={}) 2742 2743 def create_oauth_authenticator( 2744 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2745 ) -> DeclarativeOauth2Authenticator: 2746 profile_assertion = ( 2747 self._create_component_from_model(model.profile_assertion, config=config) 2748 if model.profile_assertion 2749 else None 2750 ) 2751 2752 refresh_token_error_status_codes, refresh_token_error_key, refresh_token_error_values = ( 2753 self._get_refresh_token_error_information(model) 2754 ) 2755 if model.refresh_token_updater: 2756 # ignore type error because fixing it would have a lot of dependencies, revisit later 2757 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2758 config, 2759 InterpolatedString.create( 2760 model.token_refresh_endpoint, # type: ignore 2761 parameters=model.parameters or {}, 2762 ).eval(config), 2763 access_token_name=InterpolatedString.create( 2764 model.access_token_name or "access_token", parameters=model.parameters or {} 2765 ).eval(config), 2766 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2767 expires_in_name=InterpolatedString.create( 2768 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2769 ).eval(config), 2770 client_id_name=InterpolatedString.create( 2771 model.client_id_name or "client_id", parameters=model.parameters or {} 2772 ).eval(config), 2773 client_id=InterpolatedString.create( 2774 model.client_id, parameters=model.parameters or {} 2775 ).eval(config) 2776 if model.client_id 2777 else model.client_id, 2778 client_secret_name=InterpolatedString.create( 2779 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2780 ).eval(config), 2781 client_secret=InterpolatedString.create( 2782 model.client_secret, parameters=model.parameters or {} 2783 ).eval(config) 2784 if model.client_secret 2785 else model.client_secret, 2786 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2787 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2788 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2789 grant_type_name=InterpolatedString.create( 2790 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2791 ).eval(config), 2792 grant_type=InterpolatedString.create( 2793 model.grant_type or "refresh_token", parameters=model.parameters or {} 2794 ).eval(config), 2795 refresh_request_body=InterpolatedMapping( 2796 model.refresh_request_body or {}, parameters=model.parameters or {} 2797 ).eval(config), 2798 refresh_request_headers=InterpolatedMapping( 
                    model.refresh_request_headers or {}, parameters=model.parameters or {}
                ).eval(config),
                scopes=model.scopes,
                token_expiry_date_format=model.token_expiry_date_format,
                token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
                message_repository=self._message_repository,
                refresh_token_error_status_codes=refresh_token_error_status_codes,
                refresh_token_error_key=refresh_token_error_key,
                refresh_token_error_values=refresh_token_error_values,
            )
        # ignore type error because fixing it would have a lot of dependencies, revisit later
        return DeclarativeOauth2Authenticator(  # type: ignore
            access_token_name=model.access_token_name or "access_token",
            access_token_value=model.access_token_value,
            client_id_name=model.client_id_name or "client_id",
            client_id=model.client_id,
            client_secret_name=model.client_secret_name or "client_secret",
            client_secret=model.client_secret,
            expires_in_name=model.expires_in_name or "expires_in",
            grant_type_name=model.grant_type_name or "grant_type",
            grant_type=model.grant_type or "refresh_token",
            refresh_request_body=model.refresh_request_body,
            refresh_request_headers=model.refresh_request_headers,
            refresh_token_name=model.refresh_token_name or "refresh_token",
            refresh_token=model.refresh_token,
            scopes=model.scopes,
            token_expiry_date=model.token_expiry_date,
            token_expiry_date_format=model.token_expiry_date_format,
            token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format),
            token_refresh_endpoint=model.token_refresh_endpoint,
            config=config,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            profile_assertion=profile_assertion,
            use_profile_assertion=model.use_profile_assertion,
            refresh_token_error_status_codes=refresh_token_error_status_codes,
            refresh_token_error_key=refresh_token_error_key,
            refresh_token_error_values=refresh_token_error_values,
        )

    @staticmethod
    def _get_refresh_token_error_information(
        model: OAuthAuthenticatorModel,
    ) -> Tuple[Tuple[int, ...], str, Tuple[str, ...]]:
        """
        In a previous version of the CDK, auth errors were only surfaced as config errors if a refresh token
        updater was defined. As a transition, those fields were also added to the OAuthAuthenticatorModel. This
        method ensures the information is defined only once and returns the relevant fields.
2847 """ 2848 refresh_token_updater = model.refresh_token_updater 2849 is_defined_on_refresh_token_updated = refresh_token_updater and ( 2850 refresh_token_updater.refresh_token_error_status_codes 2851 or refresh_token_updater.refresh_token_error_key 2852 or refresh_token_updater.refresh_token_error_values 2853 ) 2854 is_defined_on_oauth_authenticator = ( 2855 model.refresh_token_error_status_codes 2856 or model.refresh_token_error_key 2857 or model.refresh_token_error_values 2858 ) 2859 if is_defined_on_refresh_token_updated and is_defined_on_oauth_authenticator: 2860 raise ValueError( 2861 "refresh_token_error should either be defined on the OAuthAuthenticatorModel or the RefreshTokenUpdaterModel, not both" 2862 ) 2863 2864 if is_defined_on_refresh_token_updated: 2865 not_optional_refresh_token_updater: RefreshTokenUpdaterModel = refresh_token_updater # type: ignore # we know from the condition that this is not None 2866 return ( 2867 tuple(not_optional_refresh_token_updater.refresh_token_error_status_codes) 2868 if not_optional_refresh_token_updater.refresh_token_error_status_codes 2869 else (), 2870 not_optional_refresh_token_updater.refresh_token_error_key or "", 2871 tuple(not_optional_refresh_token_updater.refresh_token_error_values) 2872 if not_optional_refresh_token_updater.refresh_token_error_values 2873 else (), 2874 ) 2875 elif is_defined_on_oauth_authenticator: 2876 return ( 2877 tuple(model.refresh_token_error_status_codes) 2878 if model.refresh_token_error_status_codes 2879 else (), 2880 model.refresh_token_error_key or "", 2881 tuple(model.refresh_token_error_values) if model.refresh_token_error_values else (), 2882 ) 2883 2884 # returning default values we think cover most cases 2885 return (400,), "error", ("invalid_grant", "invalid_permissions") 2886 2887 def create_offset_increment( 2888 self, 2889 model: OffsetIncrementModel, 2890 config: Config, 2891 decoder: Decoder, 2892 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2893 **kwargs: Any, 2894 ) -> OffsetIncrement: 2895 if isinstance(decoder, PaginationDecoderDecorator): 2896 inner_decoder = decoder.decoder 2897 else: 2898 inner_decoder = decoder 2899 decoder = PaginationDecoderDecorator(decoder=decoder) 2900 2901 if self._is_supported_decoder_for_pagination(inner_decoder): 2902 decoder_to_use = decoder 2903 else: 2904 raise ValueError( 2905 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2906 ) 2907 2908 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2909 # so that it can be shared by OffSetIncrement and RecordSelector. However, due to how we instantiate the 2910 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2911 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2912 # When we have more time to investigate we can look into reusing the same component. 
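        # Illustrative sketch (assumption, not module source): the "two extractors from one
        # model" behaviour mentioned above. Two DpathExtractor instances built from the same
        # field_path are distinct objects with identical extraction behaviour; exact
        # constructor defaults may differ from this sketch.
        #
        #   from airbyte_cdk.sources.declarative.extractors import DpathExtractor
        #
        #   pagination_extractor = DpathExtractor(field_path=["meta", "next"], config={}, parameters={})
        #   selector_extractor = DpathExtractor(field_path=["meta", "next"], config={}, parameters={})
        #   # Both resolve the same dpath; only the decorating decoder differs at runtime.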
        extractor = (
            self._create_component_from_model(
                model=extractor_model, config=config, decoder=decoder_to_use
            )
            if extractor_model
            else None
        )

        return OffsetIncrement(
            page_size=model.page_size,
            config=config,
            decoder=decoder_to_use,
            extractor=extractor,
            inject_on_first_request=model.inject_on_first_request or False,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_page_increment(
        model: PageIncrementModel, config: Config, **kwargs: Any
    ) -> PageIncrement:
        return PageIncrement(
            page_size=model.page_size,
            config=config,
            start_from_page=model.start_from_page or 0,
            inject_on_first_request=model.inject_on_first_request or False,
            parameters=model.parameters or {},
        )

    def create_parent_stream_config(
        self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
    ) -> ParentStreamConfig:
        declarative_stream = self._create_component_from_model(
            model.stream,
            config=config,
            is_parent=True,
            **kwargs,
        )
        request_option = (
            self._create_component_from_model(model.request_option, config=config)
            if model.request_option
            else None
        )

        if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer):
            raise ValueError(
                "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed."
            )

        model_lazy_read_pointer: List[Union[InterpolatedString, str]] = (
            [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else []
        )

        return ParentStreamConfig(
            parent_key=model.parent_key,
            request_option=request_option,
            stream=declarative_stream,
            partition_field=model.partition_field,
            config=config,
            incremental_dependency=model.incremental_dependency or False,
            parameters=model.parameters or {},
            extra_fields=model.extra_fields,
            lazy_read_pointer=model_lazy_read_pointer,
        )

    def create_properties_from_endpoint(
        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
    ) -> PropertiesFromEndpoint:
        retriever = self._create_component_from_model(
            model=model.retriever,
            config=config,
            name="dynamic_properties",
            primary_key=None,
            stream_slicer=None,
            transformations=[],
            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ
        )
        return PropertiesFromEndpoint(
            property_field_path=model.property_field_path,
            retriever=retriever,
            config=config,
            parameters=model.parameters or {},
        )

    def create_property_chunking(
        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
    ) -> PropertyChunking:
        record_merge_strategy = (
            self._create_component_from_model(
                model=model.record_merge_strategy, config=config, **kwargs
            )
            if model.record_merge_strategy
            else None
        )

        property_limit_type: PropertyLimitType
        match model.property_limit_type:
            case PropertyLimitTypeModel.property_count:
                property_limit_type = PropertyLimitType.property_count
            case PropertyLimitTypeModel.characters:
                property_limit_type = PropertyLimitType.characters
            case _:
                raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")
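        # Illustrative sketch (assumption, not module source): the match statement above is a
        # straight model-enum -> runtime-enum translation. A declarative definition along the
        # lines of the dict below would end up as PropertyLimitType.property_count at runtime;
        # the exact manifest keys are assumptions for the example.
        #
        #   chunking_definition = {
        #       "type": "PropertyChunking",
        #       "property_limit_type": "property_count",
        #       "property_limit": 15,
        #   }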
3016 3017 return PropertyChunking( 3018 property_limit_type=property_limit_type, 3019 property_limit=model.property_limit, 3020 record_merge_strategy=record_merge_strategy, 3021 config=config, 3022 parameters=model.parameters or {}, 3023 ) 3024 3025 def create_query_properties( 3026 self, model: QueryPropertiesModel, config: Config, *, stream_name: str, **kwargs: Any 3027 ) -> QueryProperties: 3028 if isinstance(model.property_list, list): 3029 property_list = model.property_list 3030 else: 3031 property_list = self._create_component_from_model( 3032 model=model.property_list, config=config, **kwargs 3033 ) 3034 3035 property_chunking = ( 3036 self._create_component_from_model( 3037 model=model.property_chunking, config=config, **kwargs 3038 ) 3039 if model.property_chunking 3040 else None 3041 ) 3042 3043 property_selector = ( 3044 self._create_component_from_model( 3045 model=model.property_selector, config=config, stream_name=stream_name, **kwargs 3046 ) 3047 if model.property_selector 3048 else None 3049 ) 3050 3051 return QueryProperties( 3052 property_list=property_list, 3053 always_include_properties=model.always_include_properties, 3054 property_chunking=property_chunking, 3055 property_selector=property_selector, 3056 config=config, 3057 parameters=model.parameters or {}, 3058 ) 3059 3060 def create_json_schema_property_selector( 3061 self, 3062 model: JsonSchemaPropertySelectorModel, 3063 config: Config, 3064 *, 3065 stream_name: str, 3066 **kwargs: Any, 3067 ) -> JsonSchemaPropertySelector: 3068 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 3069 3070 transformations = [] 3071 if model.transformations: 3072 for transformation_model in model.transformations: 3073 transformations.append( 3074 self._create_component_from_model(model=transformation_model, config=config) 3075 ) 3076 3077 return JsonSchemaPropertySelector( 3078 configured_stream=configured_stream, 3079 properties_transformations=transformations, 3080 config=config, 3081 parameters=model.parameters or {}, 3082 ) 3083 3084 @staticmethod 3085 def create_record_filter( 3086 model: RecordFilterModel, config: Config, **kwargs: Any 3087 ) -> RecordFilter: 3088 return RecordFilter( 3089 condition=model.condition or "", config=config, parameters=model.parameters or {} 3090 ) 3091 3092 @staticmethod 3093 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 3094 return RequestPath(parameters={}) 3095 3096 @staticmethod 3097 def create_request_option( 3098 model: RequestOptionModel, config: Config, **kwargs: Any 3099 ) -> RequestOption: 3100 inject_into = RequestOptionType(model.inject_into.value) 3101 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3102 [ 3103 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3104 for segment in model.field_path 3105 ] 3106 if model.field_path 3107 else None 3108 ) 3109 field_name = ( 3110 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3111 if model.field_name 3112 else None 3113 ) 3114 return RequestOption( 3115 field_name=field_name, 3116 field_path=field_path, 3117 inject_into=inject_into, 3118 parameters=kwargs.get("parameters", {}), 3119 ) 3120 3121 def create_record_selector( 3122 self, 3123 model: RecordSelectorModel, 3124 config: Config, 3125 *, 3126 name: str, 3127 transformations: List[RecordTransformation] | None = None, 3128 decoder: Decoder | None = None, 3129 client_side_incremental_sync_cursor: Optional[Cursor] = None, 3130 
file_uploader: Optional[DefaultFileUploader] = None, 3131 **kwargs: Any, 3132 ) -> RecordSelector: 3133 extractor = self._create_component_from_model( 3134 model=model.extractor, decoder=decoder, config=config 3135 ) 3136 record_filter = ( 3137 self._create_component_from_model(model.record_filter, config=config) 3138 if model.record_filter 3139 else None 3140 ) 3141 3142 transform_before_filtering = ( 3143 False if model.transform_before_filtering is None else model.transform_before_filtering 3144 ) 3145 if client_side_incremental_sync_cursor: 3146 record_filter = ClientSideIncrementalRecordFilterDecorator( 3147 config=config, 3148 parameters=model.parameters, 3149 condition=model.record_filter.condition 3150 if (model.record_filter and hasattr(model.record_filter, "condition")) 3151 else None, 3152 cursor=client_side_incremental_sync_cursor, 3153 ) 3154 transform_before_filtering = ( 3155 True 3156 if model.transform_before_filtering is None 3157 else model.transform_before_filtering 3158 ) 3159 3160 if model.schema_normalization is None: 3161 # default to no schema normalization if not set 3162 model.schema_normalization = SchemaNormalizationModel.None_ 3163 3164 schema_normalization = ( 3165 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3166 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3167 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3168 ) 3169 3170 return RecordSelector( 3171 extractor=extractor, 3172 name=name, 3173 config=config, 3174 record_filter=record_filter, 3175 transformations=transformations or [], 3176 file_uploader=file_uploader, 3177 schema_normalization=schema_normalization, 3178 parameters=model.parameters or {}, 3179 transform_before_filtering=transform_before_filtering, 3180 ) 3181 3182 @staticmethod 3183 def create_remove_fields( 3184 model: RemoveFieldsModel, config: Config, **kwargs: Any 3185 ) -> RemoveFields: 3186 return RemoveFields( 3187 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 3188 ) 3189 3190 def create_selective_authenticator( 3191 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3192 ) -> DeclarativeAuthenticator: 3193 authenticators = { 3194 name: self._create_component_from_model(model=auth, config=config) 3195 for name, auth in model.authenticators.items() 3196 } 3197 # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error 3198 return SelectiveAuthenticator( # type: ignore[abstract] 3199 config=config, 3200 authenticators=authenticators, 3201 authenticator_selection_path=model.authenticator_selection_path, 3202 **kwargs, 3203 ) 3204 3205 @staticmethod 3206 def create_legacy_session_token_authenticator( 3207 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3208 ) -> LegacySessionTokenAuthenticator: 3209 return LegacySessionTokenAuthenticator( 3210 api_url=url_base, 3211 header=model.header, 3212 login_url=model.login_url, 3213 password=model.password or "", 3214 session_token=model.session_token or "", 3215 session_token_response_key=model.session_token_response_key or "", 3216 username=model.username or "", 3217 validate_session_url=model.validate_session_url, 3218 config=config, 3219 parameters=model.parameters or {}, 3220 ) 3221 3222 def create_simple_retriever( 3223 self, 3224 model: SimpleRetrieverModel, 3225 config: Config, 3226 *, 3227 
name: str, 3228 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3229 request_options_provider: Optional[RequestOptionsProvider] = None, 3230 cursor: Optional[Cursor] = None, 3231 has_stop_condition_cursor: bool = False, 3232 is_client_side_incremental_sync: bool = False, 3233 transformations: List[RecordTransformation], 3234 file_uploader: Optional[DefaultFileUploader] = None, 3235 incremental_sync: Optional[ 3236 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3237 ] = None, 3238 use_cache: Optional[bool] = None, 3239 log_formatter: Optional[Callable[[Response], Any]] = None, 3240 partition_router: Optional[PartitionRouter] = None, 3241 **kwargs: Any, 3242 ) -> SimpleRetriever: 3243 def _get_url(req: Requester) -> str: 3244 """ 3245 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3246 This is needed because the URL is not set until the requester is created. 3247 """ 3248 3249 _url: str = ( 3250 model.requester.url 3251 if hasattr(model.requester, "url") and model.requester.url is not None 3252 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3253 ) 3254 _url_base: str = ( 3255 model.requester.url_base 3256 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3257 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3258 ) 3259 3260 return _url or _url_base 3261 3262 if cursor is None: 3263 cursor = FinalStateCursor(name, None, self._message_repository) 3264 3265 decoder = ( 3266 self._create_component_from_model(model=model.decoder, config=config) 3267 if model.decoder 3268 else JsonDecoder(parameters={}) 3269 ) 3270 record_selector = self._create_component_from_model( 3271 model=model.record_selector, 3272 name=name, 3273 config=config, 3274 decoder=decoder, 3275 transformations=transformations, 3276 client_side_incremental_sync_cursor=cursor if is_client_side_incremental_sync else None, 3277 file_uploader=file_uploader, 3278 ) 3279 3280 query_properties: Optional[QueryProperties] = None 3281 query_properties_key: Optional[str] = None 3282 self._ensure_query_properties_to_model(model.requester) 3283 if self._has_query_properties_in_request_parameters(model.requester): 3284 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3285 # places instead of default to request_parameters which isn't clearly documented 3286 if ( 3287 hasattr(model.requester, "fetch_properties_from_endpoint") 3288 and model.requester.fetch_properties_from_endpoint 3289 ): 3290 raise ValueError( 3291 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3292 ) 3293 3294 query_properties_definitions = [] 3295 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _has_query_properties_in_request_parameters() 3296 if isinstance(request_parameter, QueryPropertiesModel): 3297 query_properties_key = key 3298 query_properties_definitions.append(request_parameter) 3299 3300 if len(query_properties_definitions) > 1: 3301 raise ValueError( 3302 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3303 ) 3304 3305 if len(query_properties_definitions) == 1: 3306 query_properties = self._create_component_from_model( 3307 
model=query_properties_definitions[0], stream_name=name, config=config 3308 ) 3309 3310 # Removes QueryProperties components from the interpolated mappings because it has been designed 3311 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 3312 # instead of through jinja interpolation 3313 if hasattr(model.requester, "request_parameters") and isinstance( 3314 model.requester.request_parameters, Mapping 3315 ): 3316 model.requester.request_parameters = self._remove_query_properties( 3317 model.requester.request_parameters 3318 ) 3319 elif ( 3320 hasattr(model.requester, "fetch_properties_from_endpoint") 3321 and model.requester.fetch_properties_from_endpoint 3322 ): 3323 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3324 query_properties_definition = QueryPropertiesModel( 3325 type="QueryProperties", 3326 property_list=model.requester.fetch_properties_from_endpoint, 3327 always_include_properties=None, 3328 property_chunking=None, 3329 ) # type: ignore # $parameters has a default value 3330 3331 query_properties = self.create_query_properties( 3332 model=query_properties_definition, 3333 stream_name=name, 3334 config=config, 3335 ) 3336 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3337 query_properties = self.create_query_properties( 3338 model=model.requester.query_properties, 3339 stream_name=name, 3340 config=config, 3341 ) 3342 3343 requester = self._create_component_from_model( 3344 model=model.requester, 3345 decoder=decoder, 3346 name=name, 3347 query_properties_key=query_properties_key, 3348 use_cache=use_cache, 3349 config=config, 3350 ) 3351 3352 if not request_options_provider: 3353 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3354 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3355 partition_router, PartitionRouter 3356 ): 3357 request_options_provider = partition_router 3358 3359 paginator = ( 3360 self._create_component_from_model( 3361 model=model.paginator, 3362 config=config, 3363 url_base=_get_url(requester), 3364 extractor_model=model.record_selector.extractor, 3365 decoder=decoder, 3366 cursor_used_for_stop_condition=cursor if has_stop_condition_cursor else None, 3367 ) 3368 if model.paginator 3369 else NoPagination(parameters={}) 3370 ) 3371 3372 ignore_stream_slicer_parameters_on_paginated_requests = ( 3373 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3374 ) 3375 3376 if ( 3377 model.partition_router 3378 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3379 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3380 and any( 3381 parent_stream_config.lazy_read_pointer 3382 for parent_stream_config in model.partition_router.parent_stream_configs 3383 ) 3384 ): 3385 if incremental_sync: 3386 if incremental_sync.type != "DatetimeBasedCursor": 3387 raise ValueError( 3388 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3389 ) 3390 3391 elif incremental_sync.step or incremental_sync.cursor_granularity: 3392 raise ValueError( 3393 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 3394 ) 3395 3396 if model.decoder and model.decoder.type != "JsonDecoder": 3397 raise ValueError( 3398 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 
3399 ) 3400 3401 return LazySimpleRetriever( 3402 name=name, 3403 paginator=paginator, 3404 primary_key=primary_key, 3405 requester=requester, 3406 record_selector=record_selector, 3407 stream_slicer=_NO_STREAM_SLICING, 3408 request_option_provider=request_options_provider, 3409 config=config, 3410 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3411 parameters=model.parameters or {}, 3412 ) 3413 3414 if ( 3415 model.record_selector.record_filter 3416 and model.pagination_reset 3417 and model.pagination_reset.limits 3418 ): 3419 raise ValueError("PaginationResetLimits are not supported while having record filter.") 3420 3421 return SimpleRetriever( 3422 name=name, 3423 paginator=paginator, 3424 primary_key=primary_key, 3425 requester=requester, 3426 record_selector=record_selector, 3427 stream_slicer=_NO_STREAM_SLICING, 3428 request_option_provider=request_options_provider, 3429 config=config, 3430 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3431 additional_query_properties=query_properties, 3432 log_formatter=self._get_log_formatter(log_formatter, name), 3433 pagination_tracker_factory=self._create_pagination_tracker_factory( 3434 model.pagination_reset, cursor 3435 ), 3436 parameters=model.parameters or {}, 3437 ) 3438 3439 def _create_pagination_tracker_factory( 3440 self, model: Optional[PaginationResetModel], cursor: Cursor 3441 ) -> Callable[[], PaginationTracker]: 3442 if model is None: 3443 return lambda: PaginationTracker() 3444 3445 # Until we figure out a way to use any cursor for PaginationTracker, we will have to have this cursor selector logic 3446 cursor_factory: Callable[[], Optional[ConcurrentCursor]] = lambda: None 3447 if model.action == PaginationResetActionModel.RESET: 3448 # in that case, we will let cursor_factory to return None even if the stream has a cursor 3449 pass 3450 elif model.action == PaginationResetActionModel.SPLIT_USING_CURSOR: 3451 if isinstance(cursor, ConcurrentCursor): 3452 cursor_factory = lambda: cursor.copy_without_state() # type: ignore # the if condition validates that it is a ConcurrentCursor 3453 elif isinstance(cursor, ConcurrentPerPartitionCursor): 3454 cursor_factory = lambda: cursor._cursor_factory.create( # type: ignore # if this becomes a problem, we would need to extract the cursor_factory instantiation logic and make it accessible here 3455 {}, datetime.timedelta(0) 3456 ) 3457 elif not isinstance(cursor, FinalStateCursor): 3458 LOGGER.warning( 3459 "Unknown cursor for PaginationTracker. Pagination resets might not work properly" 3460 ) 3461 else: 3462 raise ValueError(f"Unknown PaginationReset action: {model.action}") 3463 3464 limit = model.limits.number_of_records if model and model.limits else None 3465 return lambda: PaginationTracker(cursor_factory(), limit) 3466 3467 def _get_log_formatter( 3468 self, log_formatter: Callable[[Response], Any] | None, name: str 3469 ) -> Callable[[Response], Any] | None: 3470 if self._should_limit_slices_fetched(): 3471 return ( 3472 ( 3473 lambda response: format_http_message( 3474 response, 3475 f"Stream '{name}' request", 3476 f"Request performed in order to extract records for stream '{name}'", 3477 name, 3478 ) 3479 ) 3480 if not log_formatter 3481 else log_formatter 3482 ) 3483 return None 3484 3485 def _should_limit_slices_fetched(self) -> bool: 3486 """ 3487 Returns True if the number of slices fetched should be limited, False otherwise. 
3488 This is used to limit the number of slices fetched during tests. 3489 """ 3490 return bool(self._limit_slices_fetched or self._emit_connector_builder_messages) 3491 3492 @staticmethod 3493 def _has_query_properties_in_request_parameters( 3494 requester: Union[HttpRequesterModel, CustomRequesterModel], 3495 ) -> bool: 3496 if not hasattr(requester, "request_parameters"): 3497 return False 3498 request_parameters = requester.request_parameters 3499 if request_parameters and isinstance(request_parameters, Mapping): 3500 for request_parameter in request_parameters.values(): 3501 if isinstance(request_parameter, QueryPropertiesModel): 3502 return True 3503 return False 3504 3505 @staticmethod 3506 def _remove_query_properties( 3507 request_parameters: Mapping[str, Union[str, QueryPropertiesModel]], 3508 ) -> Mapping[str, str]: 3509 return { 3510 parameter_field: request_parameter 3511 for parameter_field, request_parameter in request_parameters.items() 3512 if not isinstance(request_parameter, QueryPropertiesModel) 3513 } 3514 3515 def create_state_delegating_stream( 3516 self, 3517 model: StateDelegatingStreamModel, 3518 config: Config, 3519 has_parent_state: Optional[bool] = None, 3520 **kwargs: Any, 3521 ) -> DefaultStream: 3522 if ( 3523 model.full_refresh_stream.name != model.name 3524 or model.name != model.incremental_stream.name 3525 ): 3526 raise ValueError( 3527 f"The state_delegating_stream, full_refresh_stream and incremental_stream must all have the same name. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3528 ) 3529 3530 stream_model = self._get_state_delegating_stream_model( 3531 False if has_parent_state is None else has_parent_state, model 3532 ) 3533 3534 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is always DeclarativeStreamModel 3535 3536 def _get_state_delegating_stream_model( 3537 self, has_parent_state: bool, model: StateDelegatingStreamModel 3538 ) -> DeclarativeStreamModel: 3539 return ( 3540 model.incremental_stream 3541 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3542 else model.full_refresh_stream 3543 ) 3544 3545 def _create_async_job_status_mapping( 3546 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3547 ) -> Mapping[str, AsyncJobStatus]: 3548 api_status_to_cdk_status = {} 3549 for cdk_status, api_statuses in model.dict().items(): 3550 if cdk_status == "type": 3551 # This is an element of the dict because of the typing of the CDK, but it is not a CDK status 3552 continue 3553 3554 for status in api_statuses: 3555 if status in api_status_to_cdk_status: 3556 raise ValueError( 3557 f"API status {status} is already set for CDK status {cdk_status}.
Please ensure API statuses are only provided once" 3558 ) 3559 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3560 return api_status_to_cdk_status 3561 3562 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3563 match status: 3564 case "running": 3565 return AsyncJobStatus.RUNNING 3566 case "completed": 3567 return AsyncJobStatus.COMPLETED 3568 case "failed": 3569 return AsyncJobStatus.FAILED 3570 case "timeout": 3571 return AsyncJobStatus.TIMED_OUT 3572 case _: 3573 raise ValueError(f"Unsupported CDK status {status}") 3574 3575 def create_async_retriever( 3576 self, 3577 model: AsyncRetrieverModel, 3578 config: Config, 3579 *, 3580 name: str, 3581 primary_key: Optional[ 3582 Union[str, List[str], List[List[str]]] 3583 ], # this seems to be needed to match create_simple_retriever 3584 stream_slicer: Optional[StreamSlicer], 3585 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3586 transformations: List[RecordTransformation], 3587 **kwargs: Any, 3588 ) -> AsyncRetriever: 3589 if model.download_target_requester and not model.download_target_extractor: 3590 raise ValueError( 3591 f"`download_target_extractor` required if using a `download_target_requester`" 3592 ) 3593 3594 def _get_download_retriever( 3595 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3596 ) -> SimpleRetriever: 3597 # We create a record selector for the download retriever 3598 # with no schema normalization and no transformations, neither record filter 3599 # as all this occurs in the record_selector of the AsyncRetriever 3600 record_selector = RecordSelector( 3601 extractor=extractor, 3602 name=name, 3603 record_filter=None, 3604 transformations=[], 3605 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3606 config=config, 3607 parameters={}, 3608 ) 3609 paginator = ( 3610 self._create_component_from_model( 3611 model=model.download_paginator, 3612 decoder=_decoder, 3613 config=config, 3614 url_base="", 3615 ) 3616 if model.download_paginator 3617 else NoPagination(parameters={}) 3618 ) 3619 3620 return SimpleRetriever( 3621 requester=requester, 3622 record_selector=record_selector, 3623 primary_key=None, 3624 name=name, 3625 paginator=paginator, 3626 config=config, 3627 parameters={}, 3628 log_formatter=self._get_log_formatter(None, name), 3629 ) 3630 3631 def _get_job_timeout() -> datetime.timedelta: 3632 user_defined_timeout: Optional[int] = ( 3633 int( 3634 InterpolatedString.create( 3635 str(model.polling_job_timeout), 3636 parameters={}, 3637 ).eval(config) 3638 ) 3639 if model.polling_job_timeout 3640 else None 3641 ) 3642 3643 # check for user defined timeout during the test read or 15 minutes 3644 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3645 # default value for non-connector builder is 60 minutes. 
3646 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3647 3648 return ( 3649 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3650 ) 3651 3652 decoder = ( 3653 self._create_component_from_model(model=model.decoder, config=config) 3654 if model.decoder 3655 else JsonDecoder(parameters={}) 3656 ) 3657 record_selector = self._create_component_from_model( 3658 model=model.record_selector, 3659 config=config, 3660 decoder=decoder, 3661 name=name, 3662 transformations=transformations, 3663 client_side_incremental_sync=client_side_incremental_sync, 3664 ) 3665 3666 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3667 if self._should_limit_slices_fetched(): 3668 stream_slicer = cast( 3669 StreamSlicer, 3670 StreamSlicerTestReadDecorator( 3671 wrapped_slicer=stream_slicer, 3672 maximum_number_of_slices=self._limit_slices_fetched or 5, 3673 ), 3674 ) 3675 3676 creation_requester = self._create_component_from_model( 3677 model=model.creation_requester, 3678 decoder=decoder, 3679 config=config, 3680 name=f"job creation - {name}", 3681 ) 3682 polling_requester = self._create_component_from_model( 3683 model=model.polling_requester, 3684 decoder=decoder, 3685 config=config, 3686 name=f"job polling - {name}", 3687 ) 3688 job_download_components_name = f"job download - {name}" 3689 download_decoder = ( 3690 self._create_component_from_model(model=model.download_decoder, config=config) 3691 if model.download_decoder 3692 else JsonDecoder(parameters={}) 3693 ) 3694 download_extractor = ( 3695 self._create_component_from_model( 3696 model=model.download_extractor, 3697 config=config, 3698 decoder=download_decoder, 3699 parameters=model.parameters, 3700 ) 3701 if model.download_extractor 3702 else DpathExtractor( 3703 [], 3704 config=config, 3705 decoder=download_decoder, 3706 parameters=model.parameters or {}, 3707 ) 3708 ) 3709 download_requester = self._create_component_from_model( 3710 model=model.download_requester, 3711 decoder=download_decoder, 3712 config=config, 3713 name=job_download_components_name, 3714 ) 3715 download_retriever = _get_download_retriever( 3716 download_requester, download_extractor, download_decoder 3717 ) 3718 abort_requester = ( 3719 self._create_component_from_model( 3720 model=model.abort_requester, 3721 decoder=decoder, 3722 config=config, 3723 name=f"job abort - {name}", 3724 ) 3725 if model.abort_requester 3726 else None 3727 ) 3728 delete_requester = ( 3729 self._create_component_from_model( 3730 model=model.delete_requester, 3731 decoder=decoder, 3732 config=config, 3733 name=f"job delete - {name}", 3734 ) 3735 if model.delete_requester 3736 else None 3737 ) 3738 download_target_requester = ( 3739 self._create_component_from_model( 3740 model=model.download_target_requester, 3741 decoder=decoder, 3742 config=config, 3743 name=f"job extract_url - {name}", 3744 ) 3745 if model.download_target_requester 3746 else None 3747 ) 3748 status_extractor = self._create_component_from_model( 3749 model=model.status_extractor, decoder=decoder, config=config, name=name 3750 ) 3751 download_target_extractor = ( 3752 self._create_component_from_model( 3753 model=model.download_target_extractor, 3754 decoder=decoder, 3755 config=config, 3756 name=name, 3757 ) 3758 if model.download_target_extractor 3759 else None 3760 ) 3761 3762 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3763 creation_requester=creation_requester, 3764 polling_requester=polling_requester, 3765 
download_retriever=download_retriever, 3766 download_target_requester=download_target_requester, 3767 abort_requester=abort_requester, 3768 delete_requester=delete_requester, 3769 status_extractor=status_extractor, 3770 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3771 download_target_extractor=download_target_extractor, 3772 job_timeout=_get_job_timeout(), 3773 ) 3774 3775 async_job_partition_router = AsyncJobPartitionRouter( 3776 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3777 job_repository, 3778 stream_slices, 3779 self._job_tracker, 3780 self._message_repository, 3781 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk 3782 has_bulk_parent=False, 3783 # set the `job_max_retry` to 1 for the `Connector Builder`` use-case. 3784 # `None` == default retry is set to 3 attempts, under the hood. 3785 job_max_retry=1 if self._emit_connector_builder_messages else None, 3786 ), 3787 stream_slicer=stream_slicer, 3788 config=config, 3789 parameters=model.parameters or {}, 3790 ) 3791 3792 return AsyncRetriever( 3793 record_selector=record_selector, 3794 stream_slicer=async_job_partition_router, 3795 config=config, 3796 parameters=model.parameters or {}, 3797 ) 3798 3799 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3800 config_migrations = [ 3801 self._create_component_from_model(migration, config) 3802 for migration in ( 3803 model.config_normalization_rules.config_migrations 3804 if ( 3805 model.config_normalization_rules 3806 and model.config_normalization_rules.config_migrations 3807 ) 3808 else [] 3809 ) 3810 ] 3811 config_transformations = [ 3812 self._create_component_from_model(transformation, config) 3813 for transformation in ( 3814 model.config_normalization_rules.transformations 3815 if ( 3816 model.config_normalization_rules 3817 and model.config_normalization_rules.transformations 3818 ) 3819 else [] 3820 ) 3821 ] 3822 config_validations = [ 3823 self._create_component_from_model(validation, config) 3824 for validation in ( 3825 model.config_normalization_rules.validations 3826 if ( 3827 model.config_normalization_rules 3828 and model.config_normalization_rules.validations 3829 ) 3830 else [] 3831 ) 3832 ] 3833 3834 return Spec( 3835 connection_specification=model.connection_specification, 3836 documentation_url=model.documentation_url, 3837 advanced_auth=model.advanced_auth, 3838 parameters={}, 3839 config_migrations=config_migrations, 3840 config_transformations=config_transformations, 3841 config_validations=config_validations, 3842 ) 3843 3844 def create_substream_partition_router( 3845 self, 3846 model: SubstreamPartitionRouterModel, 3847 config: Config, 3848 *, 3849 stream_name: str, 3850 **kwargs: Any, 3851 ) -> SubstreamPartitionRouter: 3852 parent_stream_configs = [] 3853 if model.parent_stream_configs: 3854 parent_stream_configs.extend( 3855 [ 3856 self.create_parent_stream_config_with_substream_wrapper( 3857 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3858 ) 3859 for parent_stream_config in model.parent_stream_configs 3860 ] 3861 ) 3862 3863 return SubstreamPartitionRouter( 3864 parent_stream_configs=parent_stream_configs, 3865 parameters=model.parameters or {}, 3866 config=config, 3867 ) 3868 3869 def create_parent_stream_config_with_substream_wrapper( 3870 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 3871 ) -> Any: 3872 # getting the parent state 
3873 child_state = self._connector_state_manager.get_stream_state(stream_name, None) 3874 3875 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3876 has_parent_state = bool( 3877 self._connector_state_manager.get_stream_state(stream_name, None) 3878 if model.incremental_dependency 3879 else False 3880 ) 3881 connector_state_manager = self._instantiate_parent_stream_state_manager( 3882 child_state, config, model, has_parent_state 3883 ) 3884 3885 substream_factory = ModelToComponentFactory( 3886 connector_state_manager=connector_state_manager, 3887 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3888 limit_slices_fetched=self._limit_slices_fetched, 3889 emit_connector_builder_messages=self._emit_connector_builder_messages, 3890 disable_retries=self._disable_retries, 3891 disable_cache=self._disable_cache, 3892 message_repository=StateFilteringMessageRepository( 3893 LogAppenderMessageRepositoryDecorator( 3894 { 3895 "airbyte_cdk": {"stream": {"is_substream": True}}, 3896 "http": {"is_auxiliary": True}, 3897 }, 3898 self._message_repository, 3899 self._evaluate_log_level(self._emit_connector_builder_messages), 3900 ), 3901 ), 3902 api_budget=self._api_budget, 3903 ) 3904 3905 return substream_factory.create_parent_stream_config( 3906 model=model, config=config, stream_name=stream_name, **kwargs 3907 ) 3908 3909 def _instantiate_parent_stream_state_manager( 3910 self, 3911 child_state: MutableMapping[str, Any], 3912 config: Config, 3913 model: ParentStreamConfigModel, 3914 has_parent_state: bool, 3915 ) -> ConnectorStateManager: 3916 """ 3917 With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the 3918 `set_initial_state` flow that existed for the declarative cursors. This state is taken from 3919 self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account 3920 for the MessageRepository being different). So we need to pass a ConnectorStateManager to the 3921 ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if 3922 incremental_dependency is set. 3923 """ 3924 if model.incremental_dependency and child_state: 3925 parent_stream_name = model.stream.name or "" 3926 parent_state = ConcurrentPerPartitionCursor.get_parent_state( 3927 child_state, parent_stream_name 3928 ) 3929 3930 if not parent_state: 3931 # there are two migration cases: state value from child stream or from global state 3932 parent_state = ConcurrentPerPartitionCursor.get_global_state( 3933 child_state, parent_stream_name 3934 ) 3935 3936 if not parent_state and not isinstance(parent_state, dict): 3937 cursor_values = child_state.values() 3938 if cursor_values and len(cursor_values) == 1: 3939 # We assume the child state is a pair `{<cursor_field>: <cursor_value>}` and we will use the 3940 # cursor value as a parent state. 
3941 incremental_sync_model: Union[ 3942 DatetimeBasedCursorModel, 3943 IncrementingCountCursorModel, 3944 ] = ( 3945 model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream 3946 if isinstance(model.stream, DeclarativeStreamModel) 3947 else self._get_state_delegating_stream_model( 3948 has_parent_state, model.stream 3949 ).incremental_sync 3950 ) 3951 cursor_field = InterpolatedString.create( 3952 incremental_sync_model.cursor_field, 3953 parameters=incremental_sync_model.parameters or {}, 3954 ).eval(config) 3955 parent_state = AirbyteStateMessage( 3956 type=AirbyteStateType.STREAM, 3957 stream=AirbyteStreamState( 3958 stream_descriptor=StreamDescriptor( 3959 name=parent_stream_name, namespace=None 3960 ), 3961 stream_state=AirbyteStateBlob( 3962 {cursor_field: list(cursor_values)[0]} 3963 ), 3964 ), 3965 ) 3966 return ConnectorStateManager([parent_state] if parent_state else []) 3967 3968 return ConnectorStateManager([]) 3969 3970 @staticmethod 3971 def create_wait_time_from_header( 3972 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 3973 ) -> WaitTimeFromHeaderBackoffStrategy: 3974 return WaitTimeFromHeaderBackoffStrategy( 3975 header=model.header, 3976 parameters=model.parameters or {}, 3977 config=config, 3978 regex=model.regex, 3979 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 3980 if model.max_waiting_time_in_seconds is not None 3981 else None, 3982 ) 3983 3984 @staticmethod 3985 def create_wait_until_time_from_header( 3986 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3987 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3988 return WaitUntilTimeFromHeaderBackoffStrategy( 3989 header=model.header, 3990 parameters=model.parameters or {}, 3991 config=config, 3992 min_wait=model.min_wait, 3993 regex=model.regex, 3994 ) 3995 3996 def get_message_repository(self) -> MessageRepository: 3997 return self._message_repository 3998 3999 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 4000 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 4001 4002 @staticmethod 4003 def create_components_mapping_definition( 4004 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 4005 ) -> ComponentMappingDefinition: 4006 interpolated_value = InterpolatedString.create( 4007 model.value, parameters=model.parameters or {} 4008 ) 4009 field_path = [ 4010 InterpolatedString.create(path, parameters=model.parameters or {}) 4011 for path in model.field_path 4012 ] 4013 return ComponentMappingDefinition( 4014 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 4015 value=interpolated_value, 4016 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 4017 create_or_update=model.create_or_update, 4018 condition=model.condition, 4019 parameters=model.parameters or {}, 4020 ) 4021 4022 def create_http_components_resolver( 4023 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 4024 ) -> Any: 4025 retriever = self._create_component_from_model( 4026 model=model.retriever, 4027 config=config, 4028 name=f"{stream_name if stream_name else '__http_components_resolver'}", 4029 primary_key=None, 4030 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4031 transformations=[], 4032 ) 4033 4034 components_mapping = [] 4035 for 
component_mapping_definition_model in model.components_mapping: 4036 if component_mapping_definition_model.condition: 4037 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4038 components_mapping.append( 4039 self._create_component_from_model( 4040 model=component_mapping_definition_model, 4041 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4042 component_mapping_definition_model.value_type 4043 ), 4044 config=config, 4045 ) 4046 ) 4047 4048 return HttpComponentsResolver( 4049 retriever=retriever, 4050 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4051 config=config, 4052 components_mapping=components_mapping, 4053 parameters=model.parameters or {}, 4054 ) 4055 4056 @staticmethod 4057 def create_stream_config( 4058 model: StreamConfigModel, config: Config, **kwargs: Any 4059 ) -> StreamConfig: 4060 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 4061 [x for x in model.configs_pointer] if model.configs_pointer else [] 4062 ) 4063 4064 return StreamConfig( 4065 configs_pointer=model_configs_pointer, 4066 default_values=model.default_values, 4067 parameters=model.parameters or {}, 4068 ) 4069 4070 def create_config_components_resolver( 4071 self, 4072 model: ConfigComponentsResolverModel, 4073 config: Config, 4074 ) -> Any: 4075 model_stream_configs = ( 4076 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 4077 ) 4078 4079 stream_configs = [ 4080 self._create_component_from_model( 4081 stream_config, config=config, parameters=model.parameters or {} 4082 ) 4083 for stream_config in model_stream_configs 4084 ] 4085 4086 components_mapping = [ 4087 self._create_component_from_model( 4088 model=components_mapping_definition_model, 4089 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4090 components_mapping_definition_model.value_type 4091 ), 4092 config=config, 4093 parameters=model.parameters, 4094 ) 4095 for components_mapping_definition_model in model.components_mapping 4096 ] 4097 4098 return ConfigComponentsResolver( 4099 stream_configs=stream_configs, 4100 config=config, 4101 components_mapping=components_mapping, 4102 parameters=model.parameters or {}, 4103 ) 4104 4105 def create_parametrized_components_resolver( 4106 self, 4107 model: ParametrizedComponentsResolverModel, 4108 config: Config, 4109 ) -> ParametrizedComponentsResolver: 4110 stream_parameters = StreamParametersDefinition( 4111 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 4112 ) 4113 4114 components_mapping = [] 4115 for components_mapping_definition_model in model.components_mapping: 4116 if components_mapping_definition_model.condition: 4117 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4118 components_mapping.append( 4119 self._create_component_from_model( 4120 model=components_mapping_definition_model, 4121 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4122 components_mapping_definition_model.value_type 4123 ), 4124 config=config, 4125 ) 4126 ) 4127 return ParametrizedComponentsResolver( 4128 stream_parameters=stream_parameters, 4129 config=config, 4130 components_mapping=components_mapping, 4131 parameters=model.parameters or {}, 4132 ) 4133 4134 _UNSUPPORTED_DECODER_ERROR = ( 4135 "Specified decoder of {decoder_type} is not supported for pagination." 
4136 "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead." 4137 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 4138 ) 4139 4140 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 4141 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 4142 return True 4143 elif isinstance(decoder, CompositeRawDecoder): 4144 return self._is_supported_parser_for_pagination(decoder.parser) 4145 else: 4146 return False 4147 4148 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 4149 if isinstance(parser, JsonParser): 4150 return True 4151 elif isinstance(parser, GzipParser): 4152 return isinstance(parser.inner_parser, JsonParser) 4153 else: 4154 return False 4155 4156 def create_http_api_budget( 4157 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4158 ) -> HttpAPIBudget: 4159 policies = [ 4160 self._create_component_from_model(model=policy, config=config) 4161 for policy in model.policies 4162 ] 4163 4164 return HttpAPIBudget( 4165 policies=policies, 4166 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4167 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4168 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4169 ) 4170 4171 def create_fixed_window_call_rate_policy( 4172 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4173 ) -> FixedWindowCallRatePolicy: 4174 matchers = [ 4175 self._create_component_from_model(model=matcher, config=config) 4176 for matcher in model.matchers 4177 ] 4178 4179 # Set the initial reset timestamp to 10 days from now. 4180 # This value will be updated by the first request. 
4181 return FixedWindowCallRatePolicy( 4182 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4183 period=parse_duration(model.period), 4184 call_limit=model.call_limit, 4185 matchers=matchers, 4186 ) 4187 4188 def create_file_uploader( 4189 self, model: FileUploaderModel, config: Config, **kwargs: Any 4190 ) -> FileUploader: 4191 name = "File Uploader" 4192 requester = self._create_component_from_model( 4193 model=model.requester, 4194 config=config, 4195 name=name, 4196 **kwargs, 4197 ) 4198 download_target_extractor = self._create_component_from_model( 4199 model=model.download_target_extractor, 4200 config=config, 4201 name=name, 4202 **kwargs, 4203 ) 4204 emit_connector_builder_messages = self._emit_connector_builder_messages 4205 file_uploader = DefaultFileUploader( 4206 requester=requester, 4207 download_target_extractor=download_target_extractor, 4208 config=config, 4209 file_writer=NoopFileWriter() 4210 if emit_connector_builder_messages 4211 else LocalFileSystemFileWriter(), 4212 parameters=model.parameters or {}, 4213 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4214 ) 4215 4216 return ( 4217 ConnectorBuilderFileUploader(file_uploader) 4218 if emit_connector_builder_messages 4219 else file_uploader 4220 ) 4221 4222 def create_moving_window_call_rate_policy( 4223 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4224 ) -> MovingWindowCallRatePolicy: 4225 rates = [ 4226 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4227 ] 4228 matchers = [ 4229 self._create_component_from_model(model=matcher, config=config) 4230 for matcher in model.matchers 4231 ] 4232 return MovingWindowCallRatePolicy( 4233 rates=rates, 4234 matchers=matchers, 4235 ) 4236 4237 def create_unlimited_call_rate_policy( 4238 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4239 ) -> UnlimitedCallRatePolicy: 4240 matchers = [ 4241 self._create_component_from_model(model=matcher, config=config) 4242 for matcher in model.matchers 4243 ] 4244 4245 return UnlimitedCallRatePolicy( 4246 matchers=matchers, 4247 ) 4248 4249 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 4250 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 4251 return Rate( 4252 limit=int(interpolated_limit.eval(config=config)), 4253 interval=parse_duration(model.interval), 4254 ) 4255 4256 def create_http_request_matcher( 4257 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4258 ) -> HttpRequestRegexMatcher: 4259 return HttpRequestRegexMatcher( 4260 method=model.method, 4261 url_base=model.url_base, 4262 url_path_pattern=model.url_path_pattern, 4263 params=model.params, 4264 headers=model.headers, 4265 ) 4266 4267 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 4268 self._api_budget = self.create_component( 4269 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 4270 ) 4271 4272 def create_grouping_partition_router( 4273 self, 4274 model: GroupingPartitionRouterModel, 4275 config: Config, 4276 *, 4277 stream_name: str, 4278 **kwargs: Any, 4279 ) -> GroupingPartitionRouter: 4280 underlying_router = self._create_component_from_model( 4281 model=model.underlying_partition_router, 4282 config=config, 4283 stream_name=stream_name, 4284 **kwargs, 4285 ) 4286 if model.group_size < 1: 4287 raise ValueError(f"Group size must be greater than 0, got 
{model.group_size}") 4288 4289 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4290 # because they are specific to individual partitions and cannot be aggregated or handled 4291 # when grouping, potentially leading to incorrect API calls. Any request customization 4292 # should be managed at the stream level through the requester's configuration. 4293 if isinstance(underlying_router, SubstreamPartitionRouter): 4294 if any( 4295 parent_config.request_option 4296 for parent_config in underlying_router.parent_stream_configs 4297 ): 4298 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4299 4300 if isinstance(underlying_router, ListPartitionRouter): 4301 if underlying_router.request_option: 4302 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4303 4304 return GroupingPartitionRouter( 4305 group_size=model.group_size, 4306 underlying_partition_router=underlying_router, 4307 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4308 config=config, 4309 ) 4310 4311 def _ensure_query_properties_to_model( 4312 self, requester: Union[HttpRequesterModel, CustomRequesterModel] 4313 ) -> None: 4314 """ 4315 For some reason, it seems like CustomRequesterModel request_parameters stays as dictionaries which means that 4316 the other conditions relying on it being QueryPropertiesModel instead of a dict fail. Here, we migrate them to 4317 proper model. 4318 """ 4319 if not hasattr(requester, "request_parameters"): 4320 return 4321 4322 request_parameters = requester.request_parameters 4323 if request_parameters and isinstance(request_parameters, Dict): 4324 for request_parameter_key in request_parameters.keys(): 4325 request_parameter = request_parameters[request_parameter_key] 4326 if ( 4327 isinstance(request_parameter, Dict) 4328 and request_parameter.get("type") == "QueryProperties" 4329 ): 4330 request_parameters[request_parameter_key] = QueryPropertiesModel.parse_obj( 4331 request_parameter 4332 ) 4333 4334 def _get_catalog_defined_cursor_field( 4335 self, stream_name: str, allow_catalog_defined_cursor_field: bool 4336 ) -> Optional[CursorField]: 4337 if not allow_catalog_defined_cursor_field: 4338 return None 4339 4340 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 4341 4342 # Depending on the operation is being performed, there may not be a configured stream yet. In this 4343 # case we return None which will then use the default cursor field defined on the cursor model 4344 if not configured_stream or not configured_stream.cursor_field: 4345 return None 4346 elif len(configured_stream.cursor_field) > 1: 4347 raise ValueError( 4348 f"The `{stream_name}` stream does not support nested cursor_field. Please specify only a single cursor_field for the stream in the configured catalog." 4349 ) 4350 else: 4351 return CursorField( 4352 cursor_field_key=configured_stream.cursor_field[0], 4353 supports_catalog_defined_cursor_field=allow_catalog_defined_cursor_field, 4354 )
674 def __init__( 675 self, 676 limit_pages_fetched_per_slice: Optional[int] = None, 677 limit_slices_fetched: Optional[int] = None, 678 emit_connector_builder_messages: bool = False, 679 disable_retries: bool = False, 680 disable_cache: bool = False, 681 message_repository: Optional[MessageRepository] = None, 682 connector_state_manager: Optional[ConnectorStateManager] = None, 683 max_concurrent_async_job_count: Optional[int] = None, 684 configured_catalog: Optional[ConfiguredAirbyteCatalog] = None, 685 api_budget: Optional[APIBudget] = None, 686 ): 687 self._init_mappings() 688 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 689 self._limit_slices_fetched = limit_slices_fetched 690 self._emit_connector_builder_messages = emit_connector_builder_messages 691 self._disable_retries = disable_retries 692 self._disable_cache = disable_cache 693 self._message_repository = message_repository or InMemoryMessageRepository( 694 self._evaluate_log_level(emit_connector_builder_messages) 695 ) 696 self._stream_name_to_configured_stream = self._create_stream_name_to_configured_stream( 697 configured_catalog 698 ) 699 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 700 self._api_budget: Optional[Union[APIBudget]] = api_budget 701 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 702 # placeholder for deprecation warnings 703 self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
826 def create_component( 827 self, 828 model_type: Type[BaseModel], 829 component_definition: ComponentDefinition, 830 config: Config, 831 **kwargs: Any, 832 ) -> Any: 833 """ 834 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 835 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then 836 creating declarative components from that model. 837 838 :param model_type: The type of declarative component that is being initialized 839 :param component_definition: The mapping that represents a declarative component 840 :param config: The connector config that is provided by the customer 841 :return: The declarative component to be used at runtime 842 """ 843 844 component_type = component_definition.get("type") 845 if component_definition.get("type") != model_type.__name__: 846 raise ValueError( 847 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 848 ) 849 850 declarative_component_model = model_type.parse_obj(component_definition) 851 852 if not isinstance(declarative_component_model, model_type): 853 raise ValueError( 854 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 855 ) 856 857 return self._create_component_from_model( 858 model=declarative_component_model, config=config, **kwargs 859 )
Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating declarative components from that model.
Parameters
- model_type: The type of declarative component that is being initialized
- component_definition: The mapping that represents a declarative component
- config: The connector config that is provided by the customer
Returns
The declarative component to be used at runtime
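To make the parse-then-build flow above concrete, here is a minimal, hedged usage sketch. It assumes the CheckStream model (imported here as CheckStreamModel) and an illustrative component definition; any registered model type whose class name matches the definition's "type" field would follow the same path.

# Minimal sketch (assumptions noted above): build one declarative component.
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CheckStream as CheckStreamModel,
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()

# Illustrative component definition; the "type" value must match the model class name.
check_stream_definition = {
    "type": "CheckStream",
    "stream_names": ["customers"],  # assumed stream name, for illustration only
}

# The definition is first parsed into a CheckStreamModel, then converted into the
# runtime CheckStream component (along with any subcomponents it declares).
check_stream = factory.create_component(
    model_type=CheckStreamModel,
    component_definition=check_stream_definition,
    config={},  # connector config provided by the user; empty for this sketch
)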
876 def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]: 877 """ 878 Returns the deprecation warnings that were collected during the creation of components. 879 """ 880 return self._collected_deprecation_logs
Returns the deprecation warnings that were collected during the creation of components.
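A short, hedged companion sketch: once components have been created (for example via create_component above), the deprecation warnings collected along the way can be read back and surfaced. The printing shown here is illustrative only.

# Sketch: surface deprecation warnings gathered while components were created.
# Assumes `factory` is the ModelToComponentFactory instance from the previous sketch.
for deprecation_log in factory.get_model_deprecations():
    # Each entry is a connector-builder LogMessage; printing it is just for illustration.
    print(deprecation_log)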
897 def create_config_migration( 898 self, model: ConfigMigrationModel, config: Config 899 ) -> ConfigMigration: 900 transformations: List[ConfigTransformation] = [ 901 self._create_component_from_model(transformation, config) 902 for transformation in model.transformations 903 ] 904 905 return ConfigMigration( 906 description=model.description, 907 transformations=transformations, 908 )
910 def create_config_add_fields( 911 self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any 912 ) -> ConfigAddFields: 913 fields = [self._create_component_from_model(field, config) for field in model.fields] 914 return ConfigAddFields( 915 fields=fields, 916 condition=model.condition or "", 917 )
966 @staticmethod 967 def create_added_field_definition( 968 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 969 ) -> AddedFieldDefinition: 970 interpolated_value = InterpolatedString.create( 971 model.value, parameters=model.parameters or {} 972 ) 973 return AddedFieldDefinition( 974 path=model.path, 975 value=interpolated_value, 976 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 977 parameters=model.parameters or {}, 978 )
980 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 981 added_field_definitions = [ 982 self._create_component_from_model( 983 model=added_field_definition_model, 984 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 985 added_field_definition_model.value_type 986 ), 987 config=config, 988 ) 989 for added_field_definition_model in model.fields 990 ] 991 return AddFields( 992 fields=added_field_definitions, 993 condition=model.condition or "", 994 parameters=model.parameters or {}, 995 )
1021 def create_dpath_flatten_fields( 1022 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 1023 ) -> DpathFlattenFields: 1024 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 1025 key_transformation = ( 1026 KeyTransformation( 1027 config=config, 1028 prefix=model.key_transformation.prefix, 1029 suffix=model.key_transformation.suffix, 1030 parameters=model.parameters or {}, 1031 ) 1032 if model.key_transformation is not None 1033 else None 1034 ) 1035 return DpathFlattenFields( 1036 config=config, 1037 field_path=model_field_path, 1038 delete_origin_value=model.delete_origin_value 1039 if model.delete_origin_value is not None 1040 else False, 1041 replace_record=model.replace_record if model.replace_record is not None else False, 1042 key_transformation=key_transformation, 1043 parameters=model.parameters or {}, 1044 )
1058 def create_api_key_authenticator( 1059 self, 1060 model: ApiKeyAuthenticatorModel, 1061 config: Config, 1062 token_provider: Optional[TokenProvider] = None, 1063 **kwargs: Any, 1064 ) -> ApiKeyAuthenticator: 1065 if model.inject_into is None and model.header is None: 1066 raise ValueError( 1067 "Expected either inject_into or header to be set for ApiKeyAuthenticator" 1068 ) 1069 1070 if model.inject_into is not None and model.header is not None: 1071 raise ValueError( 1072 "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" 1073 ) 1074 1075 if token_provider is not None and model.api_token != "": 1076 raise ValueError( 1077 "If token_provider is set, api_token is ignored and has to be set to empty string." 1078 ) 1079 1080 request_option = ( 1081 self._create_component_from_model( 1082 model.inject_into, config, parameters=model.parameters or {} 1083 ) 1084 if model.inject_into 1085 else RequestOption( 1086 inject_into=RequestOptionType.header, 1087 field_name=model.header or "", 1088 parameters=model.parameters or {}, 1089 ) 1090 ) 1091 1092 return ApiKeyAuthenticator( 1093 token_provider=( 1094 token_provider 1095 if token_provider is not None 1096 else InterpolatedStringTokenProvider( 1097 api_token=model.api_token or "", 1098 config=config, 1099 parameters=model.parameters or {}, 1100 ) 1101 ), 1102 request_option=request_option, 1103 config=config, 1104 parameters=model.parameters or {}, 1105 )
1107 def create_legacy_to_per_partition_state_migration( 1108 self, 1109 model: LegacyToPerPartitionStateMigrationModel, 1110 config: Mapping[str, Any], 1111 declarative_stream: DeclarativeStreamModel, 1112 ) -> LegacyToPerPartitionStateMigration: 1113 retriever = declarative_stream.retriever 1114 if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)): 1115 raise ValueError( 1116 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}" 1117 ) 1118 partition_router = retriever.partition_router 1119 if not isinstance( 1120 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 1121 ): 1122 raise ValueError( 1123 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 1124 ) 1125 if not hasattr(partition_router, "parent_stream_configs"): 1126 raise ValueError( 1127 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 1128 ) 1129 1130 if not hasattr(declarative_stream, "incremental_sync"): 1131 raise ValueError( 1132 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 1133 ) 1134 1135 return LegacyToPerPartitionStateMigration( 1136 partition_router, # type: ignore # was already checked above 1137 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 1138 config, 1139 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 1140 )
1142 def create_session_token_authenticator( 1143 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 1144 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 1145 decoder = ( 1146 self._create_component_from_model(model=model.decoder, config=config) 1147 if model.decoder 1148 else JsonDecoder(parameters={}) 1149 ) 1150 login_requester = self._create_component_from_model( 1151 model=model.login_requester, 1152 config=config, 1153 name=f"{name}_login_requester", 1154 decoder=decoder, 1155 ) 1156 token_provider = SessionTokenProvider( 1157 login_requester=login_requester, 1158 session_token_path=model.session_token_path, 1159 expiration_duration=parse_duration(model.expiration_duration) 1160 if model.expiration_duration 1161 else None, 1162 parameters=model.parameters or {}, 1163 message_repository=self._message_repository, 1164 decoder=decoder, 1165 ) 1166 if model.request_authentication.type == "Bearer": 1167 return ModelToComponentFactory.create_bearer_authenticator( 1168 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 1169 config, 1170 token_provider=token_provider, 1171 ) 1172 else: 1173 return self.create_api_key_authenticator( 1174 ApiKeyAuthenticatorModel( 1175 type="ApiKeyAuthenticator", 1176 api_token="", 1177 inject_into=model.request_authentication.inject_into, 1178 ), # type: ignore # $parameters and headers default to None 1179 config=config, 1180 token_provider=token_provider, 1181 )
1183 @staticmethod 1184 def create_basic_http_authenticator( 1185 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 1186 ) -> BasicHttpAuthenticator: 1187 return BasicHttpAuthenticator( 1188 password=model.password or "", 1189 username=model.username, 1190 config=config, 1191 parameters=model.parameters or {}, 1192 )
1194 @staticmethod 1195 def create_bearer_authenticator( 1196 model: BearerAuthenticatorModel, 1197 config: Config, 1198 token_provider: Optional[TokenProvider] = None, 1199 **kwargs: Any, 1200 ) -> BearerAuthenticator: 1201 if token_provider is not None and model.api_token != "": 1202 raise ValueError( 1203 "If token_provider is set, api_token is ignored and has to be set to empty string." 1204 ) 1205 return BearerAuthenticator( 1206 token_provider=( 1207 token_provider 1208 if token_provider is not None 1209 else InterpolatedStringTokenProvider( 1210 api_token=model.api_token or "", 1211 config=config, 1212 parameters=model.parameters or {}, 1213 ) 1214 ), 1215 config=config, 1216 parameters=model.parameters or {}, 1217 )
1219 @staticmethod 1220 def create_dynamic_stream_check_config( 1221 model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any 1222 ) -> DynamicStreamCheckConfig: 1223 return DynamicStreamCheckConfig( 1224 dynamic_stream_name=model.dynamic_stream_name, 1225 stream_count=model.stream_count or 0, 1226 )
1228 def create_check_stream( 1229 self, model: CheckStreamModel, config: Config, **kwargs: Any 1230 ) -> CheckStream: 1231 if model.dynamic_streams_check_configs is None and model.stream_names is None: 1232 raise ValueError( 1233 "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream" 1234 ) 1235 1236 dynamic_streams_check_configs = ( 1237 [ 1238 self._create_component_from_model(model=dynamic_stream_check_config, config=config) 1239 for dynamic_stream_check_config in model.dynamic_streams_check_configs 1240 ] 1241 if model.dynamic_streams_check_configs 1242 else [] 1243 ) 1244 1245 return CheckStream( 1246 stream_names=model.stream_names or [], 1247 dynamic_streams_check_configs=dynamic_streams_check_configs, 1248 parameters={}, 1249 )
1251 @staticmethod 1252 def create_check_dynamic_stream( 1253 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 1254 ) -> CheckDynamicStream: 1255 assert model.use_check_availability is not None # for mypy 1256 1257 use_check_availability = model.use_check_availability 1258 1259 return CheckDynamicStream( 1260 stream_count=model.stream_count, 1261 use_check_availability=use_check_availability, 1262 parameters={}, 1263 )
1265 def create_composite_error_handler( 1266 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 1267 ) -> CompositeErrorHandler: 1268 error_handlers = [ 1269 self._create_component_from_model(model=error_handler_model, config=config) 1270 for error_handler_model in model.error_handlers 1271 ] 1272 return CompositeErrorHandler( 1273 error_handlers=error_handlers, parameters=model.parameters or {} 1274 )
1276 @staticmethod 1277 def create_concurrency_level( 1278 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 1279 ) -> ConcurrencyLevel: 1280 return ConcurrencyLevel( 1281 default_concurrency=model.default_concurrency, 1282 max_concurrency=model.max_concurrency, 1283 config=config, 1284 parameters={}, 1285 )
1287 @staticmethod 1288 def apply_stream_state_migrations( 1289 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 1290 ) -> MutableMapping[str, Any]: 1291 if stream_state_migrations: 1292 for state_migration in stream_state_migrations: 1293 if state_migration.should_migrate(stream_state): 1294 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 1295 stream_state = dict(state_migration.migrate(stream_state)) 1296 return stream_state
1298 def create_concurrent_cursor_from_datetime_based_cursor( 1299 self, 1300 model_type: Type[BaseModel], 1301 component_definition: ComponentDefinition, 1302 stream_name: str, 1303 stream_namespace: Optional[str], 1304 stream_state: MutableMapping[str, Any], 1305 config: Config, 1306 message_repository: Optional[MessageRepository] = None, 1307 runtime_lookback_window: Optional[datetime.timedelta] = None, 1308 **kwargs: Any, 1309 ) -> ConcurrentCursor: 1310 component_type = component_definition.get("type") 1311 if component_definition.get("type") != model_type.__name__: 1312 raise ValueError( 1313 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1314 ) 1315 1316 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1317 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1318 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1319 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1320 if "$parameters" not in component_definition and "parameters" in component_definition: 1321 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1322 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1323 1324 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1325 raise ValueError( 1326 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1327 ) 1328 1329 model_parameters = datetime_based_cursor_model.parameters or {} 1330 1331 cursor_field = self._get_catalog_defined_cursor_field( 1332 stream_name=stream_name, 1333 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1334 or False, 1335 ) 1336 1337 if not cursor_field: 1338 interpolated_cursor_field = InterpolatedString.create( 1339 datetime_based_cursor_model.cursor_field, 1340 parameters=model_parameters, 1341 ) 1342 cursor_field = CursorField( 1343 cursor_field_key=interpolated_cursor_field.eval(config=config), 1344 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1345 or False, 1346 ) 1347 1348 interpolated_partition_field_start = InterpolatedString.create( 1349 datetime_based_cursor_model.partition_field_start or "start_time", 1350 parameters=model_parameters, 1351 ) 1352 interpolated_partition_field_end = InterpolatedString.create( 1353 datetime_based_cursor_model.partition_field_end or "end_time", 1354 parameters=model_parameters, 1355 ) 1356 1357 slice_boundary_fields = ( 1358 interpolated_partition_field_start.eval(config=config), 1359 interpolated_partition_field_end.eval(config=config), 1360 ) 1361 1362 datetime_format = datetime_based_cursor_model.datetime_format 1363 1364 cursor_granularity = ( 1365 parse_duration(datetime_based_cursor_model.cursor_granularity) 1366 if datetime_based_cursor_model.cursor_granularity 1367 else None 1368 ) 1369 1370 
lookback_window = None 1371 interpolated_lookback_window = ( 1372 InterpolatedString.create( 1373 datetime_based_cursor_model.lookback_window, 1374 parameters=model_parameters, 1375 ) 1376 if datetime_based_cursor_model.lookback_window 1377 else None 1378 ) 1379 if interpolated_lookback_window: 1380 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1381 if evaluated_lookback_window: 1382 lookback_window = parse_duration(evaluated_lookback_window) 1383 1384 connector_state_converter: DateTimeStreamStateConverter 1385 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1386 datetime_format=datetime_format, 1387 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1388 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1389 cursor_granularity=cursor_granularity, 1390 ) 1391 1392 # Adjusts the stream state by applying the runtime lookback window. 1393 # This is used to ensure correct state handling in case of failed partitions. 1394 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1395 if runtime_lookback_window and stream_state_value: 1396 new_stream_state = ( 1397 connector_state_converter.parse_timestamp(stream_state_value) 1398 - runtime_lookback_window 1399 ) 1400 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1401 new_stream_state 1402 ) 1403 1404 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1405 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1406 start_date_runtime_value = self.create_min_max_datetime( 1407 model=datetime_based_cursor_model.start_datetime, config=config 1408 ) 1409 else: 1410 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1411 1412 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1413 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1414 end_date_runtime_value = self.create_min_max_datetime( 1415 model=datetime_based_cursor_model.end_datetime, config=config 1416 ) 1417 else: 1418 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1419 1420 interpolated_start_date = MinMaxDatetime.create( 1421 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1422 parameters=datetime_based_cursor_model.parameters, 1423 ) 1424 interpolated_end_date = ( 1425 None 1426 if not end_date_runtime_value 1427 else MinMaxDatetime.create( 1428 end_date_runtime_value, datetime_based_cursor_model.parameters 1429 ) 1430 ) 1431 1432 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1433 if not interpolated_start_date.datetime_format: 1434 interpolated_start_date.datetime_format = datetime_format 1435 if interpolated_end_date and not interpolated_end_date.datetime_format: 1436 interpolated_end_date.datetime_format = datetime_format 1437 1438 start_date = interpolated_start_date.get_datetime(config=config) 1439 end_date_provider = ( 1440 partial(interpolated_end_date.get_datetime, config) 1441 if interpolated_end_date 1442 else connector_state_converter.get_end_provider() 1443 ) 1444 1445 if ( 1446 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1447 ) or ( 1448 not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity 1449 ): 1450 raise ValueError( 1451 f"If step is defined, cursor_granularity should be as well and vice-versa. 
" 1452 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1453 ) 1454 1455 # When step is not defined, default to a step size from the starting date to the present moment 1456 step_length = datetime.timedelta.max 1457 interpolated_step = ( 1458 InterpolatedString.create( 1459 datetime_based_cursor_model.step, 1460 parameters=model_parameters, 1461 ) 1462 if datetime_based_cursor_model.step 1463 else None 1464 ) 1465 if interpolated_step: 1466 evaluated_step = interpolated_step.eval(config) 1467 if evaluated_step: 1468 step_length = parse_duration(evaluated_step) 1469 1470 clamping_strategy: ClampingStrategy = NoClamping() 1471 if datetime_based_cursor_model.clamping: 1472 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1473 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1474 # object which we want to keep agnostic of being low-code 1475 target = InterpolatedString( 1476 string=datetime_based_cursor_model.clamping.target, 1477 parameters=model_parameters, 1478 ) 1479 evaluated_target = target.eval(config=config) 1480 match evaluated_target: 1481 case "DAY": 1482 clamping_strategy = DayClampingStrategy() 1483 end_date_provider = ClampingEndProvider( 1484 DayClampingStrategy(is_ceiling=False), 1485 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1486 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1487 ) 1488 case "WEEK": 1489 if ( 1490 not datetime_based_cursor_model.clamping.target_details 1491 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1492 ): 1493 raise ValueError( 1494 "Given WEEK clamping, weekday needs to be provided as target_details" 1495 ) 1496 weekday = self._assemble_weekday( 1497 datetime_based_cursor_model.clamping.target_details["weekday"] 1498 ) 1499 clamping_strategy = WeekClampingStrategy(weekday) 1500 end_date_provider = ClampingEndProvider( 1501 WeekClampingStrategy(weekday, is_ceiling=False), 1502 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1503 granularity=cursor_granularity or datetime.timedelta(days=1), 1504 ) 1505 case "MONTH": 1506 clamping_strategy = MonthClampingStrategy() 1507 end_date_provider = ClampingEndProvider( 1508 MonthClampingStrategy(is_ceiling=False), 1509 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1510 granularity=cursor_granularity or datetime.timedelta(days=1), 1511 ) 1512 case _: 1513 raise ValueError( 1514 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1515 ) 1516 1517 return ConcurrentCursor( 1518 stream_name=stream_name, 1519 stream_namespace=stream_namespace, 1520 stream_state=stream_state, 1521 message_repository=message_repository or self._message_repository, 1522 connector_state_manager=self._connector_state_manager, 1523 connector_state_converter=connector_state_converter, 1524 cursor_field=cursor_field, 1525 slice_boundary_fields=slice_boundary_fields, 1526 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1527 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1528 lookback_window=lookback_window, 1529 slice_range=step_length, 1530 cursor_granularity=cursor_granularity, 1531 clamping_strategy=clamping_strategy, 1532 )
1534 def create_concurrent_cursor_from_incrementing_count_cursor( 1535 self, 1536 model_type: Type[BaseModel], 1537 component_definition: ComponentDefinition, 1538 stream_name: str, 1539 stream_namespace: Optional[str], 1540 stream_state: MutableMapping[str, Any], 1541 config: Config, 1542 message_repository: Optional[MessageRepository] = None, 1543 **kwargs: Any, 1544 ) -> ConcurrentCursor: 1545 component_type = component_definition.get("type") 1546 if component_definition.get("type") != model_type.__name__: 1547 raise ValueError( 1548 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1549 ) 1550 1551 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1552 1553 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1554 raise ValueError( 1555 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1556 ) 1557 1558 interpolated_start_value = ( 1559 InterpolatedString.create( 1560 incrementing_count_cursor_model.start_value, # type: ignore 1561 parameters=incrementing_count_cursor_model.parameters or {}, 1562 ) 1563 if incrementing_count_cursor_model.start_value 1564 else 0 1565 ) 1566 1567 cursor_field = self._get_catalog_defined_cursor_field( 1568 stream_name=stream_name, 1569 allow_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1570 or False, 1571 ) 1572 1573 if not cursor_field: 1574 interpolated_cursor_field = InterpolatedString.create( 1575 incrementing_count_cursor_model.cursor_field, 1576 parameters=incrementing_count_cursor_model.parameters or {}, 1577 ) 1578 cursor_field = CursorField( 1579 cursor_field_key=interpolated_cursor_field.eval(config=config), 1580 supports_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1581 or False, 1582 ) 1583 1584 connector_state_converter = IncrementingCountStreamStateConverter( 1585 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1586 ) 1587 1588 return ConcurrentCursor( 1589 stream_name=stream_name, 1590 stream_namespace=stream_namespace, 1591 stream_state=stream_state, 1592 message_repository=message_repository or self._message_repository, 1593 connector_state_manager=self._connector_state_manager, 1594 connector_state_converter=connector_state_converter, 1595 cursor_field=cursor_field, 1596 slice_boundary_fields=None, 1597 start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1598 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1599 )
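For context, a minimal usage sketch of the factory method above. The manifest-style dict and its field names (`cursor_field`, `start_value`) mirror the attributes read by the method; the import paths and the no-argument `ModelToComponentFactory()` construction are assumptions:

```python
# Hypothetical sketch: the component definition's "type" must equal
# model_type.__name__ or the factory raises a ValueError before parsing.
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    IncrementingCountCursor as IncrementingCountCursorModel,  # assumed import path
)

factory = ModelToComponentFactory()
cursor = factory.create_concurrent_cursor_from_incrementing_count_cursor(
    model_type=IncrementingCountCursorModel,
    component_definition={
        "type": "IncrementingCountCursor",
        "cursor_field": "id",
        "start_value": 0,
    },
    stream_name="users",
    stream_namespace=None,
    stream_state={},
    config={},
)
```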
1620 def create_concurrent_cursor_from_perpartition_cursor( 1621 self, 1622 state_manager: ConnectorStateManager, 1623 model_type: Type[BaseModel], 1624 component_definition: ComponentDefinition, 1625 stream_name: str, 1626 stream_namespace: Optional[str], 1627 config: Config, 1628 stream_state: MutableMapping[str, Any], 1629 partition_router: PartitionRouter, 1630 attempt_to_create_cursor_if_not_provided: bool = False, 1631 **kwargs: Any, 1632 ) -> ConcurrentPerPartitionCursor: 1633 component_type = component_definition.get("type") 1634 if component_definition.get("type") != model_type.__name__: 1635 raise ValueError( 1636 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1637 ) 1638 1639 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1640 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1641 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1642 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1643 if "$parameters" not in component_definition and "parameters" in component_definition: 1644 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1645 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1646 1647 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1648 raise ValueError( 1649 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1650 ) 1651 1652 cursor_field = self._get_catalog_defined_cursor_field( 1653 stream_name=stream_name, 1654 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1655 or False, 1656 ) 1657 1658 if not cursor_field: 1659 interpolated_cursor_field = InterpolatedString.create( 1660 datetime_based_cursor_model.cursor_field, 1661 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1662 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1663 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1664 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 
1665 parameters=datetime_based_cursor_model.parameters or {}, 1666 ) 1667 cursor_field = CursorField( 1668 cursor_field_key=interpolated_cursor_field.eval(config=config), 1669 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1670 or False, 1671 ) 1672 1673 datetime_format = datetime_based_cursor_model.datetime_format 1674 1675 cursor_granularity = ( 1676 parse_duration(datetime_based_cursor_model.cursor_granularity) 1677 if datetime_based_cursor_model.cursor_granularity 1678 else None 1679 ) 1680 1681 connector_state_converter: DateTimeStreamStateConverter 1682 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1683 datetime_format=datetime_format, 1684 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1685 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1686 cursor_granularity=cursor_granularity, 1687 ) 1688 1689 # Create the cursor factory 1690 cursor_factory = ConcurrentCursorFactory( 1691 partial( 1692 self.create_concurrent_cursor_from_datetime_based_cursor, 1693 state_manager=state_manager, 1694 model_type=model_type, 1695 component_definition=component_definition, 1696 stream_name=stream_name, 1697 stream_namespace=stream_namespace, 1698 config=config, 1699 message_repository=NoopMessageRepository(), 1700 ) 1701 ) 1702 1703 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1704 use_global_cursor = isinstance( 1705 partition_router, GroupingPartitionRouter 1706 ) or component_definition.get("global_substream_cursor", False) 1707 1708 # Return the concurrent cursor and state converter 1709 return ConcurrentPerPartitionCursor( 1710 cursor_factory=cursor_factory, 1711 partition_router=partition_router, 1712 stream_name=stream_name, 1713 stream_namespace=stream_namespace, 1714 stream_state=stream_state, 1715 message_repository=self._message_repository, # type: ignore 1716 connector_state_manager=state_manager, 1717 connector_state_converter=connector_state_converter, 1718 cursor_field=cursor_field, 1719 use_global_cursor=use_global_cursor, 1720 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1721 )
1723 @staticmethod 1724 def create_constant_backoff_strategy( 1725 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1726 ) -> ConstantBackoffStrategy: 1727 return ConstantBackoffStrategy( 1728 backoff_time_in_seconds=model.backoff_time_in_seconds, 1729 config=config, 1730 parameters=model.parameters or {}, 1731 )
1733 def create_cursor_pagination( 1734 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1735 ) -> CursorPaginationStrategy: 1736 if isinstance(decoder, PaginationDecoderDecorator): 1737 inner_decoder = decoder.decoder 1738 else: 1739 inner_decoder = decoder 1740 decoder = PaginationDecoderDecorator(decoder=decoder) 1741 1742 if self._is_supported_decoder_for_pagination(inner_decoder): 1743 decoder_to_use = decoder 1744 else: 1745 raise ValueError( 1746 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1747 ) 1748 1749 return CursorPaginationStrategy( 1750 cursor_value=model.cursor_value, 1751 decoder=decoder_to_use, 1752 page_size=model.page_size, 1753 stop_condition=model.stop_condition, 1754 config=config, 1755 parameters=model.parameters or {}, 1756 )
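As a point of reference, a cursor pagination definition typically supplies an interpolated `cursor_value` (and optionally a `stop_condition`) evaluated against the latest response. The expressions below are hypothetical; only the field names come from the model used above:

```python
# Hypothetical CursorPagination definition expressed as a manifest-style dict.
cursor_pagination_definition = {
    "type": "CursorPagination",
    "cursor_value": "{{ response.paging.next }}",      # interpolated from the last response
    "stop_condition": "{{ not response.paging.next }}",
    "page_size": 100,
}
```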
1758 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1759 """ 1760 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1761 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1762 :param model: The Pydantic model of the custom component being created 1763 :param config: The custom defined connector config 1764 :return: The declarative component built from the Pydantic model to be used at runtime 1765 """ 1766 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1767 component_fields = get_type_hints(custom_component_class) 1768 model_args = model.dict() 1769 model_args["config"] = config 1770 1771 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1772 # we defer to these arguments over the component's definition 1773 for key, arg in kwargs.items(): 1774 model_args[key] = arg 1775 1776 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1777 # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to 1778 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1779 for model_field, model_value in model_args.items(): 1780 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1781 if ( 1782 isinstance(model_value, dict) 1783 and "type" not in model_value 1784 and model_field in component_fields 1785 ): 1786 derived_type = self._derive_component_type_from_type_hints( 1787 component_fields.get(model_field) 1788 ) 1789 if derived_type: 1790 model_value["type"] = derived_type 1791 1792 if self._is_component(model_value): 1793 model_args[model_field] = self._create_nested_component( 1794 model, 1795 model_field, 1796 model_value, 1797 config, 1798 **kwargs, 1799 ) 1800 elif isinstance(model_value, list): 1801 vals = [] 1802 for v in model_value: 1803 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1804 derived_type = self._derive_component_type_from_type_hints( 1805 component_fields.get(model_field) 1806 ) 1807 if derived_type: 1808 v["type"] = derived_type 1809 if self._is_component(v): 1810 vals.append( 1811 self._create_nested_component( 1812 model, 1813 model_field, 1814 v, 1815 config, 1816 **kwargs, 1817 ) 1818 ) 1819 else: 1820 vals.append(v) 1821 model_args[model_field] = vals 1822 1823 kwargs = { 1824 class_field: model_args[class_field] 1825 for class_field in component_fields.keys() 1826 if class_field in model_args 1827 } 1828 return custom_component_class(**kwargs)
Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor
Parameters
- model: The Pydantic model of the custom component being created
- config: The custom defined connector config
Returns
The declarative component built from the Pydantic model to be used at runtime
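The filtering described above can be illustrated in isolation: only the model fields whose names appear in the custom class's type hints survive into the constructor call. The class below is hypothetical; the mechanism (`get_type_hints` plus a keyword filter) is the one used by `create_custom_component`:

```python
from dataclasses import dataclass
from typing import Any, Dict, get_type_hints


@dataclass
class MyCustomExtractor:  # hypothetical custom component class
    field_path: str
    config: Dict[str, Any]


model_args = {
    "class_name": "source_example.components.MyCustomExtractor",
    "field_path": "data",
    "config": {},
}
component_fields = get_type_hints(MyCustomExtractor)
constructor_kwargs = {k: v for k, v in model_args.items() if k in component_fields}
component = MyCustomExtractor(**constructor_kwargs)  # "class_name" is dropped: not a constructor field
```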
1963 def create_default_stream( 1964 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1965 ) -> AbstractStream: 1966 primary_key = model.primary_key.__root__ if model.primary_key else None 1967 self._migrate_state(model, config) 1968 1969 partition_router = self._build_stream_slicer_from_partition_router( 1970 model.retriever, 1971 config, 1972 stream_name=model.name, 1973 **kwargs, 1974 ) 1975 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1976 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1977 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1978 1979 end_time_option = ( 1980 self._create_component_from_model( 1981 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1982 ) 1983 if cursor_model.end_time_option 1984 else None 1985 ) 1986 start_time_option = ( 1987 self._create_component_from_model( 1988 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1989 ) 1990 if cursor_model.start_time_option 1991 else None 1992 ) 1993 1994 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 1995 start_time_option=start_time_option, 1996 end_time_option=end_time_option, 1997 partition_field_start=cursor_model.partition_field_start, 1998 partition_field_end=cursor_model.partition_field_end, 1999 config=config, 2000 parameters=model.parameters or {}, 2001 ) 2002 request_options_provider = ( 2003 datetime_request_options_provider 2004 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 2005 else PerPartitionRequestOptionsProvider( 2006 partition_router, datetime_request_options_provider 2007 ) 2008 ) 2009 elif model.incremental_sync and isinstance( 2010 model.incremental_sync, IncrementingCountCursorModel 2011 ): 2012 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2013 raise ValueError( 2014 "PerPartition does not support per partition states because switching to global state is time based" 2015 ) 2016 2017 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 2018 2019 start_time_option = ( 2020 self._create_component_from_model( 2021 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2022 config, 2023 parameters=cursor_model.parameters or {}, 2024 ) 2025 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2026 else None 2027 ) 2028 2029 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2030 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2031 partition_field_start = "start" 2032 2033 request_options_provider = DatetimeBasedRequestOptionsProvider( 2034 start_time_option=start_time_option, 2035 partition_field_start=partition_field_start, 2036 config=config, 2037 parameters=model.parameters or {}, 2038 ) 2039 else: 2040 request_options_provider = None 2041 2042 transformations = [] 2043 if model.transformations: 2044 for transformation_model in model.transformations: 2045 transformations.append( 2046 self._create_component_from_model(model=transformation_model, config=config) 2047 ) 2048 file_uploader = None 2049 if model.file_uploader: 2050 file_uploader = self._create_component_from_model( 2051 model=model.file_uploader, config=config 2052 ) 2053 2054 stream_slicer: ConcurrentStreamSlicer = ( 2055 partition_router 2056 if 
isinstance(concurrent_cursor, FinalStateCursor) 2057 else concurrent_cursor 2058 ) 2059 2060 retriever = self._create_component_from_model( 2061 model=model.retriever, 2062 config=config, 2063 name=model.name, 2064 primary_key=primary_key, 2065 request_options_provider=request_options_provider, 2066 stream_slicer=stream_slicer, 2067 partition_router=partition_router, 2068 has_stop_condition_cursor=self._is_stop_condition_on_cursor(model), 2069 is_client_side_incremental_sync=self._is_client_side_filtering_enabled(model), 2070 cursor=concurrent_cursor, 2071 transformations=transformations, 2072 file_uploader=file_uploader, 2073 incremental_sync=model.incremental_sync, 2074 ) 2075 if isinstance(retriever, AsyncRetriever): 2076 stream_slicer = retriever.stream_slicer 2077 2078 schema_loader: SchemaLoader 2079 if model.schema_loader and isinstance(model.schema_loader, list): 2080 nested_schema_loaders = [ 2081 self._create_component_from_model(model=nested_schema_loader, config=config) 2082 for nested_schema_loader in model.schema_loader 2083 ] 2084 schema_loader = CompositeSchemaLoader( 2085 schema_loaders=nested_schema_loaders, parameters={} 2086 ) 2087 elif model.schema_loader: 2088 schema_loader = self._create_component_from_model( 2089 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2090 config=config, 2091 ) 2092 else: 2093 options = model.parameters or {} 2094 if "name" not in options: 2095 options["name"] = model.name 2096 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2097 schema_loader = CachingSchemaLoaderDecorator(schema_loader) 2098 2099 stream_name = model.name or "" 2100 return DefaultStream( 2101 partition_generator=StreamSlicerPartitionGenerator( 2102 DeclarativePartitionFactory( 2103 stream_name, 2104 schema_loader, 2105 retriever, 2106 self._message_repository, 2107 ), 2108 stream_slicer, 2109 slice_limit=self._limit_slices_fetched, 2110 ), 2111 name=stream_name, 2112 json_schema=schema_loader.get_json_schema, 2113 primary_key=get_primary_key_from_stream(primary_key), 2114 cursor_field=concurrent_cursor.cursor_field 2115 if hasattr(concurrent_cursor, "cursor_field") 2116 else CursorField( 2117 cursor_field_key="" 2118 ), # FIXME we should have the cursor field has part of the interface of cursor, 2119 logger=logging.getLogger(f"airbyte.{stream_name}"), 2120 cursor=concurrent_cursor, 2121 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2122 )
2264 def create_default_error_handler( 2265 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2266 ) -> DefaultErrorHandler: 2267 backoff_strategies = [] 2268 if model.backoff_strategies: 2269 for backoff_strategy_model in model.backoff_strategies: 2270 backoff_strategies.append( 2271 self._create_component_from_model(model=backoff_strategy_model, config=config) 2272 ) 2273 2274 response_filters = [] 2275 if model.response_filters: 2276 for response_filter_model in model.response_filters: 2277 response_filters.append( 2278 self._create_component_from_model(model=response_filter_model, config=config) 2279 ) 2280 response_filters.append( 2281 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2282 ) 2283 2284 return DefaultErrorHandler( 2285 backoff_strategies=backoff_strategies, 2286 max_retries=model.max_retries, 2287 response_filters=response_filters, 2288 config=config, 2289 parameters=model.parameters or {}, 2290 )
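For orientation, a manifest-style definition exercising both lists handled above might look like the following; the field names come from models already used by this factory (`ConstantBackoffStrategy`, `HttpResponseFilter`), while the specific values are illustrative:

```python
# Hypothetical DefaultErrorHandler definition.
error_handler_definition = {
    "type": "DefaultErrorHandler",
    "max_retries": 5,
    "backoff_strategies": [
        {"type": "ConstantBackoffStrategy", "backoff_time_in_seconds": 30},
    ],
    "response_filters": [
        {"type": "HttpResponseFilter", "action": "RETRY", "http_codes": [429]},
    ],
}
```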
2292 def create_default_paginator( 2293 self, 2294 model: DefaultPaginatorModel, 2295 config: Config, 2296 *, 2297 url_base: str, 2298 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2299 decoder: Optional[Decoder] = None, 2300 cursor_used_for_stop_condition: Optional[Cursor] = None, 2301 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2302 if decoder: 2303 if self._is_supported_decoder_for_pagination(decoder): 2304 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2305 else: 2306 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2307 else: 2308 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2309 page_size_option = ( 2310 self._create_component_from_model(model=model.page_size_option, config=config) 2311 if model.page_size_option 2312 else None 2313 ) 2314 page_token_option = ( 2315 self._create_component_from_model(model=model.page_token_option, config=config) 2316 if model.page_token_option 2317 else None 2318 ) 2319 pagination_strategy = self._create_component_from_model( 2320 model=model.pagination_strategy, 2321 config=config, 2322 decoder=decoder_to_use, 2323 extractor_model=extractor_model, 2324 ) 2325 if cursor_used_for_stop_condition: 2326 pagination_strategy = StopConditionPaginationStrategyDecorator( 2327 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2328 ) 2329 paginator = DefaultPaginator( 2330 decoder=decoder_to_use, 2331 page_size_option=page_size_option, 2332 page_token_option=page_token_option, 2333 pagination_strategy=pagination_strategy, 2334 url_base=url_base, 2335 config=config, 2336 parameters=model.parameters or {}, 2337 ) 2338 if self._limit_pages_fetched_per_slice: 2339 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2340 return paginator
2342 def create_dpath_extractor( 2343 self, 2344 model: DpathExtractorModel, 2345 config: Config, 2346 decoder: Optional[Decoder] = None, 2347 **kwargs: Any, 2348 ) -> DpathExtractor: 2349 if decoder: 2350 decoder_to_use = decoder 2351 else: 2352 decoder_to_use = JsonDecoder(parameters={}) 2353 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2354 return DpathExtractor( 2355 decoder=decoder_to_use, 2356 field_path=model_field_path, 2357 config=config, 2358 parameters=model.parameters or {}, 2359 )
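A `field_path` is simply the list of keys to follow into the decoded response before records are emitted. A hypothetical definition targeting records nested under `data.items`:

```python
# Hypothetical DpathExtractor definition: each segment may be a literal string
# or an interpolated expression.
extractor_definition = {
    "type": "DpathExtractor",
    "field_path": ["data", "items"],
}
```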
2380 def create_http_requester( 2381 self, 2382 model: HttpRequesterModel, 2383 config: Config, 2384 decoder: Decoder = JsonDecoder(parameters={}), 2385 query_properties_key: Optional[str] = None, 2386 use_cache: Optional[bool] = None, 2387 *, 2388 name: str, 2389 ) -> HttpRequester: 2390 authenticator = ( 2391 self._create_component_from_model( 2392 model=model.authenticator, 2393 config=config, 2394 url_base=model.url or model.url_base, 2395 name=name, 2396 decoder=decoder, 2397 ) 2398 if model.authenticator 2399 else None 2400 ) 2401 error_handler = ( 2402 self._create_component_from_model(model=model.error_handler, config=config) 2403 if model.error_handler 2404 else DefaultErrorHandler( 2405 backoff_strategies=[], 2406 response_filters=[], 2407 config=config, 2408 parameters=model.parameters or {}, 2409 ) 2410 ) 2411 2412 api_budget = self._api_budget 2413 2414 request_options_provider = InterpolatedRequestOptionsProvider( 2415 request_body=model.request_body, 2416 request_body_data=model.request_body_data, 2417 request_body_json=model.request_body_json, 2418 request_headers=model.request_headers, 2419 request_parameters=model.request_parameters, # type: ignore # QueryProperties have been removed in `create_simple_retriever` 2420 query_properties_key=query_properties_key, 2421 config=config, 2422 parameters=model.parameters or {}, 2423 ) 2424 2425 assert model.use_cache is not None # for mypy 2426 assert model.http_method is not None # for mypy 2427 2428 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2429 2430 return HttpRequester( 2431 name=name, 2432 url=model.url, 2433 url_base=model.url_base, 2434 path=model.path, 2435 authenticator=authenticator, 2436 error_handler=error_handler, 2437 api_budget=api_budget, 2438 http_method=HttpMethod[model.http_method.value], 2439 request_options_provider=request_options_provider, 2440 config=config, 2441 disable_retries=self._disable_retries, 2442 parameters=model.parameters or {}, 2443 message_repository=self._message_repository, 2444 use_cache=should_use_cache, 2445 decoder=decoder, 2446 stream_response=decoder.is_stream_response() if decoder else False, 2447 )
2449 @staticmethod 2450 def create_http_response_filter( 2451 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2452 ) -> HttpResponseFilter: 2453 if model.action: 2454 action = ResponseAction(model.action.value) 2455 else: 2456 action = None 2457 2458 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2459 2460 http_codes = ( 2461 set(model.http_codes) if model.http_codes else set() 2462 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2463 2464 return HttpResponseFilter( 2465 action=action, 2466 failure_type=failure_type, 2467 error_message=model.error_message or "", 2468 error_message_contains=model.error_message_contains or "", 2469 http_codes=http_codes, 2470 predicate=model.predicate or "", 2471 config=config, 2472 parameters=model.parameters or {}, 2473 )
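Since the JSON schema has no set type (as the comment above notes), `http_codes` arrives as an array of unique integers and is converted to a set. A hypothetical filter definition using the fields read above:

```python
# Hypothetical HttpResponseFilter definition.
response_filter_definition = {
    "type": "HttpResponseFilter",
    "action": "RETRY",
    "http_codes": [429, 503],                # becomes a set at construction time
    "error_message_contains": "rate limit",
}
```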
2481 def create_complex_field_type( 2482 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2483 ) -> ComplexFieldType: 2484 items = ( 2485 self._create_component_from_model(model=model.items, config=config) 2486 if isinstance(model.items, ComplexFieldTypeModel) 2487 else model.items 2488 ) 2489 2490 return ComplexFieldType(field_type=model.field_type, items=items)
2492 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2493 target_type = ( 2494 self._create_component_from_model(model=model.target_type, config=config) 2495 if isinstance(model.target_type, ComplexFieldTypeModel) 2496 else model.target_type 2497 ) 2498 2499 return TypesMap( 2500 target_type=target_type, 2501 current_type=model.current_type, 2502 condition=model.condition if model.condition is not None else "True", 2503 )
2505 def create_schema_type_identifier( 2506 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2507 ) -> SchemaTypeIdentifier: 2508 types_mapping = [] 2509 if model.types_mapping: 2510 types_mapping.extend( 2511 [ 2512 self._create_component_from_model(types_map, config=config) 2513 for types_map in model.types_mapping 2514 ] 2515 ) 2516 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2517 [x for x in model.schema_pointer] if model.schema_pointer else [] 2518 ) 2519 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2520 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2521 [x for x in model.type_pointer] if model.type_pointer else None 2522 ) 2523 2524 return SchemaTypeIdentifier( 2525 schema_pointer=model_schema_pointer, 2526 key_pointer=model_key_pointer, 2527 type_pointer=model_type_pointer, 2528 types_mapping=types_mapping, 2529 parameters=model.parameters or {}, 2530 )
2532 def create_dynamic_schema_loader( 2533 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2534 ) -> DynamicSchemaLoader: 2535 schema_transformations = [] 2536 if model.schema_transformations: 2537 for transformation_model in model.schema_transformations: 2538 schema_transformations.append( 2539 self._create_component_from_model(model=transformation_model, config=config) 2540 ) 2541 name = "dynamic_properties" 2542 retriever = self._create_component_from_model( 2543 model=model.retriever, 2544 config=config, 2545 name=name, 2546 primary_key=None, 2547 partition_router=self._build_stream_slicer_from_partition_router( 2548 model.retriever, config 2549 ), 2550 transformations=[], 2551 use_cache=True, 2552 log_formatter=( 2553 lambda response: format_http_message( 2554 response, 2555 f"Schema loader '{name}' request", 2556 f"Request performed in order to extract schema.", 2557 name, 2558 is_auxiliary=True, 2559 ) 2560 ), 2561 ) 2562 schema_type_identifier = self._create_component_from_model( 2563 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2564 ) 2565 schema_filter = ( 2566 self._create_component_from_model( 2567 model.schema_filter, config=config, parameters=model.parameters or {} 2568 ) 2569 if model.schema_filter is not None 2570 else None 2571 ) 2572 2573 return DynamicSchemaLoader( 2574 retriever=retriever, 2575 config=config, 2576 schema_transformations=schema_transformations, 2577 schema_filter=schema_filter, 2578 schema_type_identifier=schema_type_identifier, 2579 parameters=model.parameters or {}, 2580 )
2600 def create_gzip_decoder( 2601 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2602 ) -> Decoder: 2603 _compressed_response_types = { 2604 "gzip", 2605 "x-gzip", 2606 "gzip, deflate", 2607 "x-gzip, deflate", 2608 "application/zip", 2609 "application/gzip", 2610 "application/x-gzip", 2611 "application/x-zip-compressed", 2612 } 2613 2614 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2615 2616 if self._emit_connector_builder_messages: 2617 # This is surprising, but if the response is not streamed, 2618 # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data, as opposed to response.raw, 2619 # which uses urllib3 directly and does not uncompress the data. 2620 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2621 2622 return CompositeRawDecoder.by_headers( 2623 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2624 stream_response=True, 2625 fallback_parser=gzip_parser.inner_parser, 2626 )
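The comment above hinges on a real difference in the `requests` library: `Response.content` transparently decompresses bodies according to `Content-Encoding`, while `Response.raw` is the underlying urllib3 stream and only decompresses when read with `decode_content=True`. A small sketch (the URL is a placeholder):

```python
import requests

# Buffered access: requests decompresses gzip bodies for us.
buffered = requests.get("https://example.com/data.json")
decoded_bytes = buffered.content

# Streamed access: the raw urllib3 stream still yields compressed bytes by default.
streamed = requests.get("https://example.com/data.json", stream=True)
compressed_bytes = streamed.raw.read()
# streamed.raw.read(decode_content=True) would decompress instead.
```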
2675 def create_jwt_authenticator( 2676 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2677 ) -> JwtAuthenticator: 2678 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2679 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2680 request_option = ( 2681 self._create_component_from_model(model.request_option, config) 2682 if model.request_option 2683 else None 2684 ) 2685 return JwtAuthenticator( 2686 config=config, 2687 parameters=model.parameters or {}, 2688 algorithm=JwtAlgorithm(model.algorithm.value), 2689 secret_key=model.secret_key, 2690 base64_encode_secret_key=model.base64_encode_secret_key, 2691 token_duration=model.token_duration, 2692 header_prefix=model.header_prefix, 2693 kid=jwt_headers.kid, 2694 typ=jwt_headers.typ, 2695 cty=jwt_headers.cty, 2696 iss=jwt_payload.iss, 2697 sub=jwt_payload.sub, 2698 aud=jwt_payload.aud, 2699 additional_jwt_headers=model.additional_jwt_headers, 2700 additional_jwt_payload=model.additional_jwt_payload, 2701 passphrase=model.passphrase, 2702 request_option=request_option, 2703 )
2705 def create_list_partition_router( 2706 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2707 ) -> ListPartitionRouter: 2708 request_option = ( 2709 self._create_component_from_model(model.request_option, config) 2710 if model.request_option 2711 else None 2712 ) 2713 return ListPartitionRouter( 2714 cursor_field=model.cursor_field, 2715 request_option=request_option, 2716 values=model.values, 2717 config=config, 2718 parameters=model.parameters or {}, 2719 )
2721 @staticmethod 2722 def create_min_max_datetime( 2723 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2724 ) -> MinMaxDatetime: 2725 return MinMaxDatetime( 2726 datetime=model.datetime, 2727 datetime_format=model.datetime_format or "", 2728 max_datetime=model.max_datetime or "", 2729 min_datetime=model.min_datetime or "", 2730 parameters=model.parameters or {}, 2731 )
2743 def create_oauth_authenticator( 2744 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2745 ) -> DeclarativeOauth2Authenticator: 2746 profile_assertion = ( 2747 self._create_component_from_model(model.profile_assertion, config=config) 2748 if model.profile_assertion 2749 else None 2750 ) 2751 2752 refresh_token_error_status_codes, refresh_token_error_key, refresh_token_error_values = ( 2753 self._get_refresh_token_error_information(model) 2754 ) 2755 if model.refresh_token_updater: 2756 # ignore type error because fixing it would have a lot of dependencies, revisit later 2757 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2758 config, 2759 InterpolatedString.create( 2760 model.token_refresh_endpoint, # type: ignore 2761 parameters=model.parameters or {}, 2762 ).eval(config), 2763 access_token_name=InterpolatedString.create( 2764 model.access_token_name or "access_token", parameters=model.parameters or {} 2765 ).eval(config), 2766 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2767 expires_in_name=InterpolatedString.create( 2768 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2769 ).eval(config), 2770 client_id_name=InterpolatedString.create( 2771 model.client_id_name or "client_id", parameters=model.parameters or {} 2772 ).eval(config), 2773 client_id=InterpolatedString.create( 2774 model.client_id, parameters=model.parameters or {} 2775 ).eval(config) 2776 if model.client_id 2777 else model.client_id, 2778 client_secret_name=InterpolatedString.create( 2779 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2780 ).eval(config), 2781 client_secret=InterpolatedString.create( 2782 model.client_secret, parameters=model.parameters or {} 2783 ).eval(config) 2784 if model.client_secret 2785 else model.client_secret, 2786 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2787 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2788 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2789 grant_type_name=InterpolatedString.create( 2790 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2791 ).eval(config), 2792 grant_type=InterpolatedString.create( 2793 model.grant_type or "refresh_token", parameters=model.parameters or {} 2794 ).eval(config), 2795 refresh_request_body=InterpolatedMapping( 2796 model.refresh_request_body or {}, parameters=model.parameters or {} 2797 ).eval(config), 2798 refresh_request_headers=InterpolatedMapping( 2799 model.refresh_request_headers or {}, parameters=model.parameters or {} 2800 ).eval(config), 2801 scopes=model.scopes, 2802 token_expiry_date_format=model.token_expiry_date_format, 2803 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2804 message_repository=self._message_repository, 2805 refresh_token_error_status_codes=refresh_token_error_status_codes, 2806 refresh_token_error_key=refresh_token_error_key, 2807 refresh_token_error_values=refresh_token_error_values, 2808 ) 2809 # ignore type error because fixing it would have a lot of dependencies, revisit later 2810 return DeclarativeOauth2Authenticator( # type: ignore 2811 access_token_name=model.access_token_name or "access_token", 2812 access_token_value=model.access_token_value, 2813 client_id_name=model.client_id_name or "client_id", 2814 client_id=model.client_id, 2815 client_secret_name=model.client_secret_name or "client_secret", 2816 
client_secret=model.client_secret, 2817 expires_in_name=model.expires_in_name or "expires_in", 2818 grant_type_name=model.grant_type_name or "grant_type", 2819 grant_type=model.grant_type or "refresh_token", 2820 refresh_request_body=model.refresh_request_body, 2821 refresh_request_headers=model.refresh_request_headers, 2822 refresh_token_name=model.refresh_token_name or "refresh_token", 2823 refresh_token=model.refresh_token, 2824 scopes=model.scopes, 2825 token_expiry_date=model.token_expiry_date, 2826 token_expiry_date_format=model.token_expiry_date_format, 2827 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2828 token_refresh_endpoint=model.token_refresh_endpoint, 2829 config=config, 2830 parameters=model.parameters or {}, 2831 message_repository=self._message_repository, 2832 profile_assertion=profile_assertion, 2833 use_profile_assertion=model.use_profile_assertion, 2834 refresh_token_error_status_codes=refresh_token_error_status_codes, 2835 refresh_token_error_key=refresh_token_error_key, 2836 refresh_token_error_values=refresh_token_error_values, 2837 )
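To make the branch above concrete: when `refresh_token_updater` is present the factory returns the single-use refresh-token variant, which reads and writes tokens at the configured config paths. The definition below is hypothetical; only the field names mirror the model attributes accessed above:

```python
# Hypothetical OAuthAuthenticator definition with a refresh_token_updater.
oauth_definition = {
    "type": "OAuthAuthenticator",
    "token_refresh_endpoint": "https://api.example.com/oauth/token",
    "client_id": "{{ config['client_id'] }}",
    "client_secret": "{{ config['client_secret'] }}",
    "refresh_token_updater": {
        "refresh_token_name": "refresh_token",
        "access_token_config_path": ["credentials", "access_token"],
        "refresh_token_config_path": ["credentials", "refresh_token"],
        "token_expiry_date_config_path": ["credentials", "token_expiry_date"],
    },
}
```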
2887 def create_offset_increment( 2888 self, 2889 model: OffsetIncrementModel, 2890 config: Config, 2891 decoder: Decoder, 2892 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2893 **kwargs: Any, 2894 ) -> OffsetIncrement: 2895 if isinstance(decoder, PaginationDecoderDecorator): 2896 inner_decoder = decoder.decoder 2897 else: 2898 inner_decoder = decoder 2899 decoder = PaginationDecoderDecorator(decoder=decoder) 2900 2901 if self._is_supported_decoder_for_pagination(inner_decoder): 2902 decoder_to_use = decoder 2903 else: 2904 raise ValueError( 2905 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2906 ) 2907 2908 # Ideally we would instantiate the runtime extractor at the highest level (in this case the SimpleRetriever) 2909 # so that it can be shared by OffsetIncrement and RecordSelector. However, due to how we instantiate the 2910 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2911 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2912 # When we have more time to investigate we can look into reusing the same component. 2913 extractor = ( 2914 self._create_component_from_model( 2915 model=extractor_model, config=config, decoder=decoder_to_use 2916 ) 2917 if extractor_model 2918 else None 2919 ) 2920 2921 return OffsetIncrement( 2922 page_size=model.page_size, 2923 config=config, 2924 decoder=decoder_to_use, 2925 extractor=extractor, 2926 inject_on_first_request=model.inject_on_first_request or False, 2927 parameters=model.parameters or {}, 2928 )
2930 @staticmethod 2931 def create_page_increment( 2932 model: PageIncrementModel, config: Config, **kwargs: Any 2933 ) -> PageIncrement: 2934 return PageIncrement( 2935 page_size=model.page_size, 2936 config=config, 2937 start_from_page=model.start_from_page or 0, 2938 inject_on_first_request=model.inject_on_first_request or False, 2939 parameters=model.parameters or {}, 2940 )
2942 def create_parent_stream_config( 2943 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2944 ) -> ParentStreamConfig: 2945 declarative_stream = self._create_component_from_model( 2946 model.stream, 2947 config=config, 2948 is_parent=True, 2949 **kwargs, 2950 ) 2951 request_option = ( 2952 self._create_component_from_model(model.request_option, config=config) 2953 if model.request_option 2954 else None 2955 ) 2956 2957 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2958 raise ValueError( 2959 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 2960 ) 2961 2962 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2963 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2964 ) 2965 2966 return ParentStreamConfig( 2967 parent_key=model.parent_key, 2968 request_option=request_option, 2969 stream=declarative_stream, 2970 partition_field=model.partition_field, 2971 config=config, 2972 incremental_dependency=model.incremental_dependency or False, 2973 parameters=model.parameters or {}, 2974 extra_fields=model.extra_fields, 2975 lazy_read_pointer=model_lazy_read_pointer, 2976 )
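The wildcard check above can be summarized with two hypothetical pointer values: direct paths are accepted, anything containing `*` is rejected before the `ParentStreamConfig` is built:

```python
# Hypothetical lazy_read_pointer values.
valid_lazy_read_pointer = ["results", "children"]         # accepted
invalid_lazy_read_pointer = ["results", "*", "children"]  # raises the ValueError above
```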
2978 def create_properties_from_endpoint( 2979 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 2980 ) -> PropertiesFromEndpoint: 2981 retriever = self._create_component_from_model( 2982 model=model.retriever, 2983 config=config, 2984 name="dynamic_properties", 2985 primary_key=None, 2986 stream_slicer=None, 2987 transformations=[], 2988 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ 2989 ) 2990 return PropertiesFromEndpoint( 2991 property_field_path=model.property_field_path, 2992 retriever=retriever, 2993 config=config, 2994 parameters=model.parameters or {}, 2995 )
2997 def create_property_chunking( 2998 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 2999 ) -> PropertyChunking: 3000 record_merge_strategy = ( 3001 self._create_component_from_model( 3002 model=model.record_merge_strategy, config=config, **kwargs 3003 ) 3004 if model.record_merge_strategy 3005 else None 3006 ) 3007 3008 property_limit_type: PropertyLimitType 3009 match model.property_limit_type: 3010 case PropertyLimitTypeModel.property_count: 3011 property_limit_type = PropertyLimitType.property_count 3012 case PropertyLimitTypeModel.characters: 3013 property_limit_type = PropertyLimitType.characters 3014 case _: 3015 raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}") 3016 3017 return PropertyChunking( 3018 property_limit_type=property_limit_type, 3019 property_limit=model.property_limit, 3020 record_merge_strategy=record_merge_strategy, 3021 config=config, 3022 parameters=model.parameters or {}, 3023 )
3025 def create_query_properties( 3026 self, model: QueryPropertiesModel, config: Config, *, stream_name: str, **kwargs: Any 3027 ) -> QueryProperties: 3028 if isinstance(model.property_list, list): 3029 property_list = model.property_list 3030 else: 3031 property_list = self._create_component_from_model( 3032 model=model.property_list, config=config, **kwargs 3033 ) 3034 3035 property_chunking = ( 3036 self._create_component_from_model( 3037 model=model.property_chunking, config=config, **kwargs 3038 ) 3039 if model.property_chunking 3040 else None 3041 ) 3042 3043 property_selector = ( 3044 self._create_component_from_model( 3045 model=model.property_selector, config=config, stream_name=stream_name, **kwargs 3046 ) 3047 if model.property_selector 3048 else None 3049 ) 3050 3051 return QueryProperties( 3052 property_list=property_list, 3053 always_include_properties=model.always_include_properties, 3054 property_chunking=property_chunking, 3055 property_selector=property_selector, 3056 config=config, 3057 parameters=model.parameters or {}, 3058 )
3060 def create_json_schema_property_selector( 3061 self, 3062 model: JsonSchemaPropertySelectorModel, 3063 config: Config, 3064 *, 3065 stream_name: str, 3066 **kwargs: Any, 3067 ) -> JsonSchemaPropertySelector: 3068 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 3069 3070 transformations = [] 3071 if model.transformations: 3072 for transformation_model in model.transformations: 3073 transformations.append( 3074 self._create_component_from_model(model=transformation_model, config=config) 3075 ) 3076 3077 return JsonSchemaPropertySelector( 3078 configured_stream=configured_stream, 3079 properties_transformations=transformations, 3080 config=config, 3081 parameters=model.parameters or {}, 3082 )
3096 @staticmethod 3097 def create_request_option( 3098 model: RequestOptionModel, config: Config, **kwargs: Any 3099 ) -> RequestOption: 3100 inject_into = RequestOptionType(model.inject_into.value) 3101 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3102 [ 3103 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3104 for segment in model.field_path 3105 ] 3106 if model.field_path 3107 else None 3108 ) 3109 field_name = ( 3110 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3111 if model.field_name 3112 else None 3113 ) 3114 return RequestOption( 3115 field_name=field_name, 3116 field_path=field_path, 3117 inject_into=inject_into, 3118 parameters=kwargs.get("parameters", {}), 3119 )
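The two shapes accepted above are a flat `field_name` or a nested `field_path`, each segment of which is interpolated. The values below are hypothetical, and the `inject_into` strings follow the `RequestOptionType` enum values used in the low-code framework:

```python
# Hypothetical RequestOption definitions.
flat_option = {
    "type": "RequestOption",
    "inject_into": "request_parameter",
    "field_name": "since",
}
nested_option = {
    "type": "RequestOption",
    "inject_into": "body_json",
    "field_path": ["filter", "updated_at"],
}
```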
3121 def create_record_selector( 3122 self, 3123 model: RecordSelectorModel, 3124 config: Config, 3125 *, 3126 name: str, 3127 transformations: List[RecordTransformation] | None = None, 3128 decoder: Decoder | None = None, 3129 client_side_incremental_sync_cursor: Optional[Cursor] = None, 3130 file_uploader: Optional[DefaultFileUploader] = None, 3131 **kwargs: Any, 3132 ) -> RecordSelector: 3133 extractor = self._create_component_from_model( 3134 model=model.extractor, decoder=decoder, config=config 3135 ) 3136 record_filter = ( 3137 self._create_component_from_model(model.record_filter, config=config) 3138 if model.record_filter 3139 else None 3140 ) 3141 3142 transform_before_filtering = ( 3143 False if model.transform_before_filtering is None else model.transform_before_filtering 3144 ) 3145 if client_side_incremental_sync_cursor: 3146 record_filter = ClientSideIncrementalRecordFilterDecorator( 3147 config=config, 3148 parameters=model.parameters, 3149 condition=model.record_filter.condition 3150 if (model.record_filter and hasattr(model.record_filter, "condition")) 3151 else None, 3152 cursor=client_side_incremental_sync_cursor, 3153 ) 3154 transform_before_filtering = ( 3155 True 3156 if model.transform_before_filtering is None 3157 else model.transform_before_filtering 3158 ) 3159 3160 if model.schema_normalization is None: 3161 # default to no schema normalization if not set 3162 model.schema_normalization = SchemaNormalizationModel.None_ 3163 3164 schema_normalization = ( 3165 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3166 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3167 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3168 ) 3169 3170 return RecordSelector( 3171 extractor=extractor, 3172 name=name, 3173 config=config, 3174 record_filter=record_filter, 3175 transformations=transformations or [], 3176 file_uploader=file_uploader, 3177 schema_normalization=schema_normalization, 3178 parameters=model.parameters or {}, 3179 transform_before_filtering=transform_before_filtering, 3180 )
3190 def create_selective_authenticator( 3191 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3192 ) -> DeclarativeAuthenticator: 3193 authenticators = { 3194 name: self._create_component_from_model(model=auth, config=config) 3195 for name, auth in model.authenticators.items() 3196 } 3197 # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError 3198 return SelectiveAuthenticator( # type: ignore[abstract] 3199 config=config, 3200 authenticators=authenticators, 3201 authenticator_selection_path=model.authenticator_selection_path, 3202 **kwargs, 3203 )
3205 @staticmethod 3206 def create_legacy_session_token_authenticator( 3207 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3208 ) -> LegacySessionTokenAuthenticator: 3209 return LegacySessionTokenAuthenticator( 3210 api_url=url_base, 3211 header=model.header, 3212 login_url=model.login_url, 3213 password=model.password or "", 3214 session_token=model.session_token or "", 3215 session_token_response_key=model.session_token_response_key or "", 3216 username=model.username or "", 3217 validate_session_url=model.validate_session_url, 3218 config=config, 3219 parameters=model.parameters or {}, 3220 )
3222 def create_simple_retriever( 3223 self, 3224 model: SimpleRetrieverModel, 3225 config: Config, 3226 *, 3227 name: str, 3228 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3229 request_options_provider: Optional[RequestOptionsProvider] = None, 3230 cursor: Optional[Cursor] = None, 3231 has_stop_condition_cursor: bool = False, 3232 is_client_side_incremental_sync: bool = False, 3233 transformations: List[RecordTransformation], 3234 file_uploader: Optional[DefaultFileUploader] = None, 3235 incremental_sync: Optional[ 3236 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3237 ] = None, 3238 use_cache: Optional[bool] = None, 3239 log_formatter: Optional[Callable[[Response], Any]] = None, 3240 partition_router: Optional[PartitionRouter] = None, 3241 **kwargs: Any, 3242 ) -> SimpleRetriever: 3243 def _get_url(req: Requester) -> str: 3244 """ 3245 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3246 This is needed because the URL is not set until the requester is created. 3247 """ 3248 3249 _url: str = ( 3250 model.requester.url 3251 if hasattr(model.requester, "url") and model.requester.url is not None 3252 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3253 ) 3254 _url_base: str = ( 3255 model.requester.url_base 3256 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3257 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3258 ) 3259 3260 return _url or _url_base 3261 3262 if cursor is None: 3263 cursor = FinalStateCursor(name, None, self._message_repository) 3264 3265 decoder = ( 3266 self._create_component_from_model(model=model.decoder, config=config) 3267 if model.decoder 3268 else JsonDecoder(parameters={}) 3269 ) 3270 record_selector = self._create_component_from_model( 3271 model=model.record_selector, 3272 name=name, 3273 config=config, 3274 decoder=decoder, 3275 transformations=transformations, 3276 client_side_incremental_sync_cursor=cursor if is_client_side_incremental_sync else None, 3277 file_uploader=file_uploader, 3278 ) 3279 3280 query_properties: Optional[QueryProperties] = None 3281 query_properties_key: Optional[str] = None 3282 self._ensure_query_properties_to_model(model.requester) 3283 if self._has_query_properties_in_request_parameters(model.requester): 3284 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3285 # places instead of default to request_parameters which isn't clearly documented 3286 if ( 3287 hasattr(model.requester, "fetch_properties_from_endpoint") 3288 and model.requester.fetch_properties_from_endpoint 3289 ): 3290 raise ValueError( 3291 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3292 ) 3293 3294 query_properties_definitions = [] 3295 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _has_query_properties_in_request_parameters() 3296 if isinstance(request_parameter, QueryPropertiesModel): 3297 query_properties_key = key 3298 query_properties_definitions.append(request_parameter) 3299 3300 if len(query_properties_definitions) > 1: 3301 raise ValueError( 3302 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3303 ) 3304 3305 if 
len(query_properties_definitions) == 1: 3306 query_properties = self._create_component_from_model( 3307 model=query_properties_definitions[0], stream_name=name, config=config 3308 ) 3309 3310 # Removes QueryProperties components from the interpolated mappings because it has been designed 3311 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 3312 # instead of through jinja interpolation 3313 if hasattr(model.requester, "request_parameters") and isinstance( 3314 model.requester.request_parameters, Mapping 3315 ): 3316 model.requester.request_parameters = self._remove_query_properties( 3317 model.requester.request_parameters 3318 ) 3319 elif ( 3320 hasattr(model.requester, "fetch_properties_from_endpoint") 3321 and model.requester.fetch_properties_from_endpoint 3322 ): 3323 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3324 query_properties_definition = QueryPropertiesModel( 3325 type="QueryProperties", 3326 property_list=model.requester.fetch_properties_from_endpoint, 3327 always_include_properties=None, 3328 property_chunking=None, 3329 ) # type: ignore # $parameters has a default value 3330 3331 query_properties = self.create_query_properties( 3332 model=query_properties_definition, 3333 stream_name=name, 3334 config=config, 3335 ) 3336 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3337 query_properties = self.create_query_properties( 3338 model=model.requester.query_properties, 3339 stream_name=name, 3340 config=config, 3341 ) 3342 3343 requester = self._create_component_from_model( 3344 model=model.requester, 3345 decoder=decoder, 3346 name=name, 3347 query_properties_key=query_properties_key, 3348 use_cache=use_cache, 3349 config=config, 3350 ) 3351 3352 if not request_options_provider: 3353 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3354 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3355 partition_router, PartitionRouter 3356 ): 3357 request_options_provider = partition_router 3358 3359 paginator = ( 3360 self._create_component_from_model( 3361 model=model.paginator, 3362 config=config, 3363 url_base=_get_url(requester), 3364 extractor_model=model.record_selector.extractor, 3365 decoder=decoder, 3366 cursor_used_for_stop_condition=cursor if has_stop_condition_cursor else None, 3367 ) 3368 if model.paginator 3369 else NoPagination(parameters={}) 3370 ) 3371 3372 ignore_stream_slicer_parameters_on_paginated_requests = ( 3373 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3374 ) 3375 3376 if ( 3377 model.partition_router 3378 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3379 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3380 and any( 3381 parent_stream_config.lazy_read_pointer 3382 for parent_stream_config in model.partition_router.parent_stream_configs 3383 ) 3384 ): 3385 if incremental_sync: 3386 if incremental_sync.type != "DatetimeBasedCursor": 3387 raise ValueError( 3388 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3389 ) 3390 3391 elif incremental_sync.step or incremental_sync.cursor_granularity: 3392 raise ValueError( 3393 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 
3394 ) 3395 3396 if model.decoder and model.decoder.type != "JsonDecoder": 3397 raise ValueError( 3398 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 3399 ) 3400 3401 return LazySimpleRetriever( 3402 name=name, 3403 paginator=paginator, 3404 primary_key=primary_key, 3405 requester=requester, 3406 record_selector=record_selector, 3407 stream_slicer=_NO_STREAM_SLICING, 3408 request_option_provider=request_options_provider, 3409 config=config, 3410 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3411 parameters=model.parameters or {}, 3412 ) 3413 3414 if ( 3415 model.record_selector.record_filter 3416 and model.pagination_reset 3417 and model.pagination_reset.limits 3418 ): 3419 raise ValueError("PaginationResetLimits are not supported while having record filter.") 3420 3421 return SimpleRetriever( 3422 name=name, 3423 paginator=paginator, 3424 primary_key=primary_key, 3425 requester=requester, 3426 record_selector=record_selector, 3427 stream_slicer=_NO_STREAM_SLICING, 3428 request_option_provider=request_options_provider, 3429 config=config, 3430 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3431 additional_query_properties=query_properties, 3432 log_formatter=self._get_log_formatter(log_formatter, name), 3433 pagination_tracker_factory=self._create_pagination_tracker_factory( 3434 model.pagination_reset, cursor 3435 ), 3436 parameters=model.parameters or {}, 3437 )
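To illustrate the query-properties handling above: a requester may embed at most one `QueryProperties` entry inside `request_parameters`; the factory records its key (so the `SimpleRetriever` can inject the per-slice property chunk there) and strips it from the interpolated parameters. The snippet below is a hypothetical manifest fragment, with `fields` as the assumed parameter name:

```python
# Hypothetical HttpRequester fragment whose request_parameters carries a single
# QueryProperties definition under the "fields" key.
requester_definition = {
    "type": "HttpRequester",
    "url_base": "https://api.example.com",
    "path": "/contacts",
    "request_parameters": {
        "fields": {
            "type": "QueryProperties",
            "property_list": ["id", "email", "updated_at"],
        },
    },
}
```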
3515 def create_state_delegating_stream( 3516 self, 3517 model: StateDelegatingStreamModel, 3518 config: Config, 3519 has_parent_state: Optional[bool] = None, 3520 **kwargs: Any, 3521 ) -> DefaultStream: 3522 if ( 3523 model.full_refresh_stream.name != model.name 3524 or model.name != model.incremental_stream.name 3525 ): 3526 raise ValueError( 3527 f"state_delegating_stream, full_refresh_stream and incremental_stream must have equal names. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3528 ) 3529 3530 stream_model = self._get_state_delegating_stream_model( 3531 False if has_parent_state is None else has_parent_state, model 3532 ) 3533 3534 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is always DeclarativeStreamModel
    def create_async_retriever(
        self,
        model: AsyncRetrieverModel,
        config: Config,
        *,
        name: str,
        primary_key: Optional[
            Union[str, List[str], List[List[str]]]
        ],  # this seems to be needed to match create_simple_retriever
        stream_slicer: Optional[StreamSlicer],
        client_side_incremental_sync: Optional[Dict[str, Any]] = None,
        transformations: List[RecordTransformation],
        **kwargs: Any,
    ) -> AsyncRetriever:
        if model.download_target_requester and not model.download_target_extractor:
            raise ValueError(
                "`download_target_extractor` is required when using a `download_target_requester`"
            )

        def _get_download_retriever(
            requester: Requester, extractor: RecordExtractor, _decoder: Decoder
        ) -> SimpleRetriever:
            # We create a record selector for the download retriever with no schema normalization,
            # no transformations and no record filter, as all of this occurs in the
            # record_selector of the AsyncRetriever
            record_selector = RecordSelector(
                extractor=extractor,
                name=name,
                record_filter=None,
                transformations=[],
                schema_normalization=TypeTransformer(TransformConfig.NoTransform),
                config=config,
                parameters={},
            )
            paginator = (
                self._create_component_from_model(
                    model=model.download_paginator,
                    decoder=_decoder,
                    config=config,
                    url_base="",
                )
                if model.download_paginator
                else NoPagination(parameters={})
            )

            return SimpleRetriever(
                requester=requester,
                record_selector=record_selector,
                primary_key=None,
                name=name,
                paginator=paginator,
                config=config,
                parameters={},
                log_formatter=self._get_log_formatter(None, name),
            )

        def _get_job_timeout() -> datetime.timedelta:
            user_defined_timeout: Optional[int] = (
                int(
                    InterpolatedString.create(
                        str(model.polling_job_timeout),
                        parameters={},
                    ).eval(config)
                )
                if model.polling_job_timeout
                else None
            )

            # Use the user-defined timeout during a test read, or default to 15 minutes
            test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15)
            # Outside the Connector Builder, the default is 60 minutes
            default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60)

            return (
                test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout
            )

        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        record_selector = self._create_component_from_model(
            model=model.record_selector,
            config=config,
            decoder=decoder,
            name=name,
            transformations=transformations,
            client_side_incremental_sync=client_side_incremental_sync,
        )

        stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})
        if self._should_limit_slices_fetched():
            stream_slicer = cast(
                StreamSlicer,
                StreamSlicerTestReadDecorator(
                    wrapped_slicer=stream_slicer,
                    maximum_number_of_slices=self._limit_slices_fetched or 5,
                ),
            )

        creation_requester = self._create_component_from_model(
            model=model.creation_requester,
            decoder=decoder,
            config=config,
            name=f"job creation - {name}",
        )
        polling_requester = self._create_component_from_model(
            model=model.polling_requester,
            decoder=decoder,
            config=config,
            name=f"job polling - {name}",
        )
        job_download_components_name = f"job download - {name}"
        download_decoder = (
            self._create_component_from_model(model=model.download_decoder, config=config)
            if model.download_decoder
            else JsonDecoder(parameters={})
        )
        download_extractor = (
            self._create_component_from_model(
                model=model.download_extractor,
                config=config,
                decoder=download_decoder,
                parameters=model.parameters,
            )
            if model.download_extractor
            else DpathExtractor(
                [],
                config=config,
                decoder=download_decoder,
                parameters=model.parameters or {},
            )
        )
        download_requester = self._create_component_from_model(
            model=model.download_requester,
            decoder=download_decoder,
            config=config,
            name=job_download_components_name,
        )
        download_retriever = _get_download_retriever(
            download_requester, download_extractor, download_decoder
        )
        abort_requester = (
            self._create_component_from_model(
                model=model.abort_requester,
                decoder=decoder,
                config=config,
                name=f"job abort - {name}",
            )
            if model.abort_requester
            else None
        )
        delete_requester = (
            self._create_component_from_model(
                model=model.delete_requester,
                decoder=decoder,
                config=config,
                name=f"job delete - {name}",
            )
            if model.delete_requester
            else None
        )
        download_target_requester = (
            self._create_component_from_model(
                model=model.download_target_requester,
                decoder=decoder,
                config=config,
                name=f"job extract_url - {name}",
            )
            if model.download_target_requester
            else None
        )
        status_extractor = self._create_component_from_model(
            model=model.status_extractor, decoder=decoder, config=config, name=name
        )
        download_target_extractor = (
            self._create_component_from_model(
                model=model.download_target_extractor,
                decoder=decoder,
                config=config,
                name=name,
            )
            if model.download_target_extractor
            else None
        )

        job_repository: AsyncJobRepository = AsyncHttpJobRepository(
            creation_requester=creation_requester,
            polling_requester=polling_requester,
            download_retriever=download_retriever,
            download_target_requester=download_target_requester,
            abort_requester=abort_requester,
            delete_requester=delete_requester,
            status_extractor=status_extractor,
            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
            download_target_extractor=download_target_extractor,
            job_timeout=_get_job_timeout(),
        )

        async_job_partition_router = AsyncJobPartitionRouter(
            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
                job_repository,
                stream_slices,
                self._job_tracker,
                self._message_repository,
                # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk
                has_bulk_parent=False,
                # Set `job_max_retry` to 1 for the Connector Builder use-case.
                # `None` means the default of 3 retry attempts is used under the hood.
                job_max_retry=1 if self._emit_connector_builder_messages else None,
            ),
            stream_slicer=stream_slicer,
            config=config,
            parameters=model.parameters or {},
        )

        return AsyncRetriever(
            record_selector=record_selector,
            stream_slicer=async_job_partition_router,
            config=config,
            parameters=model.parameters or {},
        )
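    # Timeout arithmetic sketch for _get_job_timeout above: with `polling_job_timeout: 30` in the
    # manifest, both test reads and regular syncs use a 30-minute timeout; when the field is
    # omitted, test reads fall back to 15 minutes and regular syncs to 60 minutes. Illustrative
    # resulting values:
    #
    #   datetime.timedelta(minutes=30)   # polling_job_timeout set to 30
    #   datetime.timedelta(minutes=15)   # unset, Connector Builder test read
    #   datetime.timedelta(minutes=60)   # unset, regular sync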
    def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec:
        config_migrations = [
            self._create_component_from_model(migration, config)
            for migration in (
                model.config_normalization_rules.config_migrations
                if (
                    model.config_normalization_rules
                    and model.config_normalization_rules.config_migrations
                )
                else []
            )
        ]
        config_transformations = [
            self._create_component_from_model(transformation, config)
            for transformation in (
                model.config_normalization_rules.transformations
                if (
                    model.config_normalization_rules
                    and model.config_normalization_rules.transformations
                )
                else []
            )
        ]
        config_validations = [
            self._create_component_from_model(validation, config)
            for validation in (
                model.config_normalization_rules.validations
                if (
                    model.config_normalization_rules
                    and model.config_normalization_rules.validations
                )
                else []
            )
        ]

        return Spec(
            connection_specification=model.connection_specification,
            documentation_url=model.documentation_url,
            advanced_auth=model.advanced_auth,
            parameters={},
            config_migrations=config_migrations,
            config_transformations=config_transformations,
            config_validations=config_validations,
        )
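    # Hedged sketch of what create_spec consumes (field names mirror the model attributes used
    # above; the nested normalization-rule models are only indicated, not spelled out, and
    # `factory` stands for an already-constructed ModelToComponentFactory):
    #
    #   spec = factory.create_spec(
    #       SpecModel(
    #           type="Spec",
    #           connection_specification={"type": "object", "properties": {...}},
    #           documentation_url="https://docs.example.com",  # hypothetical URL
    #           config_normalization_rules=...,  # optional migrations/transformations/validations
    #       ),
    #       config={},
    #   )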
    def create_substream_partition_router(
        self,
        model: SubstreamPartitionRouterModel,
        config: Config,
        *,
        stream_name: str,
        **kwargs: Any,
    ) -> SubstreamPartitionRouter:
        parent_stream_configs = []
        if model.parent_stream_configs:
            parent_stream_configs.extend(
                [
                    self.create_parent_stream_config_with_substream_wrapper(
                        model=parent_stream_config, config=config, stream_name=stream_name, **kwargs
                    )
                    for parent_stream_config in model.parent_stream_configs
                ]
            )

        return SubstreamPartitionRouter(
            parent_stream_configs=parent_stream_configs,
            parameters=model.parameters or {},
            config=config,
        )
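    # Rough input sketch (hypothetical stream name; the parent stream config entries are only
    # indicated, and `factory` stands for an already-constructed ModelToComponentFactory):
    #
    #   router = factory.create_substream_partition_router(
    #       SubstreamPartitionRouterModel(
    #           type="SubstreamPartitionRouter",
    #           parent_stream_configs=[...],  # each entry wraps a parent stream definition
    #       ),
    #       config={},
    #       stream_name="child_stream",
    #   )
    #
    # Each parent config is wrapped via create_parent_stream_config_with_substream_wrapper below,
    # so the parent stream gets its own state manager and a message repository that tags its
    # output as substream/auxiliary.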
    def create_parent_stream_config_with_substream_wrapper(
        self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
    ) -> Any:
        # getting the parent state
        child_state = self._connector_state_manager.get_stream_state(stream_name, None)

        # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
        has_parent_state = bool(
            self._connector_state_manager.get_stream_state(stream_name, None)
            if model.incremental_dependency
            else False
        )
        connector_state_manager = self._instantiate_parent_stream_state_manager(
            child_state, config, model, has_parent_state
        )

        substream_factory = ModelToComponentFactory(
            connector_state_manager=connector_state_manager,
            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
            limit_slices_fetched=self._limit_slices_fetched,
            emit_connector_builder_messages=self._emit_connector_builder_messages,
            disable_retries=self._disable_retries,
            disable_cache=self._disable_cache,
            message_repository=StateFilteringMessageRepository(
                LogAppenderMessageRepositoryDecorator(
                    {
                        "airbyte_cdk": {"stream": {"is_substream": True}},
                        "http": {"is_auxiliary": True},
                    },
                    self._message_repository,
                    self._evaluate_log_level(self._emit_connector_builder_messages),
                ),
            ),
            api_budget=self._api_budget,
        )

        return substream_factory.create_parent_stream_config(
            model=model, config=config, stream_name=stream_name, **kwargs
        )
    @staticmethod
    def create_wait_time_from_header(
        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
    ) -> WaitTimeFromHeaderBackoffStrategy:
        return WaitTimeFromHeaderBackoffStrategy(
            header=model.header,
            parameters=model.parameters or {},
            config=config,
            regex=model.regex,
            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds
            if model.max_waiting_time_in_seconds is not None
            else None,
        )
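    # Hedged usage sketch: a backoff strategy that waits for the number of seconds advertised by
    # the API in a response header. The header name and the cap are illustrative, and the `type`
    # literal is assumed from the model name.
    #
    #   strategy = ModelToComponentFactory.create_wait_time_from_header(
    #       WaitTimeFromHeaderModel(
    #           type="WaitTimeFromHeader",
    #           header="Retry-After",
    #           max_waiting_time_in_seconds=600,
    #       ),
    #       config={},
    #   )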
    @staticmethod
    def create_wait_until_time_from_header(
        model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any
    ) -> WaitUntilTimeFromHeaderBackoffStrategy:
        return WaitUntilTimeFromHeaderBackoffStrategy(
            header=model.header,
            parameters=model.parameters or {},
            config=config,
            min_wait=model.min_wait,
            regex=model.regex,
        )
    @staticmethod
    def create_components_mapping_definition(
        model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any
    ) -> ComponentMappingDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        field_path = [
            InterpolatedString.create(path, parameters=model.parameters or {})
            for path in model.field_path
        ]
        return ComponentMappingDefinition(
            field_path=field_path,  # type: ignore[arg-type] # field_path can be str and InterpolatedString
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            create_or_update=model.create_or_update,
            condition=model.condition,
            parameters=model.parameters or {},
        )
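    # Hedged sketch: a components mapping overrides a field in a resolved stream template. Both
    # the path elements and the value support interpolation; the path, the expression, and the
    # `components_values` variable name below are illustrative assumptions, not guaranteed schema.
    #
    #   mapping = ModelToComponentFactory.create_components_mapping_definition(
    #       ComponentMappingDefinitionModel(
    #           type="ComponentMappingDefinition",
    #           field_path=["retriever", "requester", "path"],
    #           value="{{ components_values['endpoint'] }}",
    #       ),
    #       config={},
    #   )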
    def create_http_components_resolver(
        self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None
    ) -> Any:
        retriever = self._create_component_from_model(
            model=model.retriever,
            config=config,
            name=f"{stream_name if stream_name else '__http_components_resolver'}",
            primary_key=None,
            stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
            transformations=[],
        )

        components_mapping = []
        for component_mapping_definition_model in model.components_mapping:
            if component_mapping_definition_model.condition:
                raise ValueError("`condition` is only supported for `ConfigComponentsResolver`")
            components_mapping.append(
                self._create_component_from_model(
                    model=component_mapping_definition_model,
                    value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                        component_mapping_definition_model.value_type
                    ),
                    config=config,
                )
            )

        return HttpComponentsResolver(
            retriever=retriever,
            stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
            config=config,
            components_mapping=components_mapping,
            parameters=model.parameters or {},
        )
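    # Hedged sketch of the shape this resolver expects: an HTTP retriever that lists entities
    # (the retriever and mapping entries are only indicated, and `factory` stands for an
    # already-constructed ModelToComponentFactory). Note that `condition` on a mapping is rejected
    # here and only accepted by ConfigComponentsResolver.
    #
    #   resolver = factory.create_http_components_resolver(
    #       HttpComponentsResolverModel(
    #           type="HttpComponentsResolver",
    #           retriever=...,             # e.g. a SimpleRetriever model listing available objects
    #           components_mapping=[...],  # ComponentMappingDefinition entries, without `condition`
    #       ),
    #       config={},
    #       stream_name="dynamic_streams_resolver",  # hypothetical name
    #   )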
    @staticmethod
    def create_stream_config(
        model: StreamConfigModel, config: Config, **kwargs: Any
    ) -> StreamConfig:
        model_configs_pointer: List[Union[InterpolatedString, str]] = (
            [x for x in model.configs_pointer] if model.configs_pointer else []
        )

        return StreamConfig(
            configs_pointer=model_configs_pointer,
            default_values=model.default_values,
            parameters=model.parameters or {},
        )
    def create_config_components_resolver(
        self,
        model: ConfigComponentsResolverModel,
        config: Config,
    ) -> Any:
        model_stream_configs = (
            model.stream_config if isinstance(model.stream_config, list) else [model.stream_config]
        )

        stream_configs = [
            self._create_component_from_model(
                stream_config, config=config, parameters=model.parameters or {}
            )
            for stream_config in model_stream_configs
        ]

        components_mapping = [
            self._create_component_from_model(
                model=components_mapping_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    components_mapping_definition_model.value_type
                ),
                config=config,
                parameters=model.parameters,
            )
            for components_mapping_definition_model in model.components_mapping
        ]

        return ConfigComponentsResolver(
            stream_configs=stream_configs,
            config=config,
            components_mapping=components_mapping,
            parameters=model.parameters or {},
        )
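    # Hedged sketch: unlike the HTTP variant, this resolver reads the list of entities from the
    # connector config itself via StreamConfig.configs_pointer. The pointer, the config payload,
    # and the mapping entries below are illustrative; `factory` stands for an already-constructed
    # ModelToComponentFactory.
    #
    #   resolver = factory.create_config_components_resolver(
    #       ConfigComponentsResolverModel(
    #           type="ConfigComponentsResolver",
    #           stream_config=StreamConfigModel(
    #               type="StreamConfig",
    #               configs_pointer=["custom_reports"],
    #           ),
    #           components_mapping=[...],  # may use `condition`, unlike the other resolvers
    #       ),
    #       config={"custom_reports": [{"name": "report_a"}]},
    #   )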
    def create_parametrized_components_resolver(
        self,
        model: ParametrizedComponentsResolverModel,
        config: Config,
    ) -> ParametrizedComponentsResolver:
        stream_parameters = StreamParametersDefinition(
            list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream
        )

        components_mapping = []
        for components_mapping_definition_model in model.components_mapping:
            if components_mapping_definition_model.condition:
                raise ValueError("`condition` is only supported for `ConfigComponentsResolver`")
            components_mapping.append(
                self._create_component_from_model(
                    model=components_mapping_definition_model,
                    value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                        components_mapping_definition_model.value_type
                    ),
                    config=config,
                )
            )
        return ParametrizedComponentsResolver(
            stream_parameters=stream_parameters,
            config=config,
            components_mapping=components_mapping,
            parameters=model.parameters or {},
        )
    def create_http_api_budget(
        self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
    ) -> HttpAPIBudget:
        policies = [
            self._create_component_from_model(model=policy, config=config)
            for policy in model.policies
        ]

        return HttpAPIBudget(
            policies=policies,
            ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
            ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
            status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
        )
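    # Hedged sketch of an API budget definition. The status code shown matches the default applied
    # above, the policy list is only indicated, and `factory` stands for an already-constructed
    # ModelToComponentFactory. Unset headers fall back to "ratelimit-reset" and
    # "ratelimit-remaining".
    #
    #   budget = factory.create_http_api_budget(
    #       HTTPAPIBudgetModel(
    #           type="HTTPAPIBudget",
    #           policies=[...],  # e.g. MovingWindowCallRatePolicy / FixedWindowCallRatePolicy models
    #           status_codes_for_ratelimit_hit=[429],
    #       ),
    #       config={},
    #   )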
    def create_fixed_window_call_rate_policy(
        self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
    ) -> FixedWindowCallRatePolicy:
        matchers = [
            self._create_component_from_model(model=matcher, config=config)
            for matcher in model.matchers
        ]

        # Set the initial reset timestamp to 10 days from now.
        # This value will be updated by the first request.
        return FixedWindowCallRatePolicy(
            next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
            period=parse_duration(model.period),
            call_limit=model.call_limit,
            matchers=matchers,
        )
    def create_file_uploader(
        self, model: FileUploaderModel, config: Config, **kwargs: Any
    ) -> FileUploader:
        name = "File Uploader"
        requester = self._create_component_from_model(
            model=model.requester,
            config=config,
            name=name,
            **kwargs,
        )
        download_target_extractor = self._create_component_from_model(
            model=model.download_target_extractor,
            config=config,
            name=name,
            **kwargs,
        )
        emit_connector_builder_messages = self._emit_connector_builder_messages
        file_uploader = DefaultFileUploader(
            requester=requester,
            download_target_extractor=download_target_extractor,
            config=config,
            file_writer=NoopFileWriter()
            if emit_connector_builder_messages
            else LocalFileSystemFileWriter(),
            parameters=model.parameters or {},
            filename_extractor=model.filename_extractor if model.filename_extractor else None,
        )

        return (
            ConnectorBuilderFileUploader(file_uploader)
            if emit_connector_builder_messages
            else file_uploader
        )
    def create_moving_window_call_rate_policy(
        self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
    ) -> MovingWindowCallRatePolicy:
        rates = [
            self._create_component_from_model(model=rate, config=config) for rate in model.rates
        ]
        matchers = [
            self._create_component_from_model(model=matcher, config=config)
            for matcher in model.matchers
        ]
        return MovingWindowCallRatePolicy(
            rates=rates,
            matchers=matchers,
        )
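    # Hedged sketch of the policy flavors built here and in the surrounding methods: a
    # moving-window policy combines one or more rates with request matchers, a fixed-window policy
    # uses a period plus a call limit, and an unlimited policy only carries matchers. The rate and
    # matcher entries are only indicated, and `factory` stands for an already-constructed
    # ModelToComponentFactory.
    #
    #   policy = factory.create_moving_window_call_rate_policy(
    #       MovingWindowCallRatePolicyModel(
    #           type="MovingWindowCallRatePolicy",
    #           rates=[...],     # e.g. a rate model allowing 100 calls per minute
    #           matchers=[...],  # HttpRequestRegexMatcher models, see create_http_request_matcher
    #       ),
    #       config={},
    #   )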
    def create_unlimited_call_rate_policy(
        self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
    ) -> UnlimitedCallRatePolicy:
        matchers = [
            self._create_component_from_model(model=matcher, config=config)
            for matcher in model.matchers
        ]

        return UnlimitedCallRatePolicy(
            matchers=matchers,
        )
    def create_http_request_matcher(
        self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
    ) -> HttpRequestRegexMatcher:
        return HttpRequestRegexMatcher(
            method=model.method,
            url_base=model.url_base,
            url_path_pattern=model.url_path_pattern,
            params=model.params,
            headers=model.headers,
        )
    def create_grouping_partition_router(
        self,
        model: GroupingPartitionRouterModel,
        config: Config,
        *,
        stream_name: str,
        **kwargs: Any,
    ) -> GroupingPartitionRouter:
        underlying_router = self._create_component_from_model(
            model=model.underlying_partition_router,
            config=config,
            stream_name=stream_name,
            **kwargs,
        )
        if model.group_size < 1:
            raise ValueError(f"Group size must be greater than 0, got {model.group_size}")

        # Request options in underlying partition routers are not supported for GroupingPartitionRouter
        # because they are specific to individual partitions and cannot be aggregated or handled
        # when grouping, potentially leading to incorrect API calls. Any request customization
        # should be managed at the stream level through the requester's configuration.
        if isinstance(underlying_router, SubstreamPartitionRouter):
            if any(
                parent_config.request_option
                for parent_config in underlying_router.parent_stream_configs
            ):
                raise ValueError("Request options are not supported for GroupingPartitionRouter.")

        if isinstance(underlying_router, ListPartitionRouter):
            if underlying_router.request_option:
                raise ValueError("Request options are not supported for GroupingPartitionRouter.")

        return GroupingPartitionRouter(
            group_size=model.group_size,
            underlying_partition_router=underlying_router,
            deduplicate=model.deduplicate if model.deduplicate is not None else True,
            config=config,
        )
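    # Hedged sketch of a grouping router: it batches the partitions emitted by an underlying
    # router into groups of `group_size`, deduplicating by default, and the underlying router must
    # not define request options (enforced above). Values are illustrative, and `factory` stands
    # for an already-constructed ModelToComponentFactory.
    #
    #   router = factory.create_grouping_partition_router(
    #       GroupingPartitionRouterModel(
    #           type="GroupingPartitionRouter",
    #           group_size=10,
    #           underlying_partition_router=...,  # e.g. a ListPartitionRouter without request_option
    #           deduplicate=True,
    #       ),
    #       config={},
    #       stream_name="my_stream",
    #   )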