airbyte_cdk.sources.declarative.parsers.model_to_component_factory
#
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
#

from __future__ import annotations

import datetime
import importlib
import inspect
import logging
import re
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Mapping,
    MutableMapping,
    Optional,
    Tuple,
    Type,
    Union,
    cast,
    get_args,
    get_origin,
    get_type_hints,
)

from airbyte_protocol_dataclasses.models import ConfiguredAirbyteStream
from isodate import parse_duration
from pydantic.v1 import BaseModel
from requests import Response

from airbyte_cdk.connector_builder.models import LogMessage as ConnectorBuilderLogMessage
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    ConfiguredAirbyteCatalog,
    FailureType,
    Level,
    StreamDescriptor,
)
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeSingleUseRefreshTokenOauth2Authenticator
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
from airbyte_cdk.sources.declarative.auth.token import (
    ApiKeyAuthenticator,
    BasicHttpAuthenticator,
    BearerAuthenticator,
    LegacySessionTokenAuthenticator,
)
from airbyte_cdk.sources.declarative.auth.token_provider import (
    InterpolatedSessionTokenProvider,
    InterpolatedStringTokenProvider,
    SessionTokenProvider,
    TokenProvider,
)
from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream, DynamicStreamCheckConfig
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.decoders import (
    Decoder,
    IterableDecoder,
    JsonDecoder,
    PaginationDecoderDecorator,
    XmlDecoder,
    ZipfileDecoder,
)
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
    CompositeRawDecoder,
    CsvParser,
    GzipParser,
    JsonLineParser,
    JsonParser,
    Parser,
)
from airbyte_cdk.sources.declarative.extractors import (
    DpathExtractor,
    RecordFilter,
    RecordSelector,
    ResponseToFileExtractor,
)
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
from airbyte_cdk.sources.declarative.extractors.record_filter import ClientSideIncrementalRecordFilterDecorator
from airbyte_cdk.sources.declarative.incremental import ConcurrentCursorFactory, ConcurrentPerPartitionCursor
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
    LegacyToPerPartitionStateMigration,
)
from airbyte_cdk.sources.declarative.models import CustomStateMigration, PaginationResetLimits
from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (
    DEPRECATION_LOGS_TAG,
    BaseModelWithDeprecations,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    Action1 as PaginationResetActionModel,
    AddedFieldDefinition as AddedFieldDefinitionModel,
    AddFields as AddFieldsModel,
    ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
    AsyncJobStatusMap as AsyncJobStatusMapModel,
    AsyncRetriever as AsyncRetrieverModel,
    BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
    BearerAuthenticator as BearerAuthenticatorModel,
    CheckDynamicStream as CheckDynamicStreamModel,
    CheckStream as CheckStreamModel,
    ComplexFieldType as ComplexFieldTypeModel,
    ComponentMappingDefinition as ComponentMappingDefinitionModel,
    CompositeErrorHandler as CompositeErrorHandlerModel,
    ConcurrencyLevel as ConcurrencyLevelModel,
    ConfigAddFields as ConfigAddFieldsModel,
    ConfigComponentsResolver as ConfigComponentsResolverModel,
    ConfigMigration as ConfigMigrationModel,
    ConfigRemapField as ConfigRemapFieldModel,
    ConfigRemoveFields as ConfigRemoveFieldsModel,
    ConstantBackoffStrategy as ConstantBackoffStrategyModel,
    CsvDecoder as CsvDecoderModel,
    CursorPagination as CursorPaginationModel,
    CustomAuthenticator as CustomAuthenticatorModel,
    CustomBackoffStrategy as CustomBackoffStrategyModel,
    CustomConfigTransformation as CustomConfigTransformationModel,
    CustomDecoder as CustomDecoderModel,
    CustomErrorHandler as CustomErrorHandlerModel,
    CustomPaginationStrategy as CustomPaginationStrategyModel,
    CustomPartitionRouter as CustomPartitionRouterModel,
    CustomRecordExtractor as CustomRecordExtractorModel,
    CustomRecordFilter as CustomRecordFilterModel,
    CustomRequester as CustomRequesterModel,
    CustomRetriever as CustomRetrieverModel,
    CustomSchemaLoader as CustomSchemaLoader,
    CustomSchemaNormalization as CustomSchemaNormalizationModel,
    CustomTransformation as CustomTransformationModel,
    CustomValidationStrategy as CustomValidationStrategyModel,
    DatetimeBasedCursor as DatetimeBasedCursorModel,
    DeclarativeStream as DeclarativeStreamModel,
    DefaultErrorHandler as DefaultErrorHandlerModel,
    DefaultPaginator as DefaultPaginatorModel,
    DpathExtractor as DpathExtractorModel,
    DpathFlattenFields as DpathFlattenFieldsModel,
    DpathValidator as DpathValidatorModel,
    DynamicSchemaLoader as DynamicSchemaLoaderModel,
    DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
    ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
    FileUploader as FileUploaderModel,
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
    FlattenFields as FlattenFieldsModel,
    GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
    GroupingPartitionRouter as GroupingPartitionRouterModel,
    GzipDecoder as GzipDecoderModel,
    HTTPAPIBudget as HTTPAPIBudgetModel,
    HttpComponentsResolver as HttpComponentsResolverModel,
    HttpRequester as HttpRequesterModel,
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
    HttpResponseFilter as HttpResponseFilterModel,
    IncrementingCountCursor as IncrementingCountCursorModel,
    InlineSchemaLoader as InlineSchemaLoaderModel,
    IterableDecoder as IterableDecoderModel,
    JsonDecoder as JsonDecoderModel,
    JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
    JsonlDecoder as JsonlDecoderModel,
    JsonSchemaPropertySelector as JsonSchemaPropertySelectorModel,
    JwtAuthenticator as JwtAuthenticatorModel,
    JwtHeaders as JwtHeadersModel,
    JwtPayload as JwtPayloadModel,
    KeysReplace as KeysReplaceModel,
    KeysToLower as KeysToLowerModel,
    KeysToSnakeCase as KeysToSnakeCaseModel,
    LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
    LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
    ListPartitionRouter as ListPartitionRouterModel,
    MinMaxDatetime as MinMaxDatetimeModel,
    MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
    NoAuth as NoAuthModel,
    NoPagination as NoPaginationModel,
    OAuthAuthenticator as OAuthAuthenticatorModel,
    OffsetIncrement as OffsetIncrementModel,
    PageIncrement as PageIncrementModel,
    PaginationReset as PaginationResetModel,
    ParametrizedComponentsResolver as ParametrizedComponentsResolverModel,
    ParentStreamConfig as ParentStreamConfigModel,
    PredicateValidator as PredicateValidatorModel,
    PropertiesFromEndpoint as PropertiesFromEndpointModel,
    PropertyChunking as PropertyChunkingModel,
    PropertyLimitType as PropertyLimitTypeModel,
    QueryProperties as QueryPropertiesModel,
    Rate as RateModel,
    RecordFilter as RecordFilterModel,
    RecordSelector as RecordSelectorModel,
    RefreshTokenUpdater as RefreshTokenUpdaterModel,
    RemoveFields as RemoveFieldsModel,
    RequestOption as RequestOptionModel,
    RequestPath as RequestPathModel,
    ResponseToFileExtractor as ResponseToFileExtractorModel,
    SchemaNormalization as SchemaNormalizationModel,
    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
    SelectiveAuthenticator as SelectiveAuthenticatorModel,
    SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
    SimpleRetriever as SimpleRetrieverModel,
    Spec as SpecModel,
    StateDelegatingStream as StateDelegatingStreamModel,
    StreamConfig as StreamConfigModel,
    SubstreamPartitionRouter as SubstreamPartitionRouterModel,
    TypesMap as TypesMapModel,
    UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
    ValidateAdheresToSchema as ValidateAdheresToSchemaModel,
    ValueType,
    WaitTimeFromHeader as WaitTimeFromHeaderModel,
    WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
    XmlDecoder as XmlDecoderModel,
    ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.partition_routers import (
    CartesianProductStreamSlicer,
    GroupingPartitionRouter,
    ListPartitionRouter,
    PartitionRouter,
    SinglePartitionRouter,
    SubstreamPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import AsyncJobPartitionRouter
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
    CompositeErrorHandler,
    DefaultErrorHandler,
    HttpResponseFilter,
)
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
    ConstantBackoffStrategy,
    ExponentialBackoffStrategy,
    WaitTimeFromHeaderBackoffStrategy,
    WaitUntilTimeFromHeaderBackoffStrategy,
)
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
from airbyte_cdk.sources.declarative.requesters.paginators import DefaultPaginator, NoPagination, PaginatorTestReadDecorator
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
    CursorPaginationStrategy,
    CursorStopCondition,
    OffsetIncrement,
    PageIncrement,
    StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.query_properties import PropertiesFromEndpoint, PropertyChunking, QueryProperties
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import PropertyLimitType
from airbyte_cdk.sources.declarative.requesters.query_properties.property_selector import JsonSchemaPropertySelector
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import GroupByKey
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
    DatetimeBasedRequestOptionsProvider,
    DefaultRequestOptionsProvider,
    InterpolatedRequestOptionsProvider,
    RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_options.per_partition_request_option_provider import (
    PerPartitionRequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
from airbyte_cdk.sources.declarative.resolvers import (
    ComponentMappingDefinition,
    ConfigComponentsResolver,
    HttpComponentsResolver,
    ParametrizedComponentsResolver,
    StreamConfig,
    StreamParametersDefinition,
)
from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, LazySimpleRetriever, SimpleRetriever
from airbyte_cdk.sources.declarative.retrievers.file_uploader import (
    ConnectorBuilderFileUploader,
    DefaultFileUploader,
    FileUploader,
    LocalFileSystemFileWriter,
    NoopFileWriter,
)
from airbyte_cdk.sources.declarative.retrievers.pagination_tracker import PaginationTracker
from airbyte_cdk.sources.declarative.schema import (
    ComplexFieldType,
    DefaultSchemaLoader,
    DynamicSchemaLoader,
    InlineSchemaLoader,
    JsonFileSchemaLoader,
    SchemaLoader,
    SchemaTypeIdentifier,
    TypesMap,
)
from airbyte_cdk.sources.declarative.schema.caching_schema_loader_decorator import CachingSchemaLoaderDecorator
from airbyte_cdk.sources.declarative.schema.composite_schema_loader import CompositeSchemaLoader
from airbyte_cdk.sources.declarative.spec import ConfigMigration, Spec
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer, StreamSlicerTestReadDecorator
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
    DeclarativePartitionFactory,
    StreamSlicerPartitionGenerator,
)
from airbyte_cdk.sources.declarative.transformations import AddFields, RecordTransformation, RemoveFields
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
from airbyte_cdk.sources.declarative.transformations.config_transformations import (
    ConfigAddFields,
    ConfigRemapField,
    ConfigRemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
    ConfigTransformation,
)
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import DpathFlattenFields, KeyTransformation
from airbyte_cdk.sources.declarative.transformations.flatten_fields import FlattenFields
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import KeysReplaceTransformation
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import KeysToLowerTransformation
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import KeysToSnakeCaseTransformation
from airbyte_cdk.sources.declarative.validators import DpathValidator, PredicateValidator, ValidateAdheresToSchema
from airbyte_cdk.sources.http_logger import format_http_message
from airbyte_cdk.sources.message import (
    InMemoryMessageRepository,
    LogAppenderMessageRepositoryDecorator,
    MessageRepository,
    NoopMessageRepository,
)
from airbyte_cdk.sources.message.repository import StateFilteringMessageRepository
from airbyte_cdk.sources.streams.call_rate import (
    APIBudget,
    FixedWindowCallRatePolicy,
    HttpAPIBudget,
    HttpRequestRegexMatcher,
    MovingWindowCallRatePolicy,
    Rate,
    UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
from airbyte_cdk.sources.streams.concurrent.clamping import (
    ClampingEndProvider,
    ClampingStrategy,
    DayClampingStrategy,
    MonthClampingStrategy,
    NoClamping,
    WeekClampingStrategy,
    Weekday,
)
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField, FinalStateCursor
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer as ConcurrentStreamSlicer
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
    CustomFormatConcurrentStreamStateConverter,
    DateTimeStreamStateConverter,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
ComponentDefinition = Mapping[str, Any]

SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}
_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})

# Ideally this should use the value defined in ConcurrentDeclarativeSource, but
# this would be a circular import
MAX_SLICES = 5

LOGGER = logging.getLogger("airbyte.model_to_component_factory")


class ModelToComponentFactory:
    EPOCH_DATETIME_FORMAT = "%s"

    def __init__(
        self,
        limit_pages_fetched_per_slice: Optional[int] = None,
        limit_slices_fetched: Optional[int] = None,
        emit_connector_builder_messages: bool = False,
        disable_retries: bool = False,
        disable_cache: bool = False,
        message_repository: Optional[MessageRepository] = None,
        connector_state_manager: Optional[ConnectorStateManager] = None,
        max_concurrent_async_job_count: Optional[int] = None,
        configured_catalog: Optional[ConfiguredAirbyteCatalog] = None,
        api_budget: Optional[APIBudget] = None,
    ):
        self._init_mappings()
        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
        self._limit_slices_fetched = limit_slices_fetched
        self._emit_connector_builder_messages = emit_connector_builder_messages
        self._disable_retries = disable_retries
        self._disable_cache = disable_cache
        self._message_repository = message_repository or InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
        self._stream_name_to_configured_stream = self._create_stream_name_to_configured_stream(
            configured_catalog
        )
        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
        self._api_budget: Optional[APIBudget] = api_budget
        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
        # placeholder for deprecation warnings
        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
    def _init_mappings(self) -> None:
        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
            AddedFieldDefinitionModel: self.create_added_field_definition,
            AddFieldsModel: self.create_add_fields,
            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
            BearerAuthenticatorModel: self.create_bearer_authenticator,
            CheckStreamModel: self.create_check_stream,
            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
            CheckDynamicStreamModel: self.create_check_dynamic_stream,
            CompositeErrorHandlerModel: self.create_composite_error_handler,
            ConcurrencyLevelModel: self.create_concurrency_level,
            ConfigMigrationModel: self.create_config_migration,
            ConfigAddFieldsModel: self.create_config_add_fields,
            ConfigRemapFieldModel: self.create_config_remap_field,
            ConfigRemoveFieldsModel: self.create_config_remove_fields,
            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
            CsvDecoderModel: self.create_csv_decoder,
            CursorPaginationModel: self.create_cursor_pagination,
            CustomAuthenticatorModel: self.create_custom_component,
            CustomBackoffStrategyModel: self.create_custom_component,
            CustomDecoderModel: self.create_custom_component,
            CustomErrorHandlerModel: self.create_custom_component,
            CustomRecordExtractorModel: self.create_custom_component,
            CustomRecordFilterModel: self.create_custom_component,
            CustomRequesterModel: self.create_custom_component,
            CustomRetrieverModel: self.create_custom_component,
            CustomSchemaLoader: self.create_custom_component,
            CustomSchemaNormalizationModel: self.create_custom_component,
            CustomStateMigration: self.create_custom_component,
            CustomPaginationStrategyModel: self.create_custom_component,
            CustomPartitionRouterModel: self.create_custom_component,
            CustomTransformationModel: self.create_custom_component,
            CustomValidationStrategyModel: self.create_custom_component,
            CustomConfigTransformationModel: self.create_custom_component,
            DeclarativeStreamModel: self.create_default_stream,
            DefaultErrorHandlerModel: self.create_default_error_handler,
            DefaultPaginatorModel: self.create_default_paginator,
            DpathExtractorModel: self.create_dpath_extractor,
            DpathValidatorModel: self.create_dpath_validator,
            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
            GroupByKeyMergeStrategyModel: self.create_group_by_key,
            HttpRequesterModel: self.create_http_requester,
            HttpResponseFilterModel: self.create_http_response_filter,
            InlineSchemaLoaderModel: self.create_inline_schema_loader,
            JsonDecoderModel: self.create_json_decoder,
            JsonlDecoderModel: self.create_jsonl_decoder,
            JsonSchemaPropertySelectorModel: self.create_json_schema_property_selector,
            GzipDecoderModel: self.create_gzip_decoder,
            KeysToLowerModel: self.create_keys_to_lower_transformation,
            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
            KeysReplaceModel: self.create_keys_replace_transformation,
            FlattenFieldsModel: self.create_flatten_fields,
            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
            IterableDecoderModel: self.create_iterable_decoder,
            XmlDecoderModel: self.create_xml_decoder,
            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
            TypesMapModel: self.create_types_map,
            ComplexFieldTypeModel: self.create_complex_field_type,
            JwtAuthenticatorModel: self.create_jwt_authenticator,
            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
            ListPartitionRouterModel: self.create_list_partition_router,
            MinMaxDatetimeModel: self.create_min_max_datetime,
            NoAuthModel: self.create_no_auth,
            NoPaginationModel: self.create_no_pagination,
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
            ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper,
            PredicateValidatorModel: self.create_predicate_validator,
            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
            PropertyChunkingModel: self.create_property_chunking,
            QueryPropertiesModel: self.create_query_properties,
            RecordFilterModel: self.create_record_filter,
            RecordSelectorModel: self.create_record_selector,
            RemoveFieldsModel: self.create_remove_fields,
            RequestPathModel: self.create_request_path,
            RequestOptionModel: self.create_request_option,
            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
            SelectiveAuthenticatorModel: self.create_selective_authenticator,
            SimpleRetrieverModel: self.create_simple_retriever,
            StateDelegatingStreamModel: self.create_state_delegating_stream,
            SpecModel: self.create_spec,
            SubstreamPartitionRouterModel: self.create_substream_partition_router,
            ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema,
            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
            AsyncRetrieverModel: self.create_async_retriever,
            HttpComponentsResolverModel: self.create_http_components_resolver,
            ConfigComponentsResolverModel: self.create_config_components_resolver,
            ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver,
            StreamConfigModel: self.create_stream_config,
            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
            ZipfileDecoderModel: self.create_zipfile_decoder,
            HTTPAPIBudgetModel: self.create_http_api_budget,
            FileUploaderModel: self.create_file_uploader,
            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
            GroupingPartitionRouterModel: self.create_grouping_partition_router,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}

    @staticmethod
    def _create_stream_name_to_configured_stream(
        configured_catalog: Optional[ConfiguredAirbyteCatalog],
    ) -> Mapping[str, ConfiguredAirbyteStream]:
        return (
            {stream.stream.name: stream for stream in configured_catalog.streams}
            if configured_catalog
            else {}
        )

    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then
        creating declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )

    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")

        # collect deprecation warnings for supported models.
        if isinstance(model, BaseModelWithDeprecations):
            self._collect_model_deprecations(model)

        return component_constructor(model=model, config=config, **kwargs)

    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
        """
        Returns the deprecation warnings that were collected during the creation of components.
        """
        return self._collected_deprecation_logs

    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
        """
        Collects deprecation logs from the given model and appends any new logs to the internal collection.

        This method checks whether the provided model has deprecation logs (identified by the presence of the
        DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each
        deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not
        already been collected, so that duplicate logs are avoided.

        Args:
            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
        """
        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
            for log in model._deprecation_logs:
                # avoid duplicates for deprecation logs observed.
                if log not in self._collected_deprecation_logs:
                    self._collected_deprecation_logs.append(log)
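    # Illustrative sketch (not part of the factory): a typical call site builds the factory and then asks it to
    # materialize a component from a manifest dict via create_component. The CheckStream payload and config keys
    # below are hypothetical examples; any model registered in PYDANTIC_MODEL_TO_CONSTRUCTOR works the same way.
    #
    #   factory = ModelToComponentFactory(emit_connector_builder_messages=True)
    #   check_stream = factory.create_component(
    #       model_type=CheckStreamModel,
    #       component_definition={"type": "CheckStream", "stream_names": ["users"]},
    #       config={"api_key": "..."},
    #   )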
    def create_config_migration(
        self, model: ConfigMigrationModel, config: Config
    ) -> ConfigMigration:
        transformations: List[ConfigTransformation] = [
            self._create_component_from_model(transformation, config)
            for transformation in model.transformations
        ]

        return ConfigMigration(
            description=model.description,
            transformations=transformations,
        )

    def create_config_add_fields(
        self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigAddFields:
        fields = [self._create_component_from_model(field, config) for field in model.fields]
        return ConfigAddFields(
            fields=fields,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remove_fields(
        model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigRemoveFields:
        return ConfigRemoveFields(
            field_pointers=model.field_pointers,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remap_field(
        model: ConfigRemapFieldModel, config: Config, **kwargs: Any
    ) -> ConfigRemapField:
        mapping = cast(Mapping[str, Any], model.map)
        return ConfigRemapField(
            map=mapping,
            field_path=model.field_path,
            config=config,
        )

    def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return DpathValidator(
            field_path=model.field_path,
            strategy=strategy,
        )

    def create_predicate_validator(
        self, model: PredicateValidatorModel, config: Config
    ) -> PredicateValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return PredicateValidator(
            value=model.value,
            strategy=strategy,
        )

    @staticmethod
    def create_validate_adheres_to_schema(
        model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any
    ) -> ValidateAdheresToSchema:
        base_schema = cast(Mapping[str, Any], model.base_schema)
        return ValidateAdheresToSchema(
            schema=base_schema,
        )
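    # Illustrative sketch (not part of the factory): given a ModelToComponentFactory instance `factory`, a manifest
    # can pair a DpathValidator with a ValidateAdheresToSchema strategy. The field path and schema below are
    # hypothetical examples.
    #
    #   validator = factory.create_component(
    #       model_type=DpathValidatorModel,
    #       component_definition={
    #           "type": "DpathValidator",
    #           "field_path": ["credentials", "api_key"],
    #           "validation_strategy": {
    #               "type": "ValidateAdheresToSchema",
    #               "base_schema": {"type": "string", "minLength": 1},
    #           },
    #       },
    #       config={},
    #   )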
    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )

    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]
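    # Illustrative sketch (not part of the factory): an AddFields manifest entry and the value-type coercion done
    # by _json_schema_type_name_to_type. The field path, value expression, and `factory` instance are hypothetical
    # examples.
    #
    #   add_fields = factory.create_component(
    #       model_type=AddFieldsModel,
    #       component_definition={
    #           "type": "AddFields",
    #           "fields": [
    #               {
    #                   "type": "AddedFieldDefinition",
    #                   "path": ["copied_id"],
    #                   "value": "{{ record['id'] }}",
    #                   "value_type": "integer",
    #               },
    #           ],
    #       },
    #       config={},
    #   )
    #   # "integer" is mapped to the Python `int` type before AddedFieldDefinition is instantiated.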
    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )
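    # Illustrative sketch (not part of the factory): the preferred ApiKeyAuthenticator shape uses `inject_into`
    # rather than the deprecated top-level `header` field. The header name, config key, and `factory` instance are
    # hypothetical examples.
    #
    #   authenticator = factory.create_component(
    #       model_type=ApiKeyAuthenticatorModel,
    #       component_definition={
    #           "type": "ApiKeyAuthenticator",
    #           "api_token": "{{ config['api_key'] }}",
    #           "inject_into": {"type": "RequestOption", "inject_into": "header", "field_name": "X-API-Key"},
    #       },
    #       config={"api_key": "secret"},
    #   )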
    def create_legacy_to_per_partition_state_migration(
        self,
        model: LegacyToPerPartitionStateMigrationModel,
        config: Mapping[str, Any],
        declarative_stream: DeclarativeStreamModel,
    ) -> LegacyToPerPartitionStateMigration:
        retriever = declarative_stream.retriever
        if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}"
            )
        partition_router = retriever.partition_router
        if not isinstance(
            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
        ):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
            )
        if not hasattr(partition_router, "parent_stream_configs"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
            )

        if not hasattr(declarative_stream, "incremental_sync"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
            )

        return LegacyToPerPartitionStateMigration(
            partition_router,  # type: ignore # was already checked above
            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
            config,
            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            # Get the api_token template if specified, default to just the session token
            api_token_template = (
                getattr(model.request_authentication, "api_token", None) or "{{ session_token }}"
            )
            final_token_provider: TokenProvider = InterpolatedSessionTokenProvider(
                config=config,
                api_token=api_token_template,
                session_token_provider=token_provider,
                parameters=model.parameters or {},
            )
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=final_token_provider,
            )

    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )
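    # Illustrative sketch (not part of the factory): a SessionTokenAuthenticator definition, as handled by
    # create_session_token_authenticator above, logs in once, reads the session token out of the login response,
    # and re-injects it as a bearer token. The URL, paths, duration, and config keys below are hypothetical examples.
    #
    #   session_auth_definition = {
    #       "type": "SessionTokenAuthenticator",
    #       "login_requester": {
    #           "type": "HttpRequester",
    #           "url_base": "https://api.example.com",
    #           "path": "/session",
    #           "http_method": "POST",
    #           "request_body_json": {"username": "{{ config['username'] }}", "password": "{{ config['password'] }}"},
    #       },
    #       "session_token_path": ["token"],
    #       "expiration_duration": "PT1H",
    #       "request_authentication": {"type": "Bearer"},
    #   }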
    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_dynamic_stream_check_config(
        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
    ) -> DynamicStreamCheckConfig:
        return DynamicStreamCheckConfig(
            dynamic_stream_name=model.dynamic_stream_name,
            stream_count=model.stream_count or 0,
        )

    def create_check_stream(
        self, model: CheckStreamModel, config: Config, **kwargs: Any
    ) -> CheckStream:
        if model.dynamic_streams_check_configs is None and model.stream_names is None:
            raise ValueError(
                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
            )

        dynamic_streams_check_configs = (
            [
                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
                for dynamic_stream_check_config in model.dynamic_streams_check_configs
            ]
            if model.dynamic_streams_check_configs
            else []
        )

        return CheckStream(
            stream_names=model.stream_names or [],
            dynamic_streams_check_configs=dynamic_streams_check_configs,
            parameters={},
        )

    @staticmethod
    def create_check_dynamic_stream(
        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
    ) -> CheckDynamicStream:
        assert model.use_check_availability is not None  # for mypy

        use_check_availability = model.use_check_availability

        return CheckDynamicStream(
            stream_count=model.stream_count,
            use_check_availability=use_check_availability,
            parameters={},
        )

    def create_composite_error_handler(
        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
    ) -> CompositeErrorHandler:
        error_handlers = [
            self._create_component_from_model(model=error_handler_model, config=config)
            for error_handler_model in model.error_handlers
        ]
        return CompositeErrorHandler(
            error_handlers=error_handlers, parameters=model.parameters or {}
        )

    @staticmethod
    def create_concurrency_level(
        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
    ) -> ConcurrencyLevel:
        return ConcurrencyLevel(
            default_concurrency=model.default_concurrency,
            max_concurrency=model.max_concurrency,
            config=config,
            parameters={},
        )

    @staticmethod
    def apply_stream_state_migrations(
        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        if stream_state_migrations:
            for state_migration in stream_state_migrations:
                if state_migration.should_migrate(stream_state):
                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
                    stream_state = dict(state_migration.migrate(stream_state))
        return stream_state
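    # Illustrative sketch (not part of the factory): apply_stream_state_migrations runs every migration whose
    # should_migrate() predicate matches and feeds the migrated state into the next migration. The legacy state
    # shape and the migration variable below are hypothetical examples.
    #
    #   legacy_state = {"13506132": {"last_changed": "2022-12-27T08:34:39+00:00"}}
    #   migrated_state = ModelToComponentFactory.apply_stream_state_migrations(
    #       stream_state_migrations=[legacy_to_per_partition_migration],  # e.g. built by create_legacy_to_per_partition_state_migration
    #       stream_state=legacy_state,
    #   )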
    def create_concurrent_cursor_from_datetime_based_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: MutableMapping[str, Any],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        runtime_lookback_window: Optional[datetime.timedelta] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition`
        # instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to
        # the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91).
        # So now we have two cases:
        # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
        # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
        # We should change those interfaces to use the model once we clean up the code in CDS at which point the
        # parameter propagation should happen as part of the ModelToComponentFactory.
        if "$parameters" not in component_definition and "parameters" in component_definition:
            component_definition["$parameters"] = component_definition.get("parameters")  # type: ignore # This is a dict
        datetime_based_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
            )

        model_parameters = datetime_based_cursor_model.parameters or {}

        cursor_field = self._get_catalog_defined_cursor_field(
            stream_name=stream_name,
            allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field
            or False,
        )

        if not cursor_field:
            interpolated_cursor_field = InterpolatedString.create(
                datetime_based_cursor_model.cursor_field,
                parameters=model_parameters,
            )
            cursor_field = CursorField(
                cursor_field_key=interpolated_cursor_field.eval(config=config),
                supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field
                or False,
            )

        interpolated_partition_field_start = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_start or "start_time",
            parameters=model_parameters,
        )
        interpolated_partition_field_end = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_end or "end_time",
            parameters=model_parameters,
        )

        slice_boundary_fields = (
            interpolated_partition_field_start.eval(config=config),
            interpolated_partition_field_end.eval(config=config),
        )

        datetime_format = datetime_based_cursor_model.datetime_format

        cursor_granularity = (
            parse_duration(datetime_based_cursor_model.cursor_granularity)
            if datetime_based_cursor_model.cursor_granularity
            else None
        )

        lookback_window = None
        interpolated_lookback_window = (
            InterpolatedString.create(
                datetime_based_cursor_model.lookback_window,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.lookback_window
            else None
        )
        if interpolated_lookback_window:
            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
            if evaluated_lookback_window:
                lookback_window = parse_duration(evaluated_lookback_window)

        connector_state_converter: DateTimeStreamStateConverter
        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )

        # Adjusts the stream state by applying the runtime lookback window.
        # This is used to ensure correct state handling in case of failed partitions.
        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
        if runtime_lookback_window and stream_state_value:
            new_stream_state = (
                connector_state_converter.parse_timestamp(stream_state_value)
                - runtime_lookback_window
            )
            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
                new_stream_state
            )

        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
            start_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.start_datetime, config=config
            )
        else:
            start_date_runtime_value = datetime_based_cursor_model.start_datetime

        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
            end_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.end_datetime, config=config
            )
        else:
            end_date_runtime_value = datetime_based_cursor_model.end_datetime

        interpolated_start_date = MinMaxDatetime.create(
            interpolated_string_or_min_max_datetime=start_date_runtime_value,
            parameters=datetime_based_cursor_model.parameters,
        )
        interpolated_end_date = (
            None
            if not end_date_runtime_value
            else MinMaxDatetime.create(
                end_date_runtime_value, datetime_based_cursor_model.parameters
            )
        )

        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
        if not interpolated_start_date.datetime_format:
            interpolated_start_date.datetime_format = datetime_format
        if interpolated_end_date and not interpolated_end_date.datetime_format:
            interpolated_end_date.datetime_format = datetime_format

        start_date = interpolated_start_date.get_datetime(config=config)
        end_date_provider = (
            partial(interpolated_end_date.get_datetime, config)
            if interpolated_end_date
            else connector_state_converter.get_end_provider()
        )

        if (
            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
        ) or (
            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
        ):
            raise ValueError(
                f"If step is defined, cursor_granularity should be as well and vice-versa. "
                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
            )

        # When step is not defined, default to a step size from the starting date to the present moment
        step_length = datetime.timedelta.max
        interpolated_step = (
            InterpolatedString.create(
                datetime_based_cursor_model.step,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.step
            else None
        )
        if interpolated_step:
            evaluated_step = interpolated_step.eval(config)
            if evaluated_step:
                step_length = parse_duration(evaluated_step)

        clamping_strategy: ClampingStrategy = NoClamping()
        if datetime_based_cursor_model.clamping:
            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
            # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
            # object which we want to keep agnostic of being low-code
            target = InterpolatedString(
                string=datetime_based_cursor_model.clamping.target,
                parameters=model_parameters,
            )
            evaluated_target = target.eval(config=config)
            match evaluated_target:
                case "DAY":
                    clamping_strategy = DayClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        DayClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
                    )
                case "WEEK":
                    if (
                        not datetime_based_cursor_model.clamping.target_details
                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
                    ):
                        raise ValueError(
                            "Given WEEK clamping, weekday needs to be provided as target_details"
                        )
                    weekday = self._assemble_weekday(
                        datetime_based_cursor_model.clamping.target_details["weekday"]
                    )
                    clamping_strategy = WeekClampingStrategy(weekday)
                    end_date_provider = ClampingEndProvider(
                        WeekClampingStrategy(weekday, is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case "MONTH":
                    clamping_strategy = MonthClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        MonthClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests.
Confirmed functionality is working in practice 1520 granularity=cursor_granularity or datetime.timedelta(days=1), 1521 ) 1522 case _: 1523 raise ValueError( 1524 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1525 ) 1526 1527 return ConcurrentCursor( 1528 stream_name=stream_name, 1529 stream_namespace=stream_namespace, 1530 stream_state=stream_state, 1531 message_repository=message_repository or self._message_repository, 1532 connector_state_manager=self._connector_state_manager, 1533 connector_state_converter=connector_state_converter, 1534 cursor_field=cursor_field, 1535 slice_boundary_fields=slice_boundary_fields, 1536 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1537 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1538 lookback_window=lookback_window, 1539 slice_range=step_length, 1540 cursor_granularity=cursor_granularity, 1541 clamping_strategy=clamping_strategy, 1542 ) 1543 1544 def create_concurrent_cursor_from_incrementing_count_cursor( 1545 self, 1546 model_type: Type[BaseModel], 1547 component_definition: ComponentDefinition, 1548 stream_name: str, 1549 stream_namespace: Optional[str], 1550 stream_state: MutableMapping[str, Any], 1551 config: Config, 1552 message_repository: Optional[MessageRepository] = None, 1553 **kwargs: Any, 1554 ) -> ConcurrentCursor: 1555 component_type = component_definition.get("type") 1556 if component_definition.get("type") != model_type.__name__: 1557 raise ValueError( 1558 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1559 ) 1560 1561 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1562 1563 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1564 raise ValueError( 1565 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1566 ) 1567 1568 start_value: Union[int, str, None] = incrementing_count_cursor_model.start_value 1569 # Pydantic Union type coercion can convert int 0 to string '0' depending on Union order. 1570 # We need to handle both int and str representations of numeric values. 1571 # Evaluate the InterpolatedString and convert to int for the ConcurrentCursor. 
1572 if start_value is not None: 1573 interpolated_start_value = InterpolatedString.create( 1574 str(start_value), # Ensure we pass a string to InterpolatedString.create 1575 parameters=incrementing_count_cursor_model.parameters or {}, 1576 ) 1577 evaluated_start_value: int = int(interpolated_start_value.eval(config=config)) 1578 else: 1579 evaluated_start_value = 0 1580 1581 cursor_field = self._get_catalog_defined_cursor_field( 1582 stream_name=stream_name, 1583 allow_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1584 or False, 1585 ) 1586 1587 if not cursor_field: 1588 interpolated_cursor_field = InterpolatedString.create( 1589 incrementing_count_cursor_model.cursor_field, 1590 parameters=incrementing_count_cursor_model.parameters or {}, 1591 ) 1592 cursor_field = CursorField( 1593 cursor_field_key=interpolated_cursor_field.eval(config=config), 1594 supports_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1595 or False, 1596 ) 1597 1598 connector_state_converter = IncrementingCountStreamStateConverter( 1599 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1600 ) 1601 1602 return ConcurrentCursor( 1603 stream_name=stream_name, 1604 stream_namespace=stream_namespace, 1605 stream_state=stream_state, 1606 message_repository=message_repository or self._message_repository, 1607 connector_state_manager=self._connector_state_manager, 1608 connector_state_converter=connector_state_converter, 1609 cursor_field=cursor_field, 1610 slice_boundary_fields=None, 1611 start=evaluated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1612 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1613 ) 1614 1615 def _assemble_weekday(self, weekday: str) -> Weekday: 1616 match weekday: 1617 case "MONDAY": 1618 return Weekday.MONDAY 1619 case "TUESDAY": 1620 return Weekday.TUESDAY 1621 case "WEDNESDAY": 1622 return Weekday.WEDNESDAY 1623 case "THURSDAY": 1624 return Weekday.THURSDAY 1625 case "FRIDAY": 1626 return Weekday.FRIDAY 1627 case "SATURDAY": 1628 return Weekday.SATURDAY 1629 case "SUNDAY": 1630 return Weekday.SUNDAY 1631 case _: 1632 raise ValueError(f"Unknown weekday {weekday}") 1633 1634 def create_concurrent_cursor_from_perpartition_cursor( 1635 self, 1636 state_manager: ConnectorStateManager, 1637 model_type: Type[BaseModel], 1638 component_definition: ComponentDefinition, 1639 stream_name: str, 1640 stream_namespace: Optional[str], 1641 config: Config, 1642 stream_state: MutableMapping[str, Any], 1643 partition_router: PartitionRouter, 1644 attempt_to_create_cursor_if_not_provided: bool = False, 1645 **kwargs: Any, 1646 ) -> ConcurrentPerPartitionCursor: 1647 component_type = component_definition.get("type") 1648 if component_definition.get("type") != model_type.__name__: 1649 raise ValueError( 1650 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1651 ) 1652 1653 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. 
This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1654 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1655 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1656 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1657 if "$parameters" not in component_definition and "parameters" in component_definition: 1658 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1659 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1660 1661 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1662 raise ValueError( 1663 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1664 ) 1665 1666 cursor_field = self._get_catalog_defined_cursor_field( 1667 stream_name=stream_name, 1668 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1669 or False, 1670 ) 1671 1672 if not cursor_field: 1673 interpolated_cursor_field = InterpolatedString.create( 1674 datetime_based_cursor_model.cursor_field,
1679 parameters=datetime_based_cursor_model.parameters or {}, 1680 ) 1681 cursor_field = CursorField( 1682 cursor_field_key=interpolated_cursor_field.eval(config=config), 1683 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1684 or False, 1685 ) 1686 1687 datetime_format = datetime_based_cursor_model.datetime_format 1688 1689 cursor_granularity = ( 1690 parse_duration(datetime_based_cursor_model.cursor_granularity) 1691 if datetime_based_cursor_model.cursor_granularity 1692 else None 1693 ) 1694 1695 connector_state_converter: DateTimeStreamStateConverter 1696 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1697 datetime_format=datetime_format, 1698 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1699 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1700 cursor_granularity=cursor_granularity, 1701 ) 1702 1703 # Create the cursor factory 1704 cursor_factory = ConcurrentCursorFactory( 1705 partial( 1706 self.create_concurrent_cursor_from_datetime_based_cursor, 1707 state_manager=state_manager, 1708 model_type=model_type, 1709 component_definition=component_definition, 1710 stream_name=stream_name, 1711 stream_namespace=stream_namespace, 1712 config=config, 1713 message_repository=NoopMessageRepository(), 1714 ) 1715 ) 1716 1717 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1718 use_global_cursor = isinstance( 1719 partition_router, GroupingPartitionRouter 1720 ) or component_definition.get("global_substream_cursor", False) 1721 1722 # Return the concurrent cursor and state converter 1723 return ConcurrentPerPartitionCursor( 1724 cursor_factory=cursor_factory, 1725 partition_router=partition_router, 1726 stream_name=stream_name, 1727 stream_namespace=stream_namespace, 1728 stream_state=stream_state, 1729 message_repository=self._message_repository, # type: ignore 1730 connector_state_manager=state_manager, 1731 connector_state_converter=connector_state_converter, 1732 cursor_field=cursor_field, 1733 use_global_cursor=use_global_cursor, 1734 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1735 ) 1736 1737 @staticmethod 1738 def create_constant_backoff_strategy( 1739 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1740 ) -> ConstantBackoffStrategy: 1741 return ConstantBackoffStrategy( 1742 backoff_time_in_seconds=model.backoff_time_in_seconds, 1743 config=config, 1744 parameters=model.parameters or {}, 1745 ) 1746 1747 def create_cursor_pagination( 1748 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1749 ) -> CursorPaginationStrategy: 1750 if isinstance(decoder, PaginationDecoderDecorator): 1751 inner_decoder = decoder.decoder 1752 else: 1753 inner_decoder = decoder 1754 decoder = PaginationDecoderDecorator(decoder=decoder) 1755 1756 if self._is_supported_decoder_for_pagination(inner_decoder): 1757 decoder_to_use = decoder 1758 else: 1759 raise ValueError( 1760 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1761 ) 1762 1763 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 1764 # If page_size is a string that represents an integer (not an interpolation), convert it back. 
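# Illustrative example (not part of the original source): a page_size of 100 that pydantic coerced
# to "100" becomes the integer 100 again below, while an interpolated value such as
# "{{ config['page_size'] }}" fails str.isdigit() and is left unchanged.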
1765 page_size = model.page_size 1766 if isinstance(page_size, str) and page_size.isdigit(): 1767 page_size = int(page_size) 1768 1769 return CursorPaginationStrategy( 1770 cursor_value=model.cursor_value, 1771 decoder=decoder_to_use, 1772 page_size=page_size, 1773 stop_condition=model.stop_condition, 1774 config=config, 1775 parameters=model.parameters or {}, 1776 ) 1777 1778 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1779 """ 1780 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1781 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1782 :param model: The Pydantic model of the custom component being created 1783 :param config: The custom defined connector config 1784 :return: The declarative component built from the Pydantic model to be used at runtime 1785 """ 1786 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1787 component_fields = get_type_hints(custom_component_class) 1788 model_args = model.dict() 1789 model_args["config"] = config 1790 1791 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1792 # we defer to these arguments over the component's definition 1793 for key, arg in kwargs.items(): 1794 model_args[key] = arg 1795 1796 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1797 # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to 1798 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1799 for model_field, model_value in model_args.items(): 1800 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1801 if ( 1802 isinstance(model_value, dict) 1803 and "type" not in model_value 1804 and model_field in component_fields 1805 ): 1806 derived_type = self._derive_component_type_from_type_hints( 1807 component_fields.get(model_field) 1808 ) 1809 if derived_type: 1810 model_value["type"] = derived_type 1811 1812 if self._is_component(model_value): 1813 model_args[model_field] = self._create_nested_component( 1814 model, 1815 model_field, 1816 model_value, 1817 config, 1818 **kwargs, 1819 ) 1820 elif isinstance(model_value, list): 1821 vals = [] 1822 for v in model_value: 1823 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1824 derived_type = self._derive_component_type_from_type_hints( 1825 component_fields.get(model_field) 1826 ) 1827 if derived_type: 1828 v["type"] = derived_type 1829 if self._is_component(v): 1830 vals.append( 1831 self._create_nested_component( 1832 model, 1833 model_field, 1834 v, 1835 config, 1836 **kwargs, 1837 ) 1838 ) 1839 else: 1840 vals.append(v) 1841 model_args[model_field] = vals 1842 1843 kwargs = { 1844 class_field: model_args[class_field] 1845 for class_field in component_fields.keys() 1846 if class_field in model_args 1847 } 1848 return custom_component_class(**kwargs) 1849 1850 @staticmethod 1851 def _get_class_from_fully_qualified_class_name( 1852 full_qualified_class_name: str, 1853 ) -> Any: 1854 """Get a class from its fully qualified name. 
1855 1856 If a custom components module is needed, we assume it is already registered - probably 1857 as `source_declarative_manifest.components` or `components`. 1858 1859 Args: 1860 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1861 1862 Returns: 1863 Any: The class object. 1864 1865 Raises: 1866 ValueError: If the class cannot be loaded. 1867 """ 1868 split = full_qualified_class_name.split(".") 1869 module_name_full = ".".join(split[:-1]) 1870 class_name = split[-1] 1871 1872 try: 1873 module_ref = importlib.import_module(module_name_full) 1874 except ModuleNotFoundError as e: 1875 if split[0] == "source_declarative_manifest": 1876 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append 1877 try: 1878 import os 1879 1880 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1881 module_ref = importlib.import_module( 1882 module_name_with_source_declarative_manifest 1883 ) 1884 except ModuleNotFoundError: 1885 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1886 else: 1887 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1888 1889 try: 1890 return getattr(module_ref, class_name) 1891 except AttributeError as e: 1892 raise ValueError( 1893 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1894 ) from e 1895 1896 @staticmethod 1897 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1898 interface = field_type 1899 while True: 1900 origin = get_origin(interface) 1901 if origin: 1902 # Unnest types until we reach the raw type 1903 # List[T] -> T 1904 # Optional[List[T]] -> T 1905 args = get_args(interface) 1906 interface = args[0] 1907 else: 1908 break 1909 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1910 return interface.__name__ 1911 return None 1912 1913 @staticmethod 1914 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1915 if not cls: 1916 return False 1917 return cls.__module__ == "builtins" 1918 1919 @staticmethod 1920 def _extract_missing_parameters(error: TypeError) -> List[str]: 1921 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1922 if parameter_search: 1923 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1924 else: 1925 return [] 1926 1927 def _create_nested_component( 1928 self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any 1929 ) -> Any: 1930 type_name = model_value.get("type", None) 1931 if not type_name: 1932 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1933 return model_value 1934 1935 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1936 if model_type: 1937 parsed_model = model_type.parse_obj(model_value) 1938 try: 1939 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1940 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1941 # components and passing them to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1942 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1943 # generically in create_custom_component().
This block allows developers to specify extra arguments in $parameters that 1944 # are needed by a component and could not be shared. 1945 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1946 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1947 model_parameters = model_value.get("$parameters", {}) 1948 matching_parameters = { 1949 kwarg: model_parameters[kwarg] 1950 for kwarg in constructor_kwargs 1951 if kwarg in model_parameters 1952 } 1953 matching_kwargs = { 1954 kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs 1955 } 1956 return self._create_component_from_model( 1957 model=parsed_model, config=config, **(matching_parameters | matching_kwargs) 1958 ) 1959 except TypeError as error: 1960 missing_parameters = self._extract_missing_parameters(error) 1961 if missing_parameters: 1962 raise ValueError( 1963 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1964 + ", ".join( 1965 ( 1966 f"{type_name}.$parameters.{parameter}" 1967 for parameter in missing_parameters 1968 ) 1969 ) 1970 ) 1971 raise TypeError( 1972 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1973 ) 1974 else: 1975 raise ValueError( 1976 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1977 ) 1978 1979 @staticmethod 1980 def _is_component(model_value: Any) -> bool: 1981 return isinstance(model_value, dict) and model_value.get("type") is not None 1982 1983 def create_default_stream( 1984 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1985 ) -> AbstractStream: 1986 primary_key = model.primary_key.__root__ if model.primary_key else None 1987 self._migrate_state(model, config) 1988 1989 partition_router = self._build_stream_slicer_from_partition_router( 1990 model.retriever, 1991 config, 1992 stream_name=model.name, 1993 **kwargs, 1994 ) 1995 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1996 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1997 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1998 1999 end_time_option = ( 2000 self._create_component_from_model( 2001 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 2002 ) 2003 if cursor_model.end_time_option 2004 else None 2005 ) 2006 start_time_option = ( 2007 self._create_component_from_model( 2008 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 2009 ) 2010 if cursor_model.start_time_option 2011 else None 2012 ) 2013 2014 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 2015 start_time_option=start_time_option, 2016 end_time_option=end_time_option, 2017 partition_field_start=cursor_model.partition_field_start, 2018 partition_field_end=cursor_model.partition_field_end, 2019 config=config, 2020 parameters=model.parameters or {}, 2021 ) 2022 request_options_provider = ( 2023 datetime_request_options_provider 2024 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 2025 else PerPartitionRequestOptionsProvider( 2026 partition_router, datetime_request_options_provider 2027 ) 2028 ) 2029 elif model.incremental_sync and isinstance( 2030 model.incremental_sync, IncrementingCountCursorModel 2031 ): 2032 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2033 raise ValueError( 
2034 "PerPartition does not support per partition states because switching to global state is time based" 2035 ) 2036 2037 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 2038 2039 start_time_option = ( 2040 self._create_component_from_model( 2041 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2042 config, 2043 parameters=cursor_model.parameters or {}, 2044 ) 2045 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2046 else None 2047 ) 2048 2049 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2050 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2051 partition_field_start = "start" 2052 2053 request_options_provider = DatetimeBasedRequestOptionsProvider( 2054 start_time_option=start_time_option, 2055 partition_field_start=partition_field_start, 2056 config=config, 2057 parameters=model.parameters or {}, 2058 ) 2059 else: 2060 request_options_provider = None 2061 2062 transformations = [] 2063 if model.transformations: 2064 for transformation_model in model.transformations: 2065 transformations.append( 2066 self._create_component_from_model(model=transformation_model, config=config) 2067 ) 2068 file_uploader = None 2069 if model.file_uploader: 2070 file_uploader = self._create_component_from_model( 2071 model=model.file_uploader, config=config 2072 ) 2073 2074 stream_slicer: ConcurrentStreamSlicer = ( 2075 partition_router 2076 if isinstance(concurrent_cursor, FinalStateCursor) 2077 else concurrent_cursor 2078 ) 2079 2080 retriever = self._create_component_from_model( 2081 model=model.retriever, 2082 config=config, 2083 name=model.name, 2084 primary_key=primary_key, 2085 request_options_provider=request_options_provider, 2086 stream_slicer=stream_slicer, 2087 partition_router=partition_router, 2088 has_stop_condition_cursor=self._is_stop_condition_on_cursor(model), 2089 is_client_side_incremental_sync=self._is_client_side_filtering_enabled(model), 2090 cursor=concurrent_cursor, 2091 transformations=transformations, 2092 file_uploader=file_uploader, 2093 incremental_sync=model.incremental_sync, 2094 ) 2095 if isinstance(retriever, AsyncRetriever): 2096 stream_slicer = retriever.stream_slicer 2097 2098 schema_loader: SchemaLoader 2099 if model.schema_loader and isinstance(model.schema_loader, list): 2100 nested_schema_loaders = [ 2101 self._create_component_from_model(model=nested_schema_loader, config=config) 2102 for nested_schema_loader in model.schema_loader 2103 ] 2104 schema_loader = CompositeSchemaLoader( 2105 schema_loaders=nested_schema_loaders, parameters={} 2106 ) 2107 elif model.schema_loader: 2108 schema_loader = self._create_component_from_model( 2109 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2110 config=config, 2111 ) 2112 else: 2113 options = model.parameters or {} 2114 if "name" not in options: 2115 options["name"] = model.name 2116 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2117 schema_loader = CachingSchemaLoaderDecorator(schema_loader) 2118 2119 stream_name = model.name or "" 2120 return DefaultStream( 2121 partition_generator=StreamSlicerPartitionGenerator( 2122 DeclarativePartitionFactory( 2123 stream_name, 2124 schema_loader, 2125 retriever, 2126 self._message_repository, 2127 ), 2128 stream_slicer, 2129 
slice_limit=self._limit_slices_fetched, 2130 ), 2131 name=stream_name, 2132 json_schema=schema_loader.get_json_schema, 2133 primary_key=get_primary_key_from_stream(primary_key), 2134 cursor_field=( 2135 concurrent_cursor.cursor_field 2136 if hasattr(concurrent_cursor, "cursor_field") 2137 else None 2138 ), 2139 logger=logging.getLogger(f"airbyte.{stream_name}"), 2140 cursor=concurrent_cursor, 2141 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2142 ) 2143 2144 def _migrate_state(self, model: DeclarativeStreamModel, config: Config) -> None: 2145 stream_name = model.name or "" 2146 stream_state = self._connector_state_manager.get_stream_state( 2147 stream_name=stream_name, namespace=None 2148 ) 2149 if model.state_migrations: 2150 state_transformations = [ 2151 self._create_component_from_model(state_migration, config, declarative_stream=model) 2152 for state_migration in model.state_migrations 2153 ] 2154 else: 2155 state_transformations = [] 2156 stream_state = self.apply_stream_state_migrations(state_transformations, stream_state) 2157 self._connector_state_manager.update_state_for_stream( 2158 stream_name=stream_name, namespace=None, value=stream_state 2159 ) 2160 2161 def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool: 2162 return bool( 2163 model.incremental_sync 2164 and hasattr(model.incremental_sync, "is_data_feed") 2165 and model.incremental_sync.is_data_feed 2166 ) 2167 2168 def _is_client_side_filtering_enabled(self, model: DeclarativeStreamModel) -> bool: 2169 return bool( 2170 model.incremental_sync 2171 and hasattr(model.incremental_sync, "is_client_side_incremental") 2172 and model.incremental_sync.is_client_side_incremental 2173 ) 2174 2175 def _build_stream_slicer_from_partition_router( 2176 self, 2177 model: Union[ 2178 AsyncRetrieverModel, 2179 CustomRetrieverModel, 2180 SimpleRetrieverModel, 2181 ], 2182 config: Config, 2183 stream_name: Optional[str] = None, 2184 **kwargs: Any, 2185 ) -> PartitionRouter: 2186 if ( 2187 hasattr(model, "partition_router") 2188 and isinstance(model, (SimpleRetrieverModel, AsyncRetrieverModel, CustomRetrieverModel)) 2189 and model.partition_router 2190 ): 2191 stream_slicer_model = model.partition_router 2192 if isinstance(stream_slicer_model, list): 2193 return CartesianProductStreamSlicer( 2194 [ 2195 self._create_component_from_model( 2196 model=slicer, config=config, stream_name=stream_name or "" 2197 ) 2198 for slicer in stream_slicer_model 2199 ], 2200 parameters={}, 2201 ) 2202 elif isinstance(stream_slicer_model, dict): 2203 # partition router comes from CustomRetrieverModel therefore has not been parsed as a model 2204 params = stream_slicer_model.get("$parameters") 2205 if not isinstance(params, dict): 2206 params = {} 2207 stream_slicer_model["$parameters"] = params 2208 2209 if stream_name is not None: 2210 params["stream_name"] = stream_name 2211 2212 return self._create_nested_component( # type: ignore[no-any-return] # There is no guarantee that this will return a stream slicer. 
If not, we expect an AttributeError during the call to `stream_slices` 2213 model, 2214 "partition_router", 2215 stream_slicer_model, 2216 config, 2217 **kwargs, 2218 ) 2219 else: 2220 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 2221 model=stream_slicer_model, config=config, stream_name=stream_name or "" 2222 ) 2223 return SinglePartitionRouter(parameters={}) 2224 2225 def _build_concurrent_cursor( 2226 self, 2227 model: DeclarativeStreamModel, 2228 stream_slicer: Optional[PartitionRouter], 2229 config: Config, 2230 ) -> Cursor: 2231 stream_name = model.name or "" 2232 stream_state = self._connector_state_manager.get_stream_state(stream_name, None) 2233 2234 if ( 2235 model.incremental_sync 2236 and stream_slicer 2237 and not isinstance(stream_slicer, SinglePartitionRouter) 2238 ): 2239 if isinstance(model.incremental_sync, IncrementingCountCursorModel): 2240 # We don't currently support usage of partition routing and IncrementingCountCursor at the 2241 # same time because we didn't solve for design questions like what the lookback window would 2242 # be as well as global cursor fall backs. We have not seen customers that have needed both 2243 # at the same time yet and are currently punting on this until we need to solve it. 2244 raise ValueError( 2245 f"The low-code framework does not currently support usage of a PartitionRouter and an IncrementingCountCursor at the same time. Please specify only one of these options for stream {stream_name}." 2246 ) 2247 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2248 state_manager=self._connector_state_manager, 2249 model_type=DatetimeBasedCursorModel, 2250 component_definition=model.incremental_sync.__dict__, 2251 stream_name=stream_name, 2252 stream_state=stream_state, 2253 stream_namespace=None, 2254 config=config or {}, 2255 partition_router=stream_slicer, 2256 attempt_to_create_cursor_if_not_provided=True, # FIXME can we remove that now? 2257 ) 2258 elif model.incremental_sync: 2259 if type(model.incremental_sync) == IncrementingCountCursorModel: 2260 return self.create_concurrent_cursor_from_incrementing_count_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2261 model_type=IncrementingCountCursorModel, 2262 component_definition=model.incremental_sync.__dict__, 2263 stream_name=stream_name, 2264 stream_namespace=None, 2265 stream_state=stream_state, 2266 config=config or {}, 2267 ) 2268 elif type(model.incremental_sync) == DatetimeBasedCursorModel: 2269 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2270 model_type=type(model.incremental_sync), 2271 component_definition=model.incremental_sync.__dict__, 2272 stream_name=stream_name, 2273 stream_namespace=None, 2274 stream_state=stream_state, 2275 config=config or {}, 2276 attempt_to_create_cursor_if_not_provided=True, 2277 ) 2278 else: 2279 raise ValueError( 2280 f"Incremental sync of type {type(model.incremental_sync)} is not supported" 2281 ) 2282 return FinalStateCursor(stream_name, None, self._message_repository) 2283 2284 def create_default_error_handler( 2285 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2286 ) -> DefaultErrorHandler: 2287 backoff_strategies = [] 2288 if model.backoff_strategies: 2289 for backoff_strategy_model in model.backoff_strategies: 2290 backoff_strategies.append( 2291 self._create_component_from_model(model=backoff_strategy_model, config=config) 2292 ) 2293 2294 response_filters = [] 2295 if model.response_filters: 2296 for response_filter_model in model.response_filters: 2297 response_filters.append( 2298 self._create_component_from_model(model=response_filter_model, config=config) 2299 ) 2300 response_filters.append( 2301 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2302 ) 2303 2304 return DefaultErrorHandler( 2305 backoff_strategies=backoff_strategies, 2306 max_retries=model.max_retries, 2307 response_filters=response_filters, 2308 config=config, 2309 parameters=model.parameters or {}, 2310 ) 2311 2312 def create_default_paginator( 2313 self, 2314 model: DefaultPaginatorModel, 2315 config: Config, 2316 *, 2317 url_base: str, 2318 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2319 decoder: Optional[Decoder] = None, 2320 cursor_used_for_stop_condition: Optional[Cursor] = None, 2321 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2322 if decoder: 2323 if self._is_supported_decoder_for_pagination(decoder): 2324 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2325 else: 2326 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2327 else: 2328 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2329 page_size_option = ( 2330 self._create_component_from_model(model=model.page_size_option, config=config) 2331 if model.page_size_option 2332 else None 2333 ) 2334 page_token_option = ( 2335 self._create_component_from_model(model=model.page_token_option, config=config) 2336 if model.page_token_option 2337 else None 2338 ) 2339 pagination_strategy = self._create_component_from_model( 2340 model=model.pagination_strategy, 2341 config=config, 2342 decoder=decoder_to_use, 2343 extractor_model=extractor_model, 2344 ) 2345 if cursor_used_for_stop_condition: 2346 pagination_strategy = StopConditionPaginationStrategyDecorator( 2347 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2348 ) 2349 paginator = DefaultPaginator( 2350 decoder=decoder_to_use, 2351 page_size_option=page_size_option, 2352 page_token_option=page_token_option, 2353 pagination_strategy=pagination_strategy, 2354 url_base=url_base, 2355 config=config, 2356 parameters=model.parameters or {}, 2357 ) 2358 if self._limit_pages_fetched_per_slice: 2359 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2360 return paginator 2361 2362 def create_dpath_extractor( 2363 self, 2364 model: 
DpathExtractorModel, 2365 config: Config, 2366 decoder: Optional[Decoder] = None, 2367 **kwargs: Any, 2368 ) -> DpathExtractor: 2369 if decoder: 2370 decoder_to_use = decoder 2371 else: 2372 decoder_to_use = JsonDecoder(parameters={}) 2373 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2374 return DpathExtractor( 2375 decoder=decoder_to_use, 2376 field_path=model_field_path, 2377 config=config, 2378 parameters=model.parameters or {}, 2379 ) 2380 2381 @staticmethod 2382 def create_response_to_file_extractor( 2383 model: ResponseToFileExtractorModel, 2384 **kwargs: Any, 2385 ) -> ResponseToFileExtractor: 2386 return ResponseToFileExtractor(parameters=model.parameters or {}) 2387 2388 @staticmethod 2389 def create_exponential_backoff_strategy( 2390 model: ExponentialBackoffStrategyModel, config: Config 2391 ) -> ExponentialBackoffStrategy: 2392 return ExponentialBackoffStrategy( 2393 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2394 ) 2395 2396 @staticmethod 2397 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2398 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2399 2400 def create_http_requester( 2401 self, 2402 model: HttpRequesterModel, 2403 config: Config, 2404 decoder: Decoder = JsonDecoder(parameters={}), 2405 query_properties_key: Optional[str] = None, 2406 use_cache: Optional[bool] = None, 2407 *, 2408 name: str, 2409 ) -> HttpRequester: 2410 authenticator = ( 2411 self._create_component_from_model( 2412 model=model.authenticator, 2413 config=config, 2414 url_base=model.url or model.url_base, 2415 name=name, 2416 decoder=decoder, 2417 ) 2418 if model.authenticator 2419 else None 2420 ) 2421 error_handler = ( 2422 self._create_component_from_model(model=model.error_handler, config=config) 2423 if model.error_handler 2424 else DefaultErrorHandler( 2425 backoff_strategies=[], 2426 response_filters=[], 2427 config=config, 2428 parameters=model.parameters or {}, 2429 ) 2430 ) 2431 2432 api_budget = self._api_budget 2433 2434 request_options_provider = InterpolatedRequestOptionsProvider( 2435 request_body=model.request_body, 2436 request_body_data=model.request_body_data, 2437 request_body_json=model.request_body_json, 2438 request_headers=model.request_headers, 2439 request_parameters=model.request_parameters, # type: ignore # QueryProperties have been removed in `create_simple_retriever` 2440 query_properties_key=query_properties_key, 2441 config=config, 2442 parameters=model.parameters or {}, 2443 ) 2444 2445 assert model.use_cache is not None # for mypy 2446 assert model.http_method is not None # for mypy 2447 2448 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2449 2450 return HttpRequester( 2451 name=name, 2452 url=model.url, 2453 url_base=model.url_base, 2454 path=model.path, 2455 authenticator=authenticator, 2456 error_handler=error_handler, 2457 api_budget=api_budget, 2458 http_method=HttpMethod[model.http_method.value], 2459 request_options_provider=request_options_provider, 2460 config=config, 2461 disable_retries=self._disable_retries, 2462 parameters=model.parameters or {}, 2463 message_repository=self._message_repository, 2464 use_cache=should_use_cache, 2465 decoder=decoder, 2466 stream_response=decoder.is_stream_response() if decoder else False, 2467 ) 2468 2469 @staticmethod 2470 def create_http_response_filter( 2471 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2472 ) -> 
HttpResponseFilter: 2473 if model.action: 2474 action = ResponseAction(model.action.value) 2475 else: 2476 action = None 2477 2478 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2479 2480 http_codes = ( 2481 set(model.http_codes) if model.http_codes else set() 2482 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2483 2484 return HttpResponseFilter( 2485 action=action, 2486 failure_type=failure_type, 2487 error_message=model.error_message or "", 2488 error_message_contains=model.error_message_contains or "", 2489 http_codes=http_codes, 2490 predicate=model.predicate or "", 2491 config=config, 2492 parameters=model.parameters or {}, 2493 ) 2494 2495 @staticmethod 2496 def create_inline_schema_loader( 2497 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2498 ) -> InlineSchemaLoader: 2499 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2500 2501 def create_complex_field_type( 2502 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2503 ) -> ComplexFieldType: 2504 items = ( 2505 self._create_component_from_model(model=model.items, config=config) 2506 if isinstance(model.items, ComplexFieldTypeModel) 2507 else model.items 2508 ) 2509 2510 return ComplexFieldType(field_type=model.field_type, items=items) 2511 2512 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2513 target_type = ( 2514 self._create_component_from_model(model=model.target_type, config=config) 2515 if isinstance(model.target_type, ComplexFieldTypeModel) 2516 else model.target_type 2517 ) 2518 2519 return TypesMap( 2520 target_type=target_type, 2521 current_type=model.current_type, 2522 condition=model.condition if model.condition is not None else "True", 2523 ) 2524 2525 def create_schema_type_identifier( 2526 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2527 ) -> SchemaTypeIdentifier: 2528 types_mapping = [] 2529 if model.types_mapping: 2530 types_mapping.extend( 2531 [ 2532 self._create_component_from_model(types_map, config=config) 2533 for types_map in model.types_mapping 2534 ] 2535 ) 2536 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2537 [x for x in model.schema_pointer] if model.schema_pointer else [] 2538 ) 2539 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2540 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2541 [x for x in model.type_pointer] if model.type_pointer else None 2542 ) 2543 2544 return SchemaTypeIdentifier( 2545 schema_pointer=model_schema_pointer, 2546 key_pointer=model_key_pointer, 2547 type_pointer=model_type_pointer, 2548 types_mapping=types_mapping, 2549 parameters=model.parameters or {}, 2550 ) 2551 2552 def create_dynamic_schema_loader( 2553 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2554 ) -> DynamicSchemaLoader: 2555 schema_transformations = [] 2556 if model.schema_transformations: 2557 for transformation_model in model.schema_transformations: 2558 schema_transformations.append( 2559 self._create_component_from_model(model=transformation_model, config=config) 2560 ) 2561 name = "dynamic_properties" 2562 retriever = self._create_component_from_model( 2563 model=model.retriever, 2564 config=config, 2565 name=name, 2566 primary_key=None, 2567 partition_router=self._build_stream_slicer_from_partition_router( 2568 model.retriever, config 2569 ), 2570 transformations=[], 2571 use_cache=True, 2572 
log_formatter=( 2573 lambda response: format_http_message( 2574 response, 2575 f"Schema loader '{name}' request", 2576 f"Request performed in order to extract schema.", 2577 name, 2578 is_auxiliary=True, 2579 ) 2580 ), 2581 ) 2582 schema_type_identifier = self._create_component_from_model( 2583 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2584 ) 2585 schema_filter = ( 2586 self._create_component_from_model( 2587 model.schema_filter, config=config, parameters=model.parameters or {} 2588 ) 2589 if model.schema_filter is not None 2590 else None 2591 ) 2592 2593 return DynamicSchemaLoader( 2594 retriever=retriever, 2595 config=config, 2596 schema_transformations=schema_transformations, 2597 schema_filter=schema_filter, 2598 schema_type_identifier=schema_type_identifier, 2599 parameters=model.parameters or {}, 2600 ) 2601 2602 @staticmethod 2603 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2604 return JsonDecoder(parameters={}) 2605 2606 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2607 return CompositeRawDecoder( 2608 parser=ModelToComponentFactory._get_parser(model, config), 2609 stream_response=False if self._emit_connector_builder_messages else True, 2610 ) 2611 2612 def create_jsonl_decoder( 2613 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2614 ) -> Decoder: 2615 return CompositeRawDecoder( 2616 parser=ModelToComponentFactory._get_parser(model, config), 2617 stream_response=False if self._emit_connector_builder_messages else True, 2618 ) 2619 2620 def create_gzip_decoder( 2621 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2622 ) -> Decoder: 2623 _compressed_response_types = { 2624 "gzip", 2625 "x-gzip", 2626 "gzip, deflate", 2627 "x-gzip, deflate", 2628 "application/zip", 2629 "application/gzip", 2630 "application/x-gzip", 2631 "application/x-zip-compressed", 2632 } 2633 2634 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2635 2636 if self._emit_connector_builder_messages: 2637 # This is very surprising but if the response is not streamed, 2638 # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw, 2639 # which uses urllib3 directly and does not uncompress the data. 
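# In other words: with stream_response=False the parser reads response.content, which requests has
# already decompressed based on the Content-Encoding header, so the inner (non-gzip) parser is
# sufficient; with stream_response=True the parser reads response.raw through urllib3, which stays
# compressed, hence the GzipParser is kept for the header-matching branch below.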
2640 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2641 2642 return CompositeRawDecoder.by_headers( 2643 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2644 stream_response=True, 2645 fallback_parser=gzip_parser.inner_parser, 2646 ) 2647 2648 @staticmethod 2649 def create_iterable_decoder( 2650 model: IterableDecoderModel, config: Config, **kwargs: Any 2651 ) -> IterableDecoder: 2652 return IterableDecoder(parameters={}) 2653 2654 @staticmethod 2655 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2656 return XmlDecoder(parameters={}) 2657 2658 def create_zipfile_decoder( 2659 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2660 ) -> ZipfileDecoder: 2661 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2662 2663 @staticmethod 2664 def _get_parser(model: BaseModel, config: Config) -> Parser: 2665 if isinstance(model, JsonDecoderModel): 2666 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2667 return JsonParser() 2668 elif isinstance(model, JsonlDecoderModel): 2669 return JsonLineParser() 2670 elif isinstance(model, CsvDecoderModel): 2671 return CsvParser( 2672 encoding=model.encoding, 2673 delimiter=model.delimiter, 2674 set_values_to_none=model.set_values_to_none, 2675 ) 2676 elif isinstance(model, GzipDecoderModel): 2677 return GzipParser( 2678 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2679 ) 2680 elif isinstance( 2681 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2682 ): 2683 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2684 2685 raise ValueError(f"Unknown decoder type {model}") 2686 2687 @staticmethod 2688 def create_json_file_schema_loader( 2689 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2690 ) -> JsonFileSchemaLoader: 2691 return JsonFileSchemaLoader( 2692 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2693 ) 2694 2695 def create_jwt_authenticator( 2696 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2697 ) -> JwtAuthenticator: 2698 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2699 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2700 request_option = ( 2701 self._create_component_from_model(model.request_option, config) 2702 if model.request_option 2703 else None 2704 ) 2705 return JwtAuthenticator( 2706 config=config, 2707 parameters=model.parameters or {}, 2708 algorithm=JwtAlgorithm(model.algorithm.value), 2709 secret_key=model.secret_key, 2710 base64_encode_secret_key=model.base64_encode_secret_key, 2711 token_duration=model.token_duration, 2712 header_prefix=model.header_prefix, 2713 kid=jwt_headers.kid, 2714 typ=jwt_headers.typ, 2715 cty=jwt_headers.cty, 2716 iss=jwt_payload.iss, 2717 sub=jwt_payload.sub, 2718 aud=jwt_payload.aud, 2719 additional_jwt_headers=model.additional_jwt_headers, 2720 additional_jwt_payload=model.additional_jwt_payload, 2721 passphrase=model.passphrase, 2722 request_option=request_option, 2723 ) 2724 2725 def create_list_partition_router( 2726 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2727 ) -> ListPartitionRouter: 2728 request_option = ( 2729 self._create_component_from_model(model.request_option, config) 2730 if model.request_option 2731 else None 
2732 ) 2733 return ListPartitionRouter( 2734 cursor_field=model.cursor_field, 2735 request_option=request_option, 2736 values=model.values, 2737 config=config, 2738 parameters=model.parameters or {}, 2739 ) 2740 2741 @staticmethod 2742 def create_min_max_datetime( 2743 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2744 ) -> MinMaxDatetime: 2745 return MinMaxDatetime( 2746 datetime=model.datetime, 2747 datetime_format=model.datetime_format or "", 2748 max_datetime=model.max_datetime or "", 2749 min_datetime=model.min_datetime or "", 2750 parameters=model.parameters or {}, 2751 ) 2752 2753 @staticmethod 2754 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2755 return NoAuth(parameters=model.parameters or {}) 2756 2757 @staticmethod 2758 def create_no_pagination( 2759 model: NoPaginationModel, config: Config, **kwargs: Any 2760 ) -> NoPagination: 2761 return NoPagination(parameters={}) 2762 2763 def create_oauth_authenticator( 2764 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2765 ) -> DeclarativeOauth2Authenticator: 2766 profile_assertion = ( 2767 self._create_component_from_model(model.profile_assertion, config=config) 2768 if model.profile_assertion 2769 else None 2770 ) 2771 2772 refresh_token_error_status_codes, refresh_token_error_key, refresh_token_error_values = ( 2773 self._get_refresh_token_error_information(model) 2774 ) 2775 if model.refresh_token_updater: 2776 # ignore type error because fixing it would have a lot of dependencies, revisit later 2777 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2778 config, 2779 InterpolatedString.create( 2780 model.token_refresh_endpoint, # type: ignore 2781 parameters=model.parameters or {}, 2782 ).eval(config), 2783 access_token_name=InterpolatedString.create( 2784 model.access_token_name or "access_token", parameters=model.parameters or {} 2785 ).eval(config), 2786 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2787 expires_in_name=InterpolatedString.create( 2788 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2789 ).eval(config), 2790 client_id_name=InterpolatedString.create( 2791 model.client_id_name or "client_id", parameters=model.parameters or {} 2792 ).eval(config), 2793 client_id=InterpolatedString.create( 2794 model.client_id, parameters=model.parameters or {} 2795 ).eval(config) 2796 if model.client_id 2797 else model.client_id, 2798 client_secret_name=InterpolatedString.create( 2799 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2800 ).eval(config), 2801 client_secret=InterpolatedString.create( 2802 model.client_secret, parameters=model.parameters or {} 2803 ).eval(config) 2804 if model.client_secret 2805 else model.client_secret, 2806 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2807 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2808 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2809 grant_type_name=InterpolatedString.create( 2810 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2811 ).eval(config), 2812 grant_type=InterpolatedString.create( 2813 model.grant_type or "refresh_token", parameters=model.parameters or {} 2814 ).eval(config), 2815 refresh_request_body=InterpolatedMapping( 2816 model.refresh_request_body or {}, parameters=model.parameters or {} 2817 ).eval(config), 2818 refresh_request_headers=InterpolatedMapping( 
2819 model.refresh_request_headers or {}, parameters=model.parameters or {} 2820 ).eval(config), 2821 scopes=model.scopes, 2822 token_expiry_date_format=model.token_expiry_date_format, 2823 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2824 message_repository=self._message_repository, 2825 refresh_token_error_status_codes=refresh_token_error_status_codes, 2826 refresh_token_error_key=refresh_token_error_key, 2827 refresh_token_error_values=refresh_token_error_values, 2828 ) 2829 # ignore type error because fixing it would have a lot of dependencies, revisit later 2830 return DeclarativeOauth2Authenticator( # type: ignore 2831 access_token_name=model.access_token_name or "access_token", 2832 access_token_value=model.access_token_value, 2833 client_id_name=model.client_id_name or "client_id", 2834 client_id=model.client_id, 2835 client_secret_name=model.client_secret_name or "client_secret", 2836 client_secret=model.client_secret, 2837 expires_in_name=model.expires_in_name or "expires_in", 2838 grant_type_name=model.grant_type_name or "grant_type", 2839 grant_type=model.grant_type or "refresh_token", 2840 refresh_request_body=model.refresh_request_body, 2841 refresh_request_headers=model.refresh_request_headers, 2842 refresh_token_name=model.refresh_token_name or "refresh_token", 2843 refresh_token=model.refresh_token, 2844 scopes=model.scopes, 2845 token_expiry_date=model.token_expiry_date, 2846 token_expiry_date_format=model.token_expiry_date_format, 2847 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2848 token_refresh_endpoint=model.token_refresh_endpoint, 2849 config=config, 2850 parameters=model.parameters or {}, 2851 message_repository=self._message_repository, 2852 profile_assertion=profile_assertion, 2853 use_profile_assertion=model.use_profile_assertion, 2854 refresh_token_error_status_codes=refresh_token_error_status_codes, 2855 refresh_token_error_key=refresh_token_error_key, 2856 refresh_token_error_values=refresh_token_error_values, 2857 ) 2858 2859 @staticmethod 2860 def _get_refresh_token_error_information( 2861 model: OAuthAuthenticatorModel, 2862 ) -> Tuple[Tuple[int, ...], str, Tuple[str, ...]]: 2863 """ 2864 In a previous version of the CDK, the auth error was only raised as a config_error if a refresh token updater was 2865 defined. As a transition, we added those fields to the OAuthAuthenticatorModel as well. This method ensures that the 2866 information is defined only once and returns the right fields.
2867 """ 2868 refresh_token_updater = model.refresh_token_updater 2869 is_defined_on_refresh_token_updated = refresh_token_updater and ( 2870 refresh_token_updater.refresh_token_error_status_codes 2871 or refresh_token_updater.refresh_token_error_key 2872 or refresh_token_updater.refresh_token_error_values 2873 ) 2874 is_defined_on_oauth_authenticator = ( 2875 model.refresh_token_error_status_codes 2876 or model.refresh_token_error_key 2877 or model.refresh_token_error_values 2878 ) 2879 if is_defined_on_refresh_token_updated and is_defined_on_oauth_authenticator: 2880 raise ValueError( 2881 "refresh_token_error should either be defined on the OAuthAuthenticatorModel or the RefreshTokenUpdaterModel, not both" 2882 ) 2883 2884 if is_defined_on_refresh_token_updated: 2885 not_optional_refresh_token_updater: RefreshTokenUpdaterModel = refresh_token_updater # type: ignore # we know from the condition that this is not None 2886 return ( 2887 tuple(not_optional_refresh_token_updater.refresh_token_error_status_codes) 2888 if not_optional_refresh_token_updater.refresh_token_error_status_codes 2889 else (), 2890 not_optional_refresh_token_updater.refresh_token_error_key or "", 2891 tuple(not_optional_refresh_token_updater.refresh_token_error_values) 2892 if not_optional_refresh_token_updater.refresh_token_error_values 2893 else (), 2894 ) 2895 elif is_defined_on_oauth_authenticator: 2896 return ( 2897 tuple(model.refresh_token_error_status_codes) 2898 if model.refresh_token_error_status_codes 2899 else (), 2900 model.refresh_token_error_key or "", 2901 tuple(model.refresh_token_error_values) if model.refresh_token_error_values else (), 2902 ) 2903 2904 # returning default values we think cover most cases 2905 return (400,), "error", ("invalid_grant", "invalid_permissions") 2906 2907 def create_offset_increment( 2908 self, 2909 model: OffsetIncrementModel, 2910 config: Config, 2911 decoder: Decoder, 2912 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2913 **kwargs: Any, 2914 ) -> OffsetIncrement: 2915 if isinstance(decoder, PaginationDecoderDecorator): 2916 inner_decoder = decoder.decoder 2917 else: 2918 inner_decoder = decoder 2919 decoder = PaginationDecoderDecorator(decoder=decoder) 2920 2921 if self._is_supported_decoder_for_pagination(inner_decoder): 2922 decoder_to_use = decoder 2923 else: 2924 raise ValueError( 2925 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2926 ) 2927 2928 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2929 # so that it can be shared by OffSetIncrement and RecordSelector. However, due to how we instantiate the 2930 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2931 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2932 # When we have more time to investigate we can look into reusing the same component. 2933 extractor = ( 2934 self._create_component_from_model( 2935 model=extractor_model, config=config, decoder=decoder_to_use 2936 ) 2937 if extractor_model 2938 else None 2939 ) 2940 2941 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 2942 # If page_size is a string that represents an integer (not an interpolation), convert it back. 
2943 page_size = model.page_size 2944 if isinstance(page_size, str) and page_size.isdigit(): 2945 page_size = int(page_size) 2946 2947 return OffsetIncrement( 2948 page_size=page_size, 2949 config=config, 2950 decoder=decoder_to_use, 2951 extractor=extractor, 2952 inject_on_first_request=model.inject_on_first_request or False, 2953 parameters=model.parameters or {}, 2954 ) 2955 2956 @staticmethod 2957 def create_page_increment( 2958 model: PageIncrementModel, config: Config, **kwargs: Any 2959 ) -> PageIncrement: 2960 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 2961 # If page_size is a string that represents an integer (not an interpolation), convert it back. 2962 page_size = model.page_size 2963 if isinstance(page_size, str) and page_size.isdigit(): 2964 page_size = int(page_size) 2965 2966 return PageIncrement( 2967 page_size=page_size, 2968 config=config, 2969 start_from_page=model.start_from_page or 0, 2970 inject_on_first_request=model.inject_on_first_request or False, 2971 parameters=model.parameters or {}, 2972 ) 2973 2974 def create_parent_stream_config( 2975 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2976 ) -> ParentStreamConfig: 2977 declarative_stream = self._create_component_from_model( 2978 model.stream, 2979 config=config, 2980 is_parent=True, 2981 **kwargs, 2982 ) 2983 request_option = ( 2984 self._create_component_from_model(model.request_option, config=config) 2985 if model.request_option 2986 else None 2987 ) 2988 2989 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2990 raise ValueError( 2991 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 2992 ) 2993 2994 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2995 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2996 ) 2997 2998 return ParentStreamConfig( 2999 parent_key=model.parent_key, 3000 request_option=request_option, 3001 stream=declarative_stream, 3002 partition_field=model.partition_field, 3003 config=config, 3004 incremental_dependency=model.incremental_dependency or False, 3005 parameters=model.parameters or {}, 3006 extra_fields=model.extra_fields, 3007 lazy_read_pointer=model_lazy_read_pointer, 3008 ) 3009 3010 def create_properties_from_endpoint( 3011 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 3012 ) -> PropertiesFromEndpoint: 3013 retriever = self._create_component_from_model( 3014 model=model.retriever, 3015 config=config, 3016 name="dynamic_properties", 3017 primary_key=None, 3018 stream_slicer=None, 3019 transformations=[], 3020 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to different 3021 ) 3022 return PropertiesFromEndpoint( 3023 property_field_path=model.property_field_path, 3024 retriever=retriever, 3025 config=config, 3026 parameters=model.parameters or {}, 3027 ) 3028 3029 def create_property_chunking( 3030 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 3031 ) -> PropertyChunking: 3032 record_merge_strategy = ( 3033 self._create_component_from_model( 3034 model=model.record_merge_strategy, config=config, **kwargs 3035 ) 3036 if model.record_merge_strategy 3037 else None 3038 ) 3039 3040 property_limit_type: PropertyLimitType 3041 match model.property_limit_type: 3042 case 
PropertyLimitTypeModel.property_count: 3043 property_limit_type = PropertyLimitType.property_count 3044 case PropertyLimitTypeModel.characters: 3045 property_limit_type = PropertyLimitType.characters 3046 case _: 3047 raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}") 3048 3049 return PropertyChunking( 3050 property_limit_type=property_limit_type, 3051 property_limit=model.property_limit, 3052 record_merge_strategy=record_merge_strategy, 3053 config=config, 3054 parameters=model.parameters or {}, 3055 ) 3056 3057 def create_query_properties( 3058 self, model: QueryPropertiesModel, config: Config, *, stream_name: str, **kwargs: Any 3059 ) -> QueryProperties: 3060 if isinstance(model.property_list, list): 3061 property_list = model.property_list 3062 else: 3063 property_list = self._create_component_from_model( 3064 model=model.property_list, config=config, **kwargs 3065 ) 3066 3067 property_chunking = ( 3068 self._create_component_from_model( 3069 model=model.property_chunking, config=config, **kwargs 3070 ) 3071 if model.property_chunking 3072 else None 3073 ) 3074 3075 property_selector = ( 3076 self._create_component_from_model( 3077 model=model.property_selector, config=config, stream_name=stream_name, **kwargs 3078 ) 3079 if model.property_selector 3080 else None 3081 ) 3082 3083 return QueryProperties( 3084 property_list=property_list, 3085 always_include_properties=model.always_include_properties, 3086 property_chunking=property_chunking, 3087 property_selector=property_selector, 3088 config=config, 3089 parameters=model.parameters or {}, 3090 ) 3091 3092 def create_json_schema_property_selector( 3093 self, 3094 model: JsonSchemaPropertySelectorModel, 3095 config: Config, 3096 *, 3097 stream_name: str, 3098 **kwargs: Any, 3099 ) -> JsonSchemaPropertySelector: 3100 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 3101 3102 transformations = [] 3103 if model.transformations: 3104 for transformation_model in model.transformations: 3105 transformations.append( 3106 self._create_component_from_model(model=transformation_model, config=config) 3107 ) 3108 3109 return JsonSchemaPropertySelector( 3110 configured_stream=configured_stream, 3111 properties_transformations=transformations, 3112 config=config, 3113 parameters=model.parameters or {}, 3114 ) 3115 3116 @staticmethod 3117 def create_record_filter( 3118 model: RecordFilterModel, config: Config, **kwargs: Any 3119 ) -> RecordFilter: 3120 return RecordFilter( 3121 condition=model.condition or "", config=config, parameters=model.parameters or {} 3122 ) 3123 3124 @staticmethod 3125 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 3126 return RequestPath(parameters={}) 3127 3128 @staticmethod 3129 def create_request_option( 3130 model: RequestOptionModel, config: Config, **kwargs: Any 3131 ) -> RequestOption: 3132 inject_into = RequestOptionType(model.inject_into.value) 3133 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3134 [ 3135 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3136 for segment in model.field_path 3137 ] 3138 if model.field_path 3139 else None 3140 ) 3141 field_name = ( 3142 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3143 if model.field_name 3144 else None 3145 ) 3146 return RequestOption( 3147 field_name=field_name, 3148 field_path=field_path, 3149 inject_into=inject_into, 3150 parameters=kwargs.get("parameters", {}), 3151 ) 3152 3153 def
create_record_selector( 3154 self, 3155 model: RecordSelectorModel, 3156 config: Config, 3157 *, 3158 name: str, 3159 transformations: List[RecordTransformation] | None = None, 3160 decoder: Decoder | None = None, 3161 client_side_incremental_sync_cursor: Optional[Cursor] = None, 3162 file_uploader: Optional[DefaultFileUploader] = None, 3163 **kwargs: Any, 3164 ) -> RecordSelector: 3165 extractor = self._create_component_from_model( 3166 model=model.extractor, decoder=decoder, config=config 3167 ) 3168 record_filter = ( 3169 self._create_component_from_model(model.record_filter, config=config) 3170 if model.record_filter 3171 else None 3172 ) 3173 3174 transform_before_filtering = ( 3175 False if model.transform_before_filtering is None else model.transform_before_filtering 3176 ) 3177 if client_side_incremental_sync_cursor: 3178 record_filter = ClientSideIncrementalRecordFilterDecorator( 3179 config=config, 3180 parameters=model.parameters, 3181 condition=model.record_filter.condition 3182 if (model.record_filter and hasattr(model.record_filter, "condition")) 3183 else None, 3184 cursor=client_side_incremental_sync_cursor, 3185 ) 3186 transform_before_filtering = ( 3187 True 3188 if model.transform_before_filtering is None 3189 else model.transform_before_filtering 3190 ) 3191 3192 if model.schema_normalization is None: 3193 # default to no schema normalization if not set 3194 model.schema_normalization = SchemaNormalizationModel.None_ 3195 3196 schema_normalization = ( 3197 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3198 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3199 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3200 ) 3201 3202 return RecordSelector( 3203 extractor=extractor, 3204 name=name, 3205 config=config, 3206 record_filter=record_filter, 3207 transformations=transformations or [], 3208 file_uploader=file_uploader, 3209 schema_normalization=schema_normalization, 3210 parameters=model.parameters or {}, 3211 transform_before_filtering=transform_before_filtering, 3212 ) 3213 3214 @staticmethod 3215 def create_remove_fields( 3216 model: RemoveFieldsModel, config: Config, **kwargs: Any 3217 ) -> RemoveFields: 3218 return RemoveFields( 3219 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 3220 ) 3221 3222 def create_selective_authenticator( 3223 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3224 ) -> DeclarativeAuthenticator: 3225 authenticators = { 3226 name: self._create_component_from_model(model=auth, config=config) 3227 for name, auth in model.authenticators.items() 3228 } 3229 # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error 3230 return SelectiveAuthenticator( # type: ignore[abstract] 3231 config=config, 3232 authenticators=authenticators, 3233 authenticator_selection_path=model.authenticator_selection_path, 3234 **kwargs, 3235 ) 3236 3237 @staticmethod 3238 def create_legacy_session_token_authenticator( 3239 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3240 ) -> LegacySessionTokenAuthenticator: 3241 return LegacySessionTokenAuthenticator( 3242 api_url=url_base, 3243 header=model.header, 3244 login_url=model.login_url, 3245 password=model.password or "", 3246 session_token=model.session_token or "", 3247 session_token_response_key=model.session_token_response_key 
or "", 3248 username=model.username or "", 3249 validate_session_url=model.validate_session_url, 3250 config=config, 3251 parameters=model.parameters or {}, 3252 ) 3253 3254 def create_simple_retriever( 3255 self, 3256 model: SimpleRetrieverModel, 3257 config: Config, 3258 *, 3259 name: str, 3260 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3261 request_options_provider: Optional[RequestOptionsProvider] = None, 3262 cursor: Optional[Cursor] = None, 3263 has_stop_condition_cursor: bool = False, 3264 is_client_side_incremental_sync: bool = False, 3265 transformations: List[RecordTransformation], 3266 file_uploader: Optional[DefaultFileUploader] = None, 3267 incremental_sync: Optional[ 3268 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3269 ] = None, 3270 use_cache: Optional[bool] = None, 3271 log_formatter: Optional[Callable[[Response], Any]] = None, 3272 partition_router: Optional[PartitionRouter] = None, 3273 **kwargs: Any, 3274 ) -> SimpleRetriever: 3275 def _get_url(req: Requester) -> str: 3276 """ 3277 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3278 This is needed because the URL is not set until the requester is created. 3279 """ 3280 3281 _url: str = ( 3282 model.requester.url 3283 if hasattr(model.requester, "url") and model.requester.url is not None 3284 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3285 ) 3286 _url_base: str = ( 3287 model.requester.url_base 3288 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3289 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3290 ) 3291 3292 return _url or _url_base 3293 3294 if cursor is None: 3295 cursor = FinalStateCursor(name, None, self._message_repository) 3296 3297 decoder = ( 3298 self._create_component_from_model(model=model.decoder, config=config) 3299 if model.decoder 3300 else JsonDecoder(parameters={}) 3301 ) 3302 record_selector = self._create_component_from_model( 3303 model=model.record_selector, 3304 name=name, 3305 config=config, 3306 decoder=decoder, 3307 transformations=transformations, 3308 client_side_incremental_sync_cursor=cursor if is_client_side_incremental_sync else None, 3309 file_uploader=file_uploader, 3310 ) 3311 3312 query_properties: Optional[QueryProperties] = None 3313 query_properties_key: Optional[str] = None 3314 self._ensure_query_properties_to_model(model.requester) 3315 if self._has_query_properties_in_request_parameters(model.requester): 3316 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3317 # places instead of default to request_parameters which isn't clearly documented 3318 if ( 3319 hasattr(model.requester, "fetch_properties_from_endpoint") 3320 and model.requester.fetch_properties_from_endpoint 3321 ): 3322 raise ValueError( 3323 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3324 ) 3325 3326 query_properties_definitions = [] 3327 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _has_query_properties_in_request_parameters() 3328 if isinstance(request_parameter, QueryPropertiesModel): 3329 query_properties_key = key 3330 query_properties_definitions.append(request_parameter) 3331 3332 if len(query_properties_definitions) > 1: 
3333 raise ValueError( 3334 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3335 ) 3336 3337 if len(query_properties_definitions) == 1: 3338 query_properties = self._create_component_from_model( 3339 model=query_properties_definitions[0], stream_name=name, config=config 3340 ) 3341 3342 # Removes QueryProperties components from the interpolated mappings because it has been designed 3343 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 3344 # instead of through jinja interpolation 3345 if hasattr(model.requester, "request_parameters") and isinstance( 3346 model.requester.request_parameters, Mapping 3347 ): 3348 model.requester.request_parameters = self._remove_query_properties( 3349 model.requester.request_parameters 3350 ) 3351 elif ( 3352 hasattr(model.requester, "fetch_properties_from_endpoint") 3353 and model.requester.fetch_properties_from_endpoint 3354 ): 3355 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3356 query_properties_definition = QueryPropertiesModel( 3357 type="QueryProperties", 3358 property_list=model.requester.fetch_properties_from_endpoint, 3359 always_include_properties=None, 3360 property_chunking=None, 3361 ) # type: ignore # $parameters has a default value 3362 3363 query_properties = self.create_query_properties( 3364 model=query_properties_definition, 3365 stream_name=name, 3366 config=config, 3367 ) 3368 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3369 query_properties = self.create_query_properties( 3370 model=model.requester.query_properties, 3371 stream_name=name, 3372 config=config, 3373 ) 3374 3375 requester = self._create_component_from_model( 3376 model=model.requester, 3377 decoder=decoder, 3378 name=name, 3379 query_properties_key=query_properties_key, 3380 use_cache=use_cache, 3381 config=config, 3382 ) 3383 3384 if not request_options_provider: 3385 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3386 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3387 partition_router, PartitionRouter 3388 ): 3389 request_options_provider = partition_router 3390 3391 paginator = ( 3392 self._create_component_from_model( 3393 model=model.paginator, 3394 config=config, 3395 url_base=_get_url(requester), 3396 extractor_model=model.record_selector.extractor, 3397 decoder=decoder, 3398 cursor_used_for_stop_condition=cursor if has_stop_condition_cursor else None, 3399 ) 3400 if model.paginator 3401 else NoPagination(parameters={}) 3402 ) 3403 3404 ignore_stream_slicer_parameters_on_paginated_requests = ( 3405 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3406 ) 3407 3408 if ( 3409 model.partition_router 3410 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3411 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3412 and any( 3413 parent_stream_config.lazy_read_pointer 3414 for parent_stream_config in model.partition_router.parent_stream_configs 3415 ) 3416 ): 3417 if incremental_sync: 3418 if incremental_sync.type != "DatetimeBasedCursor": 3419 raise ValueError( 3420 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3421 ) 3422 3423 elif incremental_sync.step or incremental_sync.cursor_granularity: 3424 raise ValueError( 3425 f"Found more that one slice per parent. 
LazySimpleRetriever only supports single slice read for stream - {name}." 3426 ) 3427 3428 if model.decoder and model.decoder.type != "JsonDecoder": 3429 raise ValueError( 3430 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 3431 ) 3432 3433 return LazySimpleRetriever( 3434 name=name, 3435 paginator=paginator, 3436 primary_key=primary_key, 3437 requester=requester, 3438 record_selector=record_selector, 3439 stream_slicer=_NO_STREAM_SLICING, 3440 request_option_provider=request_options_provider, 3441 config=config, 3442 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3443 parameters=model.parameters or {}, 3444 ) 3445 3446 if ( 3447 model.record_selector.record_filter 3448 and model.pagination_reset 3449 and model.pagination_reset.limits 3450 ): 3451 raise ValueError("PaginationResetLimits are not supported while having record filter.") 3452 3453 return SimpleRetriever( 3454 name=name, 3455 paginator=paginator, 3456 primary_key=primary_key, 3457 requester=requester, 3458 record_selector=record_selector, 3459 stream_slicer=_NO_STREAM_SLICING, 3460 request_option_provider=request_options_provider, 3461 config=config, 3462 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3463 additional_query_properties=query_properties, 3464 log_formatter=self._get_log_formatter(log_formatter, name), 3465 pagination_tracker_factory=self._create_pagination_tracker_factory( 3466 model.pagination_reset, cursor 3467 ), 3468 parameters=model.parameters or {}, 3469 ) 3470 3471 def _create_pagination_tracker_factory( 3472 self, model: Optional[PaginationResetModel], cursor: Cursor 3473 ) -> Callable[[], PaginationTracker]: 3474 if model is None: 3475 return lambda: PaginationTracker() 3476 3477 # Until we figure out a way to use any cursor for PaginationTracker, we will have to have this cursor selector logic 3478 cursor_factory: Callable[[], Optional[ConcurrentCursor]] = lambda: None 3479 if model.action == PaginationResetActionModel.RESET: 3480 # in that case, we will let cursor_factory to return None even if the stream has a cursor 3481 pass 3482 elif model.action == PaginationResetActionModel.SPLIT_USING_CURSOR: 3483 if isinstance(cursor, ConcurrentCursor): 3484 cursor_factory = lambda: cursor.copy_without_state() # type: ignore # the if condition validates that it is a ConcurrentCursor 3485 elif isinstance(cursor, ConcurrentPerPartitionCursor): 3486 cursor_factory = lambda: cursor._cursor_factory.create( # type: ignore # if this becomes a problem, we would need to extract the cursor_factory instantiation logic and make it accessible here 3487 {}, datetime.timedelta(0) 3488 ) 3489 elif not isinstance(cursor, FinalStateCursor): 3490 LOGGER.warning( 3491 "Unknown cursor for PaginationTracker. 
Pagination resets might not work properly" 3492 ) 3493 else: 3494 raise ValueError(f"Unknown PaginationReset action: {model.action}") 3495 3496 limit = model.limits.number_of_records if model and model.limits else None 3497 return lambda: PaginationTracker(cursor_factory(), limit) 3498 3499 def _get_log_formatter( 3500 self, log_formatter: Callable[[Response], Any] | None, name: str 3501 ) -> Callable[[Response], Any] | None: 3502 if self._should_limit_slices_fetched(): 3503 return ( 3504 ( 3505 lambda response: format_http_message( 3506 response, 3507 f"Stream '{name}' request", 3508 f"Request performed in order to extract records for stream '{name}'", 3509 name, 3510 ) 3511 ) 3512 if not log_formatter 3513 else log_formatter 3514 ) 3515 return None 3516 3517 def _should_limit_slices_fetched(self) -> bool: 3518 """ 3519 Returns True if the number of slices fetched should be limited, False otherwise. 3520 This is used to limit the number of slices fetched during tests. 3521 """ 3522 return bool(self._limit_slices_fetched or self._emit_connector_builder_messages) 3523 3524 @staticmethod 3525 def _has_query_properties_in_request_parameters( 3526 requester: Union[HttpRequesterModel, CustomRequesterModel], 3527 ) -> bool: 3528 if not hasattr(requester, "request_parameters"): 3529 return False 3530 request_parameters = requester.request_parameters 3531 if request_parameters and isinstance(request_parameters, Mapping): 3532 for request_parameter in request_parameters.values(): 3533 if isinstance(request_parameter, QueryPropertiesModel): 3534 return True 3535 return False 3536 3537 @staticmethod 3538 def _remove_query_properties( 3539 request_parameters: Mapping[str, Union[str, QueryPropertiesModel]], 3540 ) -> Mapping[str, str]: 3541 return { 3542 parameter_field: request_parameter 3543 for parameter_field, request_parameter in request_parameters.items() 3544 if not isinstance(request_parameter, QueryPropertiesModel) 3545 } 3546 3547 def create_state_delegating_stream( 3548 self, 3549 model: StateDelegatingStreamModel, 3550 config: Config, 3551 has_parent_state: Optional[bool] = None, 3552 **kwargs: Any, 3553 ) -> DefaultStream: 3554 if ( 3555 model.full_refresh_stream.name != model.name 3556 or model.name != model.incremental_stream.name 3557 ): 3558 raise ValueError( 3559 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 
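# Editorial note (illustrative): a StateDelegatingStream simply selects one of two stream definitions
# that must share the same name; _get_state_delegating_stream_model below applies roughly:
#     stream state or parent state present -> model.incremental_stream
#     otherwise                            -> model.full_refresh_stream
# which is why the name-equality check above is enforced: both definitions describe the same logical
# stream, and the available state decides which retrieval strategy is used.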
3560 ) 3561 3562 stream_model = self._get_state_delegating_stream_model( 3563 False if has_parent_state is None else has_parent_state, model 3564 ) 3565 3566 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel 3567 3568 def _get_state_delegating_stream_model( 3569 self, has_parent_state: bool, model: StateDelegatingStreamModel 3570 ) -> DeclarativeStreamModel: 3571 return ( 3572 model.incremental_stream 3573 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3574 else model.full_refresh_stream 3575 ) 3576 3577 def _create_async_job_status_mapping( 3578 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3579 ) -> Mapping[str, AsyncJobStatus]: 3580 api_status_to_cdk_status = {} 3581 for cdk_status, api_statuses in model.dict().items(): 3582 if cdk_status == "type": 3583 # This is an element of the dict because of the typing of the CDK but it is not a CDK status 3584 continue 3585 3586 for status in api_statuses: 3587 if status in api_status_to_cdk_status: 3588 raise ValueError( 3589 f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once" 3590 ) 3591 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3592 return api_status_to_cdk_status 3593 3594 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3595 match status: 3596 case "running": 3597 return AsyncJobStatus.RUNNING 3598 case "completed": 3599 return AsyncJobStatus.COMPLETED 3600 case "failed": 3601 return AsyncJobStatus.FAILED 3602 case "timeout": 3603 return AsyncJobStatus.TIMED_OUT 3604 case _: 3605 raise ValueError(f"Unsupported CDK status {status}") 3606 3607 def create_async_retriever( 3608 self, 3609 model: AsyncRetrieverModel, 3610 config: Config, 3611 *, 3612 name: str, 3613 primary_key: Optional[ 3614 Union[str, List[str], List[List[str]]] 3615 ], # this seems to be needed to match create_simple_retriever 3616 stream_slicer: Optional[StreamSlicer], 3617 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3618 transformations: List[RecordTransformation], 3619 **kwargs: Any, 3620 ) -> AsyncRetriever: 3621 if model.download_target_requester and not model.download_target_extractor: 3622 raise ValueError( 3623 f"`download_target_extractor` required if using a `download_target_requester`" 3624 ) 3625 3626 def _get_download_retriever( 3627 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3628 ) -> SimpleRetriever: 3629 # We create a record selector for the download retriever 3630 # with no schema normalization and no transformations, neither record filter 3631 # as all this occurs in the record_selector of the AsyncRetriever 3632 record_selector = RecordSelector( 3633 extractor=extractor, 3634 name=name, 3635 record_filter=None, 3636 transformations=[], 3637 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3638 config=config, 3639 parameters={}, 3640 ) 3641 paginator = ( 3642 self._create_component_from_model( 3643 model=model.download_paginator, 3644 decoder=_decoder, 3645 config=config, 3646 url_base="", 3647 ) 3648 if model.download_paginator 3649 else NoPagination(parameters={}) 3650 ) 3651 3652 return SimpleRetriever( 3653 requester=requester, 3654 record_selector=record_selector, 3655 primary_key=None, 3656 name=name, 3657 paginator=paginator, 3658 config=config, 3659 parameters={}, 3660 
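# Editorial sketch (illustrative, refers to _create_async_job_status_mapping above): a status_mapping
# such as
#     status_mapping:
#       running: ["pending", "in_progress"]
#       completed: ["done"]
#       failed: ["error"]
#       timeout: ["expired"]
# is inverted into {"pending": RUNNING, "in_progress": RUNNING, "done": COMPLETED, ...}; listing the
# same API status under two CDK statuses raises the duplicate-status error. The API status strings are
# assumptions for illustration; the CDK-side keys are running/completed/failed/timeout.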
log_formatter=self._get_log_formatter(None, name), 3661 ) 3662 3663 def _get_job_timeout() -> datetime.timedelta: 3664 user_defined_timeout: Optional[int] = ( 3665 int( 3666 InterpolatedString.create( 3667 str(model.polling_job_timeout), 3668 parameters={}, 3669 ).eval(config) 3670 ) 3671 if model.polling_job_timeout 3672 else None 3673 ) 3674 3675 # check for user defined timeout during the test read or 15 minutes 3676 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3677 # default value for non-connector builder is 60 minutes. 3678 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3679 3680 return ( 3681 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3682 ) 3683 3684 decoder = ( 3685 self._create_component_from_model(model=model.decoder, config=config) 3686 if model.decoder 3687 else JsonDecoder(parameters={}) 3688 ) 3689 record_selector = self._create_component_from_model( 3690 model=model.record_selector, 3691 config=config, 3692 decoder=decoder, 3693 name=name, 3694 transformations=transformations, 3695 client_side_incremental_sync=client_side_incremental_sync, 3696 ) 3697 3698 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3699 if self._should_limit_slices_fetched(): 3700 stream_slicer = cast( 3701 StreamSlicer, 3702 StreamSlicerTestReadDecorator( 3703 wrapped_slicer=stream_slicer, 3704 maximum_number_of_slices=self._limit_slices_fetched or 5, 3705 ), 3706 ) 3707 3708 creation_requester = self._create_component_from_model( 3709 model=model.creation_requester, 3710 decoder=decoder, 3711 config=config, 3712 name=f"job creation - {name}", 3713 ) 3714 polling_requester = self._create_component_from_model( 3715 model=model.polling_requester, 3716 decoder=decoder, 3717 config=config, 3718 name=f"job polling - {name}", 3719 ) 3720 job_download_components_name = f"job download - {name}" 3721 download_decoder = ( 3722 self._create_component_from_model(model=model.download_decoder, config=config) 3723 if model.download_decoder 3724 else JsonDecoder(parameters={}) 3725 ) 3726 download_extractor = ( 3727 self._create_component_from_model( 3728 model=model.download_extractor, 3729 config=config, 3730 decoder=download_decoder, 3731 parameters=model.parameters, 3732 ) 3733 if model.download_extractor 3734 else DpathExtractor( 3735 [], 3736 config=config, 3737 decoder=download_decoder, 3738 parameters=model.parameters or {}, 3739 ) 3740 ) 3741 download_requester = self._create_component_from_model( 3742 model=model.download_requester, 3743 decoder=download_decoder, 3744 config=config, 3745 name=job_download_components_name, 3746 ) 3747 download_retriever = _get_download_retriever( 3748 download_requester, download_extractor, download_decoder 3749 ) 3750 abort_requester = ( 3751 self._create_component_from_model( 3752 model=model.abort_requester, 3753 decoder=decoder, 3754 config=config, 3755 name=f"job abort - {name}", 3756 ) 3757 if model.abort_requester 3758 else None 3759 ) 3760 delete_requester = ( 3761 self._create_component_from_model( 3762 model=model.delete_requester, 3763 decoder=decoder, 3764 config=config, 3765 name=f"job delete - {name}", 3766 ) 3767 if model.delete_requester 3768 else None 3769 ) 3770 download_target_requester = ( 3771 self._create_component_from_model( 3772 model=model.download_target_requester, 3773 decoder=decoder, 3774 config=config, 3775 name=f"job extract_url - {name}", 3776 ) 3777 if model.download_target_requester 3778 else None 3779 ) 3780 
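# Editorial note (illustrative): _get_job_timeout above resolves the polling timeout roughly as
# follows (values in minutes; the 15/60 defaults come from the code, 30 is an example override):
#     polling_job_timeout: 30 -> 30 minutes for connector-builder test reads and regular syncs alike
#     not set                 -> 15 minutes for connector-builder test reads, 60 minutes otherwise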
status_extractor = self._create_component_from_model( 3781 model=model.status_extractor, decoder=decoder, config=config, name=name 3782 ) 3783 download_target_extractor = ( 3784 self._create_component_from_model( 3785 model=model.download_target_extractor, 3786 decoder=decoder, 3787 config=config, 3788 name=name, 3789 ) 3790 if model.download_target_extractor 3791 else None 3792 ) 3793 3794 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3795 creation_requester=creation_requester, 3796 polling_requester=polling_requester, 3797 download_retriever=download_retriever, 3798 download_target_requester=download_target_requester, 3799 abort_requester=abort_requester, 3800 delete_requester=delete_requester, 3801 status_extractor=status_extractor, 3802 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3803 download_target_extractor=download_target_extractor, 3804 job_timeout=_get_job_timeout(), 3805 ) 3806 3807 async_job_partition_router = AsyncJobPartitionRouter( 3808 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3809 job_repository, 3810 stream_slices, 3811 self._job_tracker, 3812 self._message_repository, 3813 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk 3814 has_bulk_parent=False, 3815 # set the `job_max_retry` to 1 for the `Connector Builder`` use-case. 3816 # `None` == default retry is set to 3 attempts, under the hood. 3817 job_max_retry=1 if self._emit_connector_builder_messages else None, 3818 ), 3819 stream_slicer=stream_slicer, 3820 config=config, 3821 parameters=model.parameters or {}, 3822 ) 3823 3824 return AsyncRetriever( 3825 record_selector=record_selector, 3826 stream_slicer=async_job_partition_router, 3827 config=config, 3828 parameters=model.parameters or {}, 3829 ) 3830 3831 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3832 config_migrations = [ 3833 self._create_component_from_model(migration, config) 3834 for migration in ( 3835 model.config_normalization_rules.config_migrations 3836 if ( 3837 model.config_normalization_rules 3838 and model.config_normalization_rules.config_migrations 3839 ) 3840 else [] 3841 ) 3842 ] 3843 config_transformations = [ 3844 self._create_component_from_model(transformation, config) 3845 for transformation in ( 3846 model.config_normalization_rules.transformations 3847 if ( 3848 model.config_normalization_rules 3849 and model.config_normalization_rules.transformations 3850 ) 3851 else [] 3852 ) 3853 ] 3854 config_validations = [ 3855 self._create_component_from_model(validation, config) 3856 for validation in ( 3857 model.config_normalization_rules.validations 3858 if ( 3859 model.config_normalization_rules 3860 and model.config_normalization_rules.validations 3861 ) 3862 else [] 3863 ) 3864 ] 3865 3866 return Spec( 3867 connection_specification=model.connection_specification, 3868 documentation_url=model.documentation_url, 3869 advanced_auth=model.advanced_auth, 3870 parameters={}, 3871 config_migrations=config_migrations, 3872 config_transformations=config_transformations, 3873 config_validations=config_validations, 3874 ) 3875 3876 def create_substream_partition_router( 3877 self, 3878 model: SubstreamPartitionRouterModel, 3879 config: Config, 3880 *, 3881 stream_name: str, 3882 **kwargs: Any, 3883 ) -> SubstreamPartitionRouter: 3884 parent_stream_configs = [] 3885 if model.parent_stream_configs: 3886 parent_stream_configs.extend( 3887 [ 3888 
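# Editorial sketch (illustrative): every entry of parent_stream_configs goes through
# create_parent_stream_config_with_substream_wrapper, so each parent stream gets its own factory with
# its own state manager and message repository. A minimal parent config, with field names following
# the ParentStreamConfig model and values that are assumptions for illustration:
#     parent_key: "id"
#     partition_field: "parent_id"
#     incremental_dependency: true
#     stream: <the parent stream definition>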
self.create_parent_stream_config_with_substream_wrapper( 3889 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3890 ) 3891 for parent_stream_config in model.parent_stream_configs 3892 ] 3893 ) 3894 3895 return SubstreamPartitionRouter( 3896 parent_stream_configs=parent_stream_configs, 3897 parameters=model.parameters or {}, 3898 config=config, 3899 ) 3900 3901 def create_parent_stream_config_with_substream_wrapper( 3902 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 3903 ) -> Any: 3904 # getting the parent state 3905 child_state = self._connector_state_manager.get_stream_state(stream_name, None) 3906 3907 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3908 has_parent_state = bool( 3909 self._connector_state_manager.get_stream_state(stream_name, None) 3910 if model.incremental_dependency 3911 else False 3912 ) 3913 connector_state_manager = self._instantiate_parent_stream_state_manager( 3914 child_state, config, model, has_parent_state 3915 ) 3916 3917 substream_factory = ModelToComponentFactory( 3918 connector_state_manager=connector_state_manager, 3919 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3920 limit_slices_fetched=self._limit_slices_fetched, 3921 emit_connector_builder_messages=self._emit_connector_builder_messages, 3922 disable_retries=self._disable_retries, 3923 disable_cache=self._disable_cache, 3924 message_repository=StateFilteringMessageRepository( 3925 LogAppenderMessageRepositoryDecorator( 3926 { 3927 "airbyte_cdk": {"stream": {"is_substream": True}}, 3928 "http": {"is_auxiliary": True}, 3929 }, 3930 self._message_repository, 3931 self._evaluate_log_level(self._emit_connector_builder_messages), 3932 ), 3933 ), 3934 api_budget=self._api_budget, 3935 ) 3936 3937 return substream_factory.create_parent_stream_config( 3938 model=model, config=config, stream_name=stream_name, **kwargs 3939 ) 3940 3941 def _instantiate_parent_stream_state_manager( 3942 self, 3943 child_state: MutableMapping[str, Any], 3944 config: Config, 3945 model: ParentStreamConfigModel, 3946 has_parent_state: bool, 3947 ) -> ConnectorStateManager: 3948 """ 3949 With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the 3950 `set_initial_state` flow that existed for the declarative cursors. This state is taken from 3951 self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account 3952 for the MessageRepository being different). So we need to pass a ConnectorStateManager to the 3953 ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if 3954 incremental_dependency is set. 3955 """ 3956 if model.incremental_dependency and child_state: 3957 parent_stream_name = model.stream.name or "" 3958 parent_state = ConcurrentPerPartitionCursor.get_parent_state( 3959 child_state, parent_stream_name 3960 ) 3961 3962 if not parent_state: 3963 # there are two migration cases: state value from child stream or from global state 3964 parent_state = ConcurrentPerPartitionCursor.get_global_state( 3965 child_state, parent_stream_name 3966 ) 3967 3968 if not parent_state and not isinstance(parent_state, dict): 3969 cursor_values = child_state.values() 3970 if cursor_values and len(cursor_values) == 1: 3971 # We assume the child state is a pair `{<cursor_field>: <cursor_value>}` and we will use the 3972 # cursor value as a parent state. 
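# Editorial example (illustrative): a legacy child state such as {"updated_at": "2024-01-01"} (cursor
# field and value are assumptions) is lifted below into a per-stream AirbyteStateMessage for the parent
# stream, using the parent's cursor_field resolved from its incremental_sync definition. When the child
# state already carries an explicit per-partition parent state or a global state, those lookups above
# win and this fallback is skipped.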
3973 incremental_sync_model: Union[ 3974 DatetimeBasedCursorModel, 3975 IncrementingCountCursorModel, 3976 ] = ( 3977 model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream 3978 if isinstance(model.stream, DeclarativeStreamModel) 3979 else self._get_state_delegating_stream_model( 3980 has_parent_state, model.stream 3981 ).incremental_sync 3982 ) 3983 cursor_field = InterpolatedString.create( 3984 incremental_sync_model.cursor_field, 3985 parameters=incremental_sync_model.parameters or {}, 3986 ).eval(config) 3987 parent_state = AirbyteStateMessage( 3988 type=AirbyteStateType.STREAM, 3989 stream=AirbyteStreamState( 3990 stream_descriptor=StreamDescriptor( 3991 name=parent_stream_name, namespace=None 3992 ), 3993 stream_state=AirbyteStateBlob( 3994 {cursor_field: list(cursor_values)[0]} 3995 ), 3996 ), 3997 ) 3998 return ConnectorStateManager([parent_state] if parent_state else []) 3999 4000 return ConnectorStateManager([]) 4001 4002 @staticmethod 4003 def create_wait_time_from_header( 4004 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 4005 ) -> WaitTimeFromHeaderBackoffStrategy: 4006 return WaitTimeFromHeaderBackoffStrategy( 4007 header=model.header, 4008 parameters=model.parameters or {}, 4009 config=config, 4010 regex=model.regex, 4011 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 4012 if model.max_waiting_time_in_seconds is not None 4013 else None, 4014 ) 4015 4016 @staticmethod 4017 def create_wait_until_time_from_header( 4018 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 4019 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 4020 return WaitUntilTimeFromHeaderBackoffStrategy( 4021 header=model.header, 4022 parameters=model.parameters or {}, 4023 config=config, 4024 min_wait=model.min_wait, 4025 regex=model.regex, 4026 ) 4027 4028 def get_message_repository(self) -> MessageRepository: 4029 return self._message_repository 4030 4031 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 4032 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 4033 4034 @staticmethod 4035 def create_components_mapping_definition( 4036 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 4037 ) -> ComponentMappingDefinition: 4038 interpolated_value = InterpolatedString.create( 4039 model.value, parameters=model.parameters or {} 4040 ) 4041 field_path = [ 4042 InterpolatedString.create(path, parameters=model.parameters or {}) 4043 for path in model.field_path 4044 ] 4045 return ComponentMappingDefinition( 4046 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 4047 value=interpolated_value, 4048 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 4049 create_or_update=model.create_or_update, 4050 condition=model.condition, 4051 parameters=model.parameters or {}, 4052 ) 4053 4054 def create_http_components_resolver( 4055 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 4056 ) -> Any: 4057 retriever = self._create_component_from_model( 4058 model=model.retriever, 4059 config=config, 4060 name=f"{stream_name if stream_name else '__http_components_resolver'}", 4061 primary_key=None, 4062 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4063 transformations=[], 4064 ) 4065 4066 components_mapping = [] 4067 for 
component_mapping_definition_model in model.components_mapping: 4068 if component_mapping_definition_model.condition: 4069 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4070 components_mapping.append( 4071 self._create_component_from_model( 4072 model=component_mapping_definition_model, 4073 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4074 component_mapping_definition_model.value_type 4075 ), 4076 config=config, 4077 ) 4078 ) 4079 4080 return HttpComponentsResolver( 4081 retriever=retriever, 4082 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4083 config=config, 4084 components_mapping=components_mapping, 4085 parameters=model.parameters or {}, 4086 ) 4087 4088 @staticmethod 4089 def create_stream_config( 4090 model: StreamConfigModel, config: Config, **kwargs: Any 4091 ) -> StreamConfig: 4092 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 4093 [x for x in model.configs_pointer] if model.configs_pointer else [] 4094 ) 4095 4096 return StreamConfig( 4097 configs_pointer=model_configs_pointer, 4098 default_values=model.default_values, 4099 parameters=model.parameters or {}, 4100 ) 4101 4102 def create_config_components_resolver( 4103 self, 4104 model: ConfigComponentsResolverModel, 4105 config: Config, 4106 ) -> Any: 4107 model_stream_configs = ( 4108 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 4109 ) 4110 4111 stream_configs = [ 4112 self._create_component_from_model( 4113 stream_config, config=config, parameters=model.parameters or {} 4114 ) 4115 for stream_config in model_stream_configs 4116 ] 4117 4118 components_mapping = [ 4119 self._create_component_from_model( 4120 model=components_mapping_definition_model, 4121 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4122 components_mapping_definition_model.value_type 4123 ), 4124 config=config, 4125 parameters=model.parameters, 4126 ) 4127 for components_mapping_definition_model in model.components_mapping 4128 ] 4129 4130 return ConfigComponentsResolver( 4131 stream_configs=stream_configs, 4132 config=config, 4133 components_mapping=components_mapping, 4134 parameters=model.parameters or {}, 4135 ) 4136 4137 def create_parametrized_components_resolver( 4138 self, 4139 model: ParametrizedComponentsResolverModel, 4140 config: Config, 4141 ) -> ParametrizedComponentsResolver: 4142 stream_parameters = StreamParametersDefinition( 4143 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 4144 ) 4145 4146 components_mapping = [] 4147 for components_mapping_definition_model in model.components_mapping: 4148 if components_mapping_definition_model.condition: 4149 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4150 components_mapping.append( 4151 self._create_component_from_model( 4152 model=components_mapping_definition_model, 4153 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4154 components_mapping_definition_model.value_type 4155 ), 4156 config=config, 4157 ) 4158 ) 4159 return ParametrizedComponentsResolver( 4160 stream_parameters=stream_parameters, 4161 config=config, 4162 components_mapping=components_mapping, 4163 parameters=model.parameters or {}, 4164 ) 4165 4166 _UNSUPPORTED_DECODER_ERROR = ( 4167 "Specified decoder of {decoder_type} is not supported for pagination." 
4168 "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead." 4169 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 4170 ) 4171 4172 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 4173 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 4174 return True 4175 elif isinstance(decoder, CompositeRawDecoder): 4176 return self._is_supported_parser_for_pagination(decoder.parser) 4177 else: 4178 return False 4179 4180 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 4181 if isinstance(parser, JsonParser): 4182 return True 4183 elif isinstance(parser, GzipParser): 4184 return isinstance(parser.inner_parser, JsonParser) 4185 else: 4186 return False 4187 4188 def create_http_api_budget( 4189 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4190 ) -> HttpAPIBudget: 4191 policies = [ 4192 self._create_component_from_model(model=policy, config=config) 4193 for policy in model.policies 4194 ] 4195 4196 return HttpAPIBudget( 4197 policies=policies, 4198 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4199 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4200 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4201 ) 4202 4203 def create_fixed_window_call_rate_policy( 4204 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4205 ) -> FixedWindowCallRatePolicy: 4206 matchers = [ 4207 self._create_component_from_model(model=matcher, config=config) 4208 for matcher in model.matchers 4209 ] 4210 4211 # Set the initial reset timestamp to 10 days from now. 4212 # This value will be updated by the first request. 
4213 return FixedWindowCallRatePolicy( 4214 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4215 period=parse_duration(model.period), 4216 call_limit=model.call_limit, 4217 matchers=matchers, 4218 ) 4219 4220 def create_file_uploader( 4221 self, model: FileUploaderModel, config: Config, **kwargs: Any 4222 ) -> FileUploader: 4223 name = "File Uploader" 4224 requester = self._create_component_from_model( 4225 model=model.requester, 4226 config=config, 4227 name=name, 4228 **kwargs, 4229 ) 4230 download_target_extractor = self._create_component_from_model( 4231 model=model.download_target_extractor, 4232 config=config, 4233 name=name, 4234 **kwargs, 4235 ) 4236 emit_connector_builder_messages = self._emit_connector_builder_messages 4237 file_uploader = DefaultFileUploader( 4238 requester=requester, 4239 download_target_extractor=download_target_extractor, 4240 config=config, 4241 file_writer=NoopFileWriter() 4242 if emit_connector_builder_messages 4243 else LocalFileSystemFileWriter(), 4244 parameters=model.parameters or {}, 4245 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4246 ) 4247 4248 return ( 4249 ConnectorBuilderFileUploader(file_uploader) 4250 if emit_connector_builder_messages 4251 else file_uploader 4252 ) 4253 4254 def create_moving_window_call_rate_policy( 4255 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4256 ) -> MovingWindowCallRatePolicy: 4257 rates = [ 4258 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4259 ] 4260 matchers = [ 4261 self._create_component_from_model(model=matcher, config=config) 4262 for matcher in model.matchers 4263 ] 4264 return MovingWindowCallRatePolicy( 4265 rates=rates, 4266 matchers=matchers, 4267 ) 4268 4269 def create_unlimited_call_rate_policy( 4270 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4271 ) -> UnlimitedCallRatePolicy: 4272 matchers = [ 4273 self._create_component_from_model(model=matcher, config=config) 4274 for matcher in model.matchers 4275 ] 4276 4277 return UnlimitedCallRatePolicy( 4278 matchers=matchers, 4279 ) 4280 4281 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 4282 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 4283 return Rate( 4284 limit=int(interpolated_limit.eval(config=config)), 4285 interval=parse_duration(model.interval), 4286 ) 4287 4288 def create_http_request_matcher( 4289 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4290 ) -> HttpRequestRegexMatcher: 4291 return HttpRequestRegexMatcher( 4292 method=model.method, 4293 url_base=model.url_base, 4294 url_path_pattern=model.url_path_pattern, 4295 params=model.params, 4296 headers=model.headers, 4297 ) 4298 4299 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 4300 self._api_budget = self.create_component( 4301 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 4302 ) 4303 4304 def create_grouping_partition_router( 4305 self, 4306 model: GroupingPartitionRouterModel, 4307 config: Config, 4308 *, 4309 stream_name: str, 4310 **kwargs: Any, 4311 ) -> GroupingPartitionRouter: 4312 underlying_router = self._create_component_from_model( 4313 model=model.underlying_partition_router, 4314 config=config, 4315 stream_name=stream_name, 4316 **kwargs, 4317 ) 4318 if model.group_size < 1: 4319 raise ValueError(f"Group size must be greater than 0, got 
{model.group_size}") 4320 4321 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4322 # because they are specific to individual partitions and cannot be aggregated or handled 4323 # when grouping, potentially leading to incorrect API calls. Any request customization 4324 # should be managed at the stream level through the requester's configuration. 4325 if isinstance(underlying_router, SubstreamPartitionRouter): 4326 if any( 4327 parent_config.request_option 4328 for parent_config in underlying_router.parent_stream_configs 4329 ): 4330 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4331 4332 if isinstance(underlying_router, ListPartitionRouter): 4333 if underlying_router.request_option: 4334 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4335 4336 return GroupingPartitionRouter( 4337 group_size=model.group_size, 4338 underlying_partition_router=underlying_router, 4339 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4340 config=config, 4341 ) 4342 4343 def _ensure_query_properties_to_model( 4344 self, requester: Union[HttpRequesterModel, CustomRequesterModel] 4345 ) -> None: 4346 """ 4347 For some reason, it seems like CustomRequesterModel request_parameters stays as dictionaries which means that 4348 the other conditions relying on it being QueryPropertiesModel instead of a dict fail. Here, we migrate them to 4349 proper model. 4350 """ 4351 if not hasattr(requester, "request_parameters"): 4352 return 4353 4354 request_parameters = requester.request_parameters 4355 if request_parameters and isinstance(request_parameters, Dict): 4356 for request_parameter_key in request_parameters.keys(): 4357 request_parameter = request_parameters[request_parameter_key] 4358 if ( 4359 isinstance(request_parameter, Dict) 4360 and request_parameter.get("type") == "QueryProperties" 4361 ): 4362 request_parameters[request_parameter_key] = QueryPropertiesModel.parse_obj( 4363 request_parameter 4364 ) 4365 4366 def _get_catalog_defined_cursor_field( 4367 self, stream_name: str, allow_catalog_defined_cursor_field: bool 4368 ) -> Optional[CursorField]: 4369 if not allow_catalog_defined_cursor_field: 4370 return None 4371 4372 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 4373 4374 # Depending on the operation is being performed, there may not be a configured stream yet. In this 4375 # case we return None which will then use the default cursor field defined on the cursor model. 4376 # We also treat cursor_field: [""] (list with empty string) as no cursor field, since this can 4377 # occur when the platform serializes "no cursor configured" streams incorrectly. 4378 if ( 4379 not configured_stream 4380 or not configured_stream.cursor_field 4381 or not configured_stream.cursor_field[0] 4382 ): 4383 return None 4384 elif len(configured_stream.cursor_field) > 1: 4385 raise ValueError( 4386 f"The `{stream_name}` stream does not support nested cursor_field. Please specify only a single cursor_field for the stream in the configured catalog." 4387 ) 4388 else: 4389 return CursorField( 4390 cursor_field_key=configured_stream.cursor_field[0], 4391 supports_catalog_defined_cursor_field=allow_catalog_defined_cursor_field, 4392 )
672class ModelToComponentFactory: 673 EPOCH_DATETIME_FORMAT = "%s" 674 675 def __init__( 676 self, 677 limit_pages_fetched_per_slice: Optional[int] = None, 678 limit_slices_fetched: Optional[int] = None, 679 emit_connector_builder_messages: bool = False, 680 disable_retries: bool = False, 681 disable_cache: bool = False, 682 message_repository: Optional[MessageRepository] = None, 683 connector_state_manager: Optional[ConnectorStateManager] = None, 684 max_concurrent_async_job_count: Optional[int] = None, 685 configured_catalog: Optional[ConfiguredAirbyteCatalog] = None, 686 api_budget: Optional[APIBudget] = None, 687 ): 688 self._init_mappings() 689 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 690 self._limit_slices_fetched = limit_slices_fetched 691 self._emit_connector_builder_messages = emit_connector_builder_messages 692 self._disable_retries = disable_retries 693 self._disable_cache = disable_cache 694 self._message_repository = message_repository or InMemoryMessageRepository( 695 self._evaluate_log_level(emit_connector_builder_messages) 696 ) 697 self._stream_name_to_configured_stream = self._create_stream_name_to_configured_stream( 698 configured_catalog 699 ) 700 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 701 self._api_budget: Optional[Union[APIBudget]] = api_budget 702 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 703 # placeholder for deprecation warnings 704 self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = [] 705 706 def _init_mappings(self) -> None: 707 self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { 708 AddedFieldDefinitionModel: self.create_added_field_definition, 709 AddFieldsModel: self.create_add_fields, 710 ApiKeyAuthenticatorModel: self.create_api_key_authenticator, 711 BasicHttpAuthenticatorModel: self.create_basic_http_authenticator, 712 BearerAuthenticatorModel: self.create_bearer_authenticator, 713 CheckStreamModel: self.create_check_stream, 714 DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config, 715 CheckDynamicStreamModel: self.create_check_dynamic_stream, 716 CompositeErrorHandlerModel: self.create_composite_error_handler, 717 ConcurrencyLevelModel: self.create_concurrency_level, 718 ConfigMigrationModel: self.create_config_migration, 719 ConfigAddFieldsModel: self.create_config_add_fields, 720 ConfigRemapFieldModel: self.create_config_remap_field, 721 ConfigRemoveFieldsModel: self.create_config_remove_fields, 722 ConstantBackoffStrategyModel: self.create_constant_backoff_strategy, 723 CsvDecoderModel: self.create_csv_decoder, 724 CursorPaginationModel: self.create_cursor_pagination, 725 CustomAuthenticatorModel: self.create_custom_component, 726 CustomBackoffStrategyModel: self.create_custom_component, 727 CustomDecoderModel: self.create_custom_component, 728 CustomErrorHandlerModel: self.create_custom_component, 729 CustomRecordExtractorModel: self.create_custom_component, 730 CustomRecordFilterModel: self.create_custom_component, 731 CustomRequesterModel: self.create_custom_component, 732 CustomRetrieverModel: self.create_custom_component, 733 CustomSchemaLoader: self.create_custom_component, 734 CustomSchemaNormalizationModel: self.create_custom_component, 735 CustomStateMigration: self.create_custom_component, 736 CustomPaginationStrategyModel: self.create_custom_component, 737 CustomPartitionRouterModel: self.create_custom_component, 738 CustomTransformationModel: 
self.create_custom_component, 739 CustomValidationStrategyModel: self.create_custom_component, 740 CustomConfigTransformationModel: self.create_custom_component, 741 DeclarativeStreamModel: self.create_default_stream, 742 DefaultErrorHandlerModel: self.create_default_error_handler, 743 DefaultPaginatorModel: self.create_default_paginator, 744 DpathExtractorModel: self.create_dpath_extractor, 745 DpathValidatorModel: self.create_dpath_validator, 746 ResponseToFileExtractorModel: self.create_response_to_file_extractor, 747 ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy, 748 SessionTokenAuthenticatorModel: self.create_session_token_authenticator, 749 GroupByKeyMergeStrategyModel: self.create_group_by_key, 750 HttpRequesterModel: self.create_http_requester, 751 HttpResponseFilterModel: self.create_http_response_filter, 752 InlineSchemaLoaderModel: self.create_inline_schema_loader, 753 JsonDecoderModel: self.create_json_decoder, 754 JsonlDecoderModel: self.create_jsonl_decoder, 755 JsonSchemaPropertySelectorModel: self.create_json_schema_property_selector, 756 GzipDecoderModel: self.create_gzip_decoder, 757 KeysToLowerModel: self.create_keys_to_lower_transformation, 758 KeysToSnakeCaseModel: self.create_keys_to_snake_transformation, 759 KeysReplaceModel: self.create_keys_replace_transformation, 760 FlattenFieldsModel: self.create_flatten_fields, 761 DpathFlattenFieldsModel: self.create_dpath_flatten_fields, 762 IterableDecoderModel: self.create_iterable_decoder, 763 XmlDecoderModel: self.create_xml_decoder, 764 JsonFileSchemaLoaderModel: self.create_json_file_schema_loader, 765 DynamicSchemaLoaderModel: self.create_dynamic_schema_loader, 766 SchemaTypeIdentifierModel: self.create_schema_type_identifier, 767 TypesMapModel: self.create_types_map, 768 ComplexFieldTypeModel: self.create_complex_field_type, 769 JwtAuthenticatorModel: self.create_jwt_authenticator, 770 LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration, 771 ListPartitionRouterModel: self.create_list_partition_router, 772 MinMaxDatetimeModel: self.create_min_max_datetime, 773 NoAuthModel: self.create_no_auth, 774 NoPaginationModel: self.create_no_pagination, 775 OAuthAuthenticatorModel: self.create_oauth_authenticator, 776 OffsetIncrementModel: self.create_offset_increment, 777 PageIncrementModel: self.create_page_increment, 778 ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper, 779 PredicateValidatorModel: self.create_predicate_validator, 780 PropertiesFromEndpointModel: self.create_properties_from_endpoint, 781 PropertyChunkingModel: self.create_property_chunking, 782 QueryPropertiesModel: self.create_query_properties, 783 RecordFilterModel: self.create_record_filter, 784 RecordSelectorModel: self.create_record_selector, 785 RemoveFieldsModel: self.create_remove_fields, 786 RequestPathModel: self.create_request_path, 787 RequestOptionModel: self.create_request_option, 788 LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator, 789 SelectiveAuthenticatorModel: self.create_selective_authenticator, 790 SimpleRetrieverModel: self.create_simple_retriever, 791 StateDelegatingStreamModel: self.create_state_delegating_stream, 792 SpecModel: self.create_spec, 793 SubstreamPartitionRouterModel: self.create_substream_partition_router, 794 ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema, 795 WaitTimeFromHeaderModel: self.create_wait_time_from_header, 796 WaitUntilTimeFromHeaderModel: 
self.create_wait_until_time_from_header, 797 AsyncRetrieverModel: self.create_async_retriever, 798 HttpComponentsResolverModel: self.create_http_components_resolver, 799 ConfigComponentsResolverModel: self.create_config_components_resolver, 800 ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver, 801 StreamConfigModel: self.create_stream_config, 802 ComponentMappingDefinitionModel: self.create_components_mapping_definition, 803 ZipfileDecoderModel: self.create_zipfile_decoder, 804 HTTPAPIBudgetModel: self.create_http_api_budget, 805 FileUploaderModel: self.create_file_uploader, 806 FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, 807 MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, 808 UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, 809 RateModel: self.create_rate, 810 HttpRequestRegexMatcherModel: self.create_http_request_matcher, 811 GroupingPartitionRouterModel: self.create_grouping_partition_router, 812 } 813 814 # Needed for the case where we need to perform a second parse on the fields of a custom component 815 self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR} 816 817 @staticmethod 818 def _create_stream_name_to_configured_stream( 819 configured_catalog: Optional[ConfiguredAirbyteCatalog], 820 ) -> Mapping[str, ConfiguredAirbyteStream]: 821 return ( 822 {stream.stream.name: stream for stream in configured_catalog.streams} 823 if configured_catalog 824 else {} 825 ) 826 827 def create_component( 828 self, 829 model_type: Type[BaseModel], 830 component_definition: ComponentDefinition, 831 config: Config, 832 **kwargs: Any, 833 ) -> Any: 834 """ 835 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 836 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then 837 creating declarative components from that model. 838 839 :param model_type: The type of declarative component that is being initialized 840 :param component_definition: The mapping that represents a declarative component 841 :param config: The connector config that is provided by the customer 842 :return: The declarative component to be used at runtime 843 """ 844 845 component_type = component_definition.get("type") 846 if component_definition.get("type") != model_type.__name__: 847 raise ValueError( 848 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 849 ) 850 851 declarative_component_model = model_type.parse_obj(component_definition) 852 853 if not isinstance(declarative_component_model, model_type): 854 raise ValueError( 855 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 856 ) 857 858 return self._create_component_from_model( 859 model=declarative_component_model, config=config, **kwargs 860 ) 861 862 def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any: 863 if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR: 864 raise ValueError( 865 f"{model.__class__} with attributes {model} is not a valid component type" 866 ) 867 component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__) 868 if not component_constructor: 869 raise ValueError(f"Could not find constructor for {model.__class__}") 870 871 # collect deprecation warnings for supported models.
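# Models that expose deprecation logs (see _collect_model_deprecations below) derive from
# BaseModelWithDeprecations; their warnings are gathered here so get_model_deprecations() can
# surface them as ConnectorBuilderLogMessage entries (for example, in the Connector Builder).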
872 if isinstance(model, BaseModelWithDeprecations): 873 self._collect_model_deprecations(model) 874 875 return component_constructor(model=model, config=config, **kwargs) 876 877 def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]: 878 """ 879 Returns the deprecation warnings that were collected during the creation of components. 880 """ 881 return self._collected_deprecation_logs 882 883 def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None: 884 """ 885 Collects deprecation logs from the given model and appends any new logs to the internal collection. 886 887 This method checks if the provided model has deprecation logs (identified by the presence of the DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not already been collected, ensuring that duplicate logs are avoided. 888 889 Args: 890 model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs. 891 """ 892 if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None: 893 for log in model._deprecation_logs: 894 # avoid duplicates for deprecation logs observed. 895 if log not in self._collected_deprecation_logs: 896 self._collected_deprecation_logs.append(log) 897 898 def create_config_migration( 899 self, model: ConfigMigrationModel, config: Config 900 ) -> ConfigMigration: 901 transformations: List[ConfigTransformation] = [ 902 self._create_component_from_model(transformation, config) 903 for transformation in model.transformations 904 ] 905 906 return ConfigMigration( 907 description=model.description, 908 transformations=transformations, 909 ) 910 911 def create_config_add_fields( 912 self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any 913 ) -> ConfigAddFields: 914 fields = [self._create_component_from_model(field, config) for field in model.fields] 915 return ConfigAddFields( 916 fields=fields, 917 condition=model.condition or "", 918 ) 919 920 @staticmethod 921 def create_config_remove_fields( 922 model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any 923 ) -> ConfigRemoveFields: 924 return ConfigRemoveFields( 925 field_pointers=model.field_pointers, 926 condition=model.condition or "", 927 ) 928 929 @staticmethod 930 def create_config_remap_field( 931 model: ConfigRemapFieldModel, config: Config, **kwargs: Any 932 ) -> ConfigRemapField: 933 mapping = cast(Mapping[str, Any], model.map) 934 return ConfigRemapField( 935 map=mapping, 936 field_path=model.field_path, 937 config=config, 938 ) 939 940 def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator: 941 strategy = self._create_component_from_model(model.validation_strategy, config) 942 943 return DpathValidator( 944 field_path=model.field_path, 945 strategy=strategy, 946 ) 947 948 def create_predicate_validator( 949 self, model: PredicateValidatorModel, config: Config 950 ) -> PredicateValidator: 951 strategy = self._create_component_from_model(model.validation_strategy, config) 952 953 return PredicateValidator( 954 value=model.value, 955 strategy=strategy, 956 ) 957 958 @staticmethod 959 def create_validate_adheres_to_schema( 960 model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any 961 ) -> ValidateAdheresToSchema: 962 base_schema = cast(Mapping[str, Any], model.base_schema) 963 return ValidateAdheresToSchema( 964 schema=base_schema, 965 ) 966 967 @staticmethod 968 
def create_added_field_definition( 969 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 970 ) -> AddedFieldDefinition: 971 interpolated_value = InterpolatedString.create( 972 model.value, parameters=model.parameters or {} 973 ) 974 return AddedFieldDefinition( 975 path=model.path, 976 value=interpolated_value, 977 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 978 parameters=model.parameters or {}, 979 ) 980 981 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 982 added_field_definitions = [ 983 self._create_component_from_model( 984 model=added_field_definition_model, 985 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 986 added_field_definition_model.value_type 987 ), 988 config=config, 989 ) 990 for added_field_definition_model in model.fields 991 ] 992 return AddFields( 993 fields=added_field_definitions, 994 condition=model.condition or "", 995 parameters=model.parameters or {}, 996 ) 997 998 def create_keys_to_lower_transformation( 999 self, model: KeysToLowerModel, config: Config, **kwargs: Any 1000 ) -> KeysToLowerTransformation: 1001 return KeysToLowerTransformation() 1002 1003 def create_keys_to_snake_transformation( 1004 self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any 1005 ) -> KeysToSnakeCaseTransformation: 1006 return KeysToSnakeCaseTransformation() 1007 1008 def create_keys_replace_transformation( 1009 self, model: KeysReplaceModel, config: Config, **kwargs: Any 1010 ) -> KeysReplaceTransformation: 1011 return KeysReplaceTransformation( 1012 old=model.old, new=model.new, parameters=model.parameters or {} 1013 ) 1014 1015 def create_flatten_fields( 1016 self, model: FlattenFieldsModel, config: Config, **kwargs: Any 1017 ) -> FlattenFields: 1018 return FlattenFields( 1019 flatten_lists=model.flatten_lists if model.flatten_lists is not None else True 1020 ) 1021 1022 def create_dpath_flatten_fields( 1023 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 1024 ) -> DpathFlattenFields: 1025 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 1026 key_transformation = ( 1027 KeyTransformation( 1028 config=config, 1029 prefix=model.key_transformation.prefix, 1030 suffix=model.key_transformation.suffix, 1031 parameters=model.parameters or {}, 1032 ) 1033 if model.key_transformation is not None 1034 else None 1035 ) 1036 return DpathFlattenFields( 1037 config=config, 1038 field_path=model_field_path, 1039 delete_origin_value=model.delete_origin_value 1040 if model.delete_origin_value is not None 1041 else False, 1042 replace_record=model.replace_record if model.replace_record is not None else False, 1043 key_transformation=key_transformation, 1044 parameters=model.parameters or {}, 1045 ) 1046 1047 @staticmethod 1048 def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: 1049 if not value_type: 1050 return None 1051 names_to_types = { 1052 ValueType.string: str, 1053 ValueType.number: float, 1054 ValueType.integer: int, 1055 ValueType.boolean: bool, 1056 } 1057 return names_to_types[value_type] 1058 1059 def create_api_key_authenticator( 1060 self, 1061 model: ApiKeyAuthenticatorModel, 1062 config: Config, 1063 token_provider: Optional[TokenProvider] = None, 1064 **kwargs: Any, 1065 ) -> ApiKeyAuthenticator: 1066 if model.inject_into is None and model.header is None: 1067 raise ValueError( 1068 "Expected either inject_into or header to be set for 
ApiKeyAuthenticator" 1069 ) 1070 1071 if model.inject_into is not None and model.header is not None: 1072 raise ValueError( 1073 "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" 1074 ) 1075 1076 if token_provider is not None and model.api_token != "": 1077 raise ValueError( 1078 "If token_provider is set, api_token is ignored and has to be set to empty string." 1079 ) 1080 1081 request_option = ( 1082 self._create_component_from_model( 1083 model.inject_into, config, parameters=model.parameters or {} 1084 ) 1085 if model.inject_into 1086 else RequestOption( 1087 inject_into=RequestOptionType.header, 1088 field_name=model.header or "", 1089 parameters=model.parameters or {}, 1090 ) 1091 ) 1092 1093 return ApiKeyAuthenticator( 1094 token_provider=( 1095 token_provider 1096 if token_provider is not None 1097 else InterpolatedStringTokenProvider( 1098 api_token=model.api_token or "", 1099 config=config, 1100 parameters=model.parameters or {}, 1101 ) 1102 ), 1103 request_option=request_option, 1104 config=config, 1105 parameters=model.parameters or {}, 1106 ) 1107 1108 def create_legacy_to_per_partition_state_migration( 1109 self, 1110 model: LegacyToPerPartitionStateMigrationModel, 1111 config: Mapping[str, Any], 1112 declarative_stream: DeclarativeStreamModel, 1113 ) -> LegacyToPerPartitionStateMigration: 1114 retriever = declarative_stream.retriever 1115 if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)): 1116 raise ValueError( 1117 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}" 1118 ) 1119 partition_router = retriever.partition_router 1120 if not isinstance( 1121 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 1122 ): 1123 raise ValueError( 1124 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 1125 ) 1126 if not hasattr(partition_router, "parent_stream_configs"): 1127 raise ValueError( 1128 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 1129 ) 1130 1131 if not hasattr(declarative_stream, "incremental_sync"): 1132 raise ValueError( 1133 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 1134 ) 1135 1136 return LegacyToPerPartitionStateMigration( 1137 partition_router, # type: ignore # was already checked above 1138 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 
1139 config, 1140 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 1141 ) 1142 1143 def create_session_token_authenticator( 1144 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 1145 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 1146 decoder = ( 1147 self._create_component_from_model(model=model.decoder, config=config) 1148 if model.decoder 1149 else JsonDecoder(parameters={}) 1150 ) 1151 login_requester = self._create_component_from_model( 1152 model=model.login_requester, 1153 config=config, 1154 name=f"{name}_login_requester", 1155 decoder=decoder, 1156 ) 1157 token_provider = SessionTokenProvider( 1158 login_requester=login_requester, 1159 session_token_path=model.session_token_path, 1160 expiration_duration=parse_duration(model.expiration_duration) 1161 if model.expiration_duration 1162 else None, 1163 parameters=model.parameters or {}, 1164 message_repository=self._message_repository, 1165 decoder=decoder, 1166 ) 1167 if model.request_authentication.type == "Bearer": 1168 return ModelToComponentFactory.create_bearer_authenticator( 1169 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 1170 config, 1171 token_provider=token_provider, 1172 ) 1173 else: 1174 # Get the api_token template if specified, default to just the session token 1175 api_token_template = ( 1176 getattr(model.request_authentication, "api_token", None) or "{{ session_token }}" 1177 ) 1178 final_token_provider: TokenProvider = InterpolatedSessionTokenProvider( 1179 config=config, 1180 api_token=api_token_template, 1181 session_token_provider=token_provider, 1182 parameters=model.parameters or {}, 1183 ) 1184 return self.create_api_key_authenticator( 1185 ApiKeyAuthenticatorModel( 1186 type="ApiKeyAuthenticator", 1187 api_token="", 1188 inject_into=model.request_authentication.inject_into, 1189 ), # type: ignore # $parameters and headers default to None 1190 config=config, 1191 token_provider=final_token_provider, 1192 ) 1193 1194 @staticmethod 1195 def create_basic_http_authenticator( 1196 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 1197 ) -> BasicHttpAuthenticator: 1198 return BasicHttpAuthenticator( 1199 password=model.password or "", 1200 username=model.username, 1201 config=config, 1202 parameters=model.parameters or {}, 1203 ) 1204 1205 @staticmethod 1206 def create_bearer_authenticator( 1207 model: BearerAuthenticatorModel, 1208 config: Config, 1209 token_provider: Optional[TokenProvider] = None, 1210 **kwargs: Any, 1211 ) -> BearerAuthenticator: 1212 if token_provider is not None and model.api_token != "": 1213 raise ValueError( 1214 "If token_provider is set, api_token is ignored and has to be set to empty string." 
1215 ) 1216 return BearerAuthenticator( 1217 token_provider=( 1218 token_provider 1219 if token_provider is not None 1220 else InterpolatedStringTokenProvider( 1221 api_token=model.api_token or "", 1222 config=config, 1223 parameters=model.parameters or {}, 1224 ) 1225 ), 1226 config=config, 1227 parameters=model.parameters or {}, 1228 ) 1229 1230 @staticmethod 1231 def create_dynamic_stream_check_config( 1232 model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any 1233 ) -> DynamicStreamCheckConfig: 1234 return DynamicStreamCheckConfig( 1235 dynamic_stream_name=model.dynamic_stream_name, 1236 stream_count=model.stream_count or 0, 1237 ) 1238 1239 def create_check_stream( 1240 self, model: CheckStreamModel, config: Config, **kwargs: Any 1241 ) -> CheckStream: 1242 if model.dynamic_streams_check_configs is None and model.stream_names is None: 1243 raise ValueError( 1244 "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream" 1245 ) 1246 1247 dynamic_streams_check_configs = ( 1248 [ 1249 self._create_component_from_model(model=dynamic_stream_check_config, config=config) 1250 for dynamic_stream_check_config in model.dynamic_streams_check_configs 1251 ] 1252 if model.dynamic_streams_check_configs 1253 else [] 1254 ) 1255 1256 return CheckStream( 1257 stream_names=model.stream_names or [], 1258 dynamic_streams_check_configs=dynamic_streams_check_configs, 1259 parameters={}, 1260 ) 1261 1262 @staticmethod 1263 def create_check_dynamic_stream( 1264 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 1265 ) -> CheckDynamicStream: 1266 assert model.use_check_availability is not None # for mypy 1267 1268 use_check_availability = model.use_check_availability 1269 1270 return CheckDynamicStream( 1271 stream_count=model.stream_count, 1272 use_check_availability=use_check_availability, 1273 parameters={}, 1274 ) 1275 1276 def create_composite_error_handler( 1277 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 1278 ) -> CompositeErrorHandler: 1279 error_handlers = [ 1280 self._create_component_from_model(model=error_handler_model, config=config) 1281 for error_handler_model in model.error_handlers 1282 ] 1283 return CompositeErrorHandler( 1284 error_handlers=error_handlers, parameters=model.parameters or {} 1285 ) 1286 1287 @staticmethod 1288 def create_concurrency_level( 1289 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 1290 ) -> ConcurrencyLevel: 1291 return ConcurrencyLevel( 1292 default_concurrency=model.default_concurrency, 1293 max_concurrency=model.max_concurrency, 1294 config=config, 1295 parameters={}, 1296 ) 1297 1298 @staticmethod 1299 def apply_stream_state_migrations( 1300 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 1301 ) -> MutableMapping[str, Any]: 1302 if stream_state_migrations: 1303 for state_migration in stream_state_migrations: 1304 if state_migration.should_migrate(stream_state): 1305 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 
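# For example (illustrative): a LegacyToPerPartitionStateMigration whose should_migrate()
# matches a legacy-shaped state reshapes it at this point; each matching migration in
# stream_state_migrations is applied in order to the running stream_state value.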
1306 stream_state = dict(state_migration.migrate(stream_state)) 1307 return stream_state 1308 1309 def create_concurrent_cursor_from_datetime_based_cursor( 1310 self, 1311 model_type: Type[BaseModel], 1312 component_definition: ComponentDefinition, 1313 stream_name: str, 1314 stream_namespace: Optional[str], 1315 stream_state: MutableMapping[str, Any], 1316 config: Config, 1317 message_repository: Optional[MessageRepository] = None, 1318 runtime_lookback_window: Optional[datetime.timedelta] = None, 1319 **kwargs: Any, 1320 ) -> ConcurrentCursor: 1321 component_type = component_definition.get("type") 1322 if component_definition.get("type") != model_type.__name__: 1323 raise ValueError( 1324 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1325 ) 1326 1327 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1328 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1329 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1330 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1331 if "$parameters" not in component_definition and "parameters" in component_definition: 1332 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1333 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1334 1335 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1336 raise ValueError( 1337 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1338 ) 1339 1340 model_parameters = datetime_based_cursor_model.parameters or {} 1341 1342 cursor_field = self._get_catalog_defined_cursor_field( 1343 stream_name=stream_name, 1344 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1345 or False, 1346 ) 1347 1348 if not cursor_field: 1349 interpolated_cursor_field = InterpolatedString.create( 1350 datetime_based_cursor_model.cursor_field, 1351 parameters=model_parameters, 1352 ) 1353 cursor_field = CursorField( 1354 cursor_field_key=interpolated_cursor_field.eval(config=config), 1355 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1356 or False, 1357 ) 1358 1359 interpolated_partition_field_start = InterpolatedString.create( 1360 datetime_based_cursor_model.partition_field_start or "start_time", 1361 parameters=model_parameters, 1362 ) 1363 interpolated_partition_field_end = InterpolatedString.create( 1364 datetime_based_cursor_model.partition_field_end or "end_time", 1365 parameters=model_parameters, 1366 ) 1367 1368 slice_boundary_fields = ( 1369 interpolated_partition_field_start.eval(config=config), 1370 interpolated_partition_field_end.eval(config=config), 1371 ) 1372 1373 datetime_format = datetime_based_cursor_model.datetime_format 1374 1375 cursor_granularity = ( 1376 
parse_duration(datetime_based_cursor_model.cursor_granularity) 1377 if datetime_based_cursor_model.cursor_granularity 1378 else None 1379 ) 1380 1381 lookback_window = None 1382 interpolated_lookback_window = ( 1383 InterpolatedString.create( 1384 datetime_based_cursor_model.lookback_window, 1385 parameters=model_parameters, 1386 ) 1387 if datetime_based_cursor_model.lookback_window 1388 else None 1389 ) 1390 if interpolated_lookback_window: 1391 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1392 if evaluated_lookback_window: 1393 lookback_window = parse_duration(evaluated_lookback_window) 1394 1395 connector_state_converter: DateTimeStreamStateConverter 1396 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1397 datetime_format=datetime_format, 1398 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1399 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1400 cursor_granularity=cursor_granularity, 1401 ) 1402 1403 # Adjusts the stream state by applying the runtime lookback window. 1404 # This is used to ensure correct state handling in case of failed partitions. 1405 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1406 if runtime_lookback_window and stream_state_value: 1407 new_stream_state = ( 1408 connector_state_converter.parse_timestamp(stream_state_value) 1409 - runtime_lookback_window 1410 ) 1411 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1412 new_stream_state 1413 ) 1414 1415 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1416 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1417 start_date_runtime_value = self.create_min_max_datetime( 1418 model=datetime_based_cursor_model.start_datetime, config=config 1419 ) 1420 else: 1421 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1422 1423 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1424 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1425 end_date_runtime_value = self.create_min_max_datetime( 1426 model=datetime_based_cursor_model.end_datetime, config=config 1427 ) 1428 else: 1429 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1430 1431 interpolated_start_date = MinMaxDatetime.create( 1432 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1433 parameters=datetime_based_cursor_model.parameters, 1434 ) 1435 interpolated_end_date = ( 1436 None 1437 if not end_date_runtime_value 1438 else MinMaxDatetime.create( 1439 end_date_runtime_value, datetime_based_cursor_model.parameters 1440 ) 1441 ) 1442 1443 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1444 if not interpolated_start_date.datetime_format: 1445 interpolated_start_date.datetime_format = datetime_format 1446 if interpolated_end_date and not interpolated_end_date.datetime_format: 1447 interpolated_end_date.datetime_format = datetime_format 1448 1449 start_date = interpolated_start_date.get_datetime(config=config) 1450 end_date_provider = ( 1451 partial(interpolated_end_date.get_datetime, config) 1452 if interpolated_end_date 1453 else connector_state_converter.get_end_provider() 1454 ) 1455 1456 if ( 1457 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1458 ) or ( 1459 not datetime_based_cursor_model.step and 
datetime_based_cursor_model.cursor_granularity 1460 ): 1461 raise ValueError( 1462 f"If step is defined, cursor_granularity should be as well and vice-versa. " 1463 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1464 ) 1465 1466 # When step is not defined, default to a step size from the starting date to the present moment 1467 step_length = datetime.timedelta.max 1468 interpolated_step = ( 1469 InterpolatedString.create( 1470 datetime_based_cursor_model.step, 1471 parameters=model_parameters, 1472 ) 1473 if datetime_based_cursor_model.step 1474 else None 1475 ) 1476 if interpolated_step: 1477 evaluated_step = interpolated_step.eval(config) 1478 if evaluated_step: 1479 step_length = parse_duration(evaluated_step) 1480 1481 clamping_strategy: ClampingStrategy = NoClamping() 1482 if datetime_based_cursor_model.clamping: 1483 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1484 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1485 # object which we want to keep agnostic of being low-code 1486 target = InterpolatedString( 1487 string=datetime_based_cursor_model.clamping.target, 1488 parameters=model_parameters, 1489 ) 1490 evaluated_target = target.eval(config=config) 1491 match evaluated_target: 1492 case "DAY": 1493 clamping_strategy = DayClampingStrategy() 1494 end_date_provider = ClampingEndProvider( 1495 DayClampingStrategy(is_ceiling=False), 1496 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1497 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1498 ) 1499 case "WEEK": 1500 if ( 1501 not datetime_based_cursor_model.clamping.target_details 1502 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1503 ): 1504 raise ValueError( 1505 "Given WEEK clamping, weekday needs to be provided as target_details" 1506 ) 1507 weekday = self._assemble_weekday( 1508 datetime_based_cursor_model.clamping.target_details["weekday"] 1509 ) 1510 clamping_strategy = WeekClampingStrategy(weekday) 1511 end_date_provider = ClampingEndProvider( 1512 WeekClampingStrategy(weekday, is_ceiling=False), 1513 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1514 granularity=cursor_granularity or datetime.timedelta(days=1), 1515 ) 1516 case "MONTH": 1517 clamping_strategy = MonthClampingStrategy() 1518 end_date_provider = ClampingEndProvider( 1519 MonthClampingStrategy(is_ceiling=False), 1520 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1521 granularity=cursor_granularity or datetime.timedelta(days=1), 1522 ) 1523 case _: 1524 raise ValueError( 1525 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1526 ) 1527 1528 return ConcurrentCursor( 1529 stream_name=stream_name, 1530 stream_namespace=stream_namespace, 1531 stream_state=stream_state, 1532 message_repository=message_repository or self._message_repository, 1533 connector_state_manager=self._connector_state_manager, 1534 connector_state_converter=connector_state_converter, 1535 cursor_field=cursor_field, 1536 slice_boundary_fields=slice_boundary_fields, 1537 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1538 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1539 lookback_window=lookback_window, 1540 slice_range=step_length, 1541 cursor_granularity=cursor_granularity, 1542 clamping_strategy=clamping_strategy, 1543 ) 1544 1545 def create_concurrent_cursor_from_incrementing_count_cursor( 1546 self, 1547 model_type: Type[BaseModel], 1548 component_definition: ComponentDefinition, 1549 stream_name: str, 1550 stream_namespace: Optional[str], 1551 stream_state: MutableMapping[str, Any], 1552 config: Config, 1553 message_repository: Optional[MessageRepository] = None, 1554 **kwargs: Any, 1555 ) -> ConcurrentCursor: 1556 component_type = component_definition.get("type") 1557 if component_definition.get("type") != model_type.__name__: 1558 raise ValueError( 1559 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1560 ) 1561 1562 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1563 1564 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1565 raise ValueError( 1566 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1567 ) 1568 1569 start_value: Union[int, str, None] = incrementing_count_cursor_model.start_value 1570 # Pydantic Union type coercion can convert int 0 to string '0' depending on Union order. 1571 # We need to handle both int and str representations of numeric values. 1572 # Evaluate the InterpolatedString and convert to int for the ConcurrentCursor. 
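# For example, a manifest start_value of 0 may arrive here as the string "0": str(start_value)
# is fed to InterpolatedString.create, eval() returns "0" (or the result of an interpolated
# expression), and int(...) yields the integer start value the ConcurrentCursor expects.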
1573 if start_value is not None: 1574 interpolated_start_value = InterpolatedString.create( 1575 str(start_value), # Ensure we pass a string to InterpolatedString.create 1576 parameters=incrementing_count_cursor_model.parameters or {}, 1577 ) 1578 evaluated_start_value: int = int(interpolated_start_value.eval(config=config)) 1579 else: 1580 evaluated_start_value = 0 1581 1582 cursor_field = self._get_catalog_defined_cursor_field( 1583 stream_name=stream_name, 1584 allow_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1585 or False, 1586 ) 1587 1588 if not cursor_field: 1589 interpolated_cursor_field = InterpolatedString.create( 1590 incrementing_count_cursor_model.cursor_field, 1591 parameters=incrementing_count_cursor_model.parameters or {}, 1592 ) 1593 cursor_field = CursorField( 1594 cursor_field_key=interpolated_cursor_field.eval(config=config), 1595 supports_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1596 or False, 1597 ) 1598 1599 connector_state_converter = IncrementingCountStreamStateConverter( 1600 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1601 ) 1602 1603 return ConcurrentCursor( 1604 stream_name=stream_name, 1605 stream_namespace=stream_namespace, 1606 stream_state=stream_state, 1607 message_repository=message_repository or self._message_repository, 1608 connector_state_manager=self._connector_state_manager, 1609 connector_state_converter=connector_state_converter, 1610 cursor_field=cursor_field, 1611 slice_boundary_fields=None, 1612 start=evaluated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1613 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1614 ) 1615 1616 def _assemble_weekday(self, weekday: str) -> Weekday: 1617 match weekday: 1618 case "MONDAY": 1619 return Weekday.MONDAY 1620 case "TUESDAY": 1621 return Weekday.TUESDAY 1622 case "WEDNESDAY": 1623 return Weekday.WEDNESDAY 1624 case "THURSDAY": 1625 return Weekday.THURSDAY 1626 case "FRIDAY": 1627 return Weekday.FRIDAY 1628 case "SATURDAY": 1629 return Weekday.SATURDAY 1630 case "SUNDAY": 1631 return Weekday.SUNDAY 1632 case _: 1633 raise ValueError(f"Unknown weekday {weekday}") 1634 1635 def create_concurrent_cursor_from_perpartition_cursor( 1636 self, 1637 state_manager: ConnectorStateManager, 1638 model_type: Type[BaseModel], 1639 component_definition: ComponentDefinition, 1640 stream_name: str, 1641 stream_namespace: Optional[str], 1642 config: Config, 1643 stream_state: MutableMapping[str, Any], 1644 partition_router: PartitionRouter, 1645 attempt_to_create_cursor_if_not_provided: bool = False, 1646 **kwargs: Any, 1647 ) -> ConcurrentPerPartitionCursor: 1648 component_type = component_definition.get("type") 1649 if component_definition.get("type") != model_type.__name__: 1650 raise ValueError( 1651 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1652 ) 1653 1654 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. 
This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1655 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1656 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1657 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1658 if "$parameters" not in component_definition and "parameters" in component_definition: 1659 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1660 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1661 1662 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1663 raise ValueError( 1664 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1665 ) 1666 1667 cursor_field = self._get_catalog_defined_cursor_field( 1668 stream_name=stream_name, 1669 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1670 or False, 1671 ) 1672 1673 if not cursor_field: 1674 interpolated_cursor_field = InterpolatedString.create( 1675 datetime_based_cursor_model.cursor_field, 1676 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1677 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1678 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1679 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 
1680 parameters=datetime_based_cursor_model.parameters or {}, 1681 ) 1682 cursor_field = CursorField( 1683 cursor_field_key=interpolated_cursor_field.eval(config=config), 1684 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1685 or False, 1686 ) 1687 1688 datetime_format = datetime_based_cursor_model.datetime_format 1689 1690 cursor_granularity = ( 1691 parse_duration(datetime_based_cursor_model.cursor_granularity) 1692 if datetime_based_cursor_model.cursor_granularity 1693 else None 1694 ) 1695 1696 connector_state_converter: DateTimeStreamStateConverter 1697 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1698 datetime_format=datetime_format, 1699 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1700 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1701 cursor_granularity=cursor_granularity, 1702 ) 1703 1704 # Create the cursor factory 1705 cursor_factory = ConcurrentCursorFactory( 1706 partial( 1707 self.create_concurrent_cursor_from_datetime_based_cursor, 1708 state_manager=state_manager, 1709 model_type=model_type, 1710 component_definition=component_definition, 1711 stream_name=stream_name, 1712 stream_namespace=stream_namespace, 1713 config=config, 1714 message_repository=NoopMessageRepository(), 1715 ) 1716 ) 1717 1718 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1719 use_global_cursor = isinstance( 1720 partition_router, GroupingPartitionRouter 1721 ) or component_definition.get("global_substream_cursor", False) 1722 1723 # Return the concurrent cursor and state converter 1724 return ConcurrentPerPartitionCursor( 1725 cursor_factory=cursor_factory, 1726 partition_router=partition_router, 1727 stream_name=stream_name, 1728 stream_namespace=stream_namespace, 1729 stream_state=stream_state, 1730 message_repository=self._message_repository, # type: ignore 1731 connector_state_manager=state_manager, 1732 connector_state_converter=connector_state_converter, 1733 cursor_field=cursor_field, 1734 use_global_cursor=use_global_cursor, 1735 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1736 ) 1737 1738 @staticmethod 1739 def create_constant_backoff_strategy( 1740 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1741 ) -> ConstantBackoffStrategy: 1742 return ConstantBackoffStrategy( 1743 backoff_time_in_seconds=model.backoff_time_in_seconds, 1744 config=config, 1745 parameters=model.parameters or {}, 1746 ) 1747 1748 def create_cursor_pagination( 1749 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1750 ) -> CursorPaginationStrategy: 1751 if isinstance(decoder, PaginationDecoderDecorator): 1752 inner_decoder = decoder.decoder 1753 else: 1754 inner_decoder = decoder 1755 decoder = PaginationDecoderDecorator(decoder=decoder) 1756 1757 if self._is_supported_decoder_for_pagination(inner_decoder): 1758 decoder_to_use = decoder 1759 else: 1760 raise ValueError( 1761 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1762 ) 1763 1764 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 1765 # If page_size is a string that represents an integer (not an interpolation), convert it back. 
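# For example, page_size: 100 can surface here as "100" and is converted back to the int 100
# below, while a non-numeric string (such as an interpolation expression) fails isdigit() and
# is passed through unchanged.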
1766 page_size = model.page_size 1767 if isinstance(page_size, str) and page_size.isdigit(): 1768 page_size = int(page_size) 1769 1770 return CursorPaginationStrategy( 1771 cursor_value=model.cursor_value, 1772 decoder=decoder_to_use, 1773 page_size=page_size, 1774 stop_condition=model.stop_condition, 1775 config=config, 1776 parameters=model.parameters or {}, 1777 ) 1778 1779 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1780 """ 1781 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1782 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1783 :param model: The Pydantic model of the custom component being created 1784 :param config: The custom defined connector config 1785 :return: The declarative component built from the Pydantic model to be used at runtime 1786 """ 1787 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1788 component_fields = get_type_hints(custom_component_class) 1789 model_args = model.dict() 1790 model_args["config"] = config 1791 1792 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1793 # we defer to these arguments over the component's definition 1794 for key, arg in kwargs.items(): 1795 model_args[key] = arg 1796 1797 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1798 # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to 1799 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1800 for model_field, model_value in model_args.items(): 1801 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1802 if ( 1803 isinstance(model_value, dict) 1804 and "type" not in model_value 1805 and model_field in component_fields 1806 ): 1807 derived_type = self._derive_component_type_from_type_hints( 1808 component_fields.get(model_field) 1809 ) 1810 if derived_type: 1811 model_value["type"] = derived_type 1812 1813 if self._is_component(model_value): 1814 model_args[model_field] = self._create_nested_component( 1815 model, 1816 model_field, 1817 model_value, 1818 config, 1819 **kwargs, 1820 ) 1821 elif isinstance(model_value, list): 1822 vals = [] 1823 for v in model_value: 1824 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1825 derived_type = self._derive_component_type_from_type_hints( 1826 component_fields.get(model_field) 1827 ) 1828 if derived_type: 1829 v["type"] = derived_type 1830 if self._is_component(v): 1831 vals.append( 1832 self._create_nested_component( 1833 model, 1834 model_field, 1835 v, 1836 config, 1837 **kwargs, 1838 ) 1839 ) 1840 else: 1841 vals.append(v) 1842 model_args[model_field] = vals 1843 1844 kwargs = { 1845 class_field: model_args[class_field] 1846 for class_field in component_fields.keys() 1847 if class_field in model_args 1848 } 1849 return custom_component_class(**kwargs) 1850 1851 @staticmethod 1852 def _get_class_from_fully_qualified_class_name( 1853 full_qualified_class_name: str, 1854 ) -> Any: 1855 """Get a class from its fully qualified name. 
1856 1857 If a custom components module is needed, we assume it is already registered - probably 1858 as `source_declarative_manifest.components` or `components`. 1859 1860 Args: 1861 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 1862 1863 Returns: 1864 Any: The class object. 1865 1866 Raises: 1867 ValueError: If the class cannot be loaded. 1868 """ 1869 split = full_qualified_class_name.split(".") 1870 module_name_full = ".".join(split[:-1]) 1871 class_name = split[-1] 1872 1873 try: 1874 module_ref = importlib.import_module(module_name_full) 1875 except ModuleNotFoundError as e: 1876 if split[0] == "source_declarative_manifest": 1877 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it runtime using sys.path.append 1878 try: 1879 import os 1880 1881 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1882 module_ref = importlib.import_module( 1883 module_name_with_source_declarative_manifest 1884 ) 1885 except ModuleNotFoundError: 1886 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1887 else: 1888 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1889 1890 try: 1891 return getattr(module_ref, class_name) 1892 except AttributeError as e: 1893 raise ValueError( 1894 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1895 ) from e 1896 1897 @staticmethod 1898 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1899 interface = field_type 1900 while True: 1901 origin = get_origin(interface) 1902 if origin: 1903 # Unnest types until we reach the raw type 1904 # List[T] -> T 1905 # Optional[List[T]] -> T 1906 args = get_args(interface) 1907 interface = args[0] 1908 else: 1909 break 1910 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1911 return interface.__name__ 1912 return None 1913 1914 @staticmethod 1915 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1916 if not cls: 1917 return False 1918 return cls.__module__ == "builtins" 1919 1920 @staticmethod 1921 def _extract_missing_parameters(error: TypeError) -> List[str]: 1922 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1923 if parameter_search: 1924 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1925 else: 1926 return [] 1927 1928 def _create_nested_component( 1929 self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any 1930 ) -> Any: 1931 type_name = model_value.get("type", None) 1932 if not type_name: 1933 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1934 return model_value 1935 1936 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1937 if model_type: 1938 parsed_model = model_type.parse_obj(model_value) 1939 try: 1940 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1941 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1942 # components and passing it to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1943 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1944 # generically in create_custom_component(). 
This block allows developers to specify extra arguments in $parameters that 1945 # are needed by a component and could not be shared. 1946 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1947 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1948 model_parameters = model_value.get("$parameters", {}) 1949 matching_parameters = { 1950 kwarg: model_parameters[kwarg] 1951 for kwarg in constructor_kwargs 1952 if kwarg in model_parameters 1953 } 1954 matching_kwargs = { 1955 kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs 1956 } 1957 return self._create_component_from_model( 1958 model=parsed_model, config=config, **(matching_parameters | matching_kwargs) 1959 ) 1960 except TypeError as error: 1961 missing_parameters = self._extract_missing_parameters(error) 1962 if missing_parameters: 1963 raise ValueError( 1964 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1965 + ", ".join( 1966 ( 1967 f"{type_name}.$parameters.{parameter}" 1968 for parameter in missing_parameters 1969 ) 1970 ) 1971 ) 1972 raise TypeError( 1973 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1974 ) 1975 else: 1976 raise ValueError( 1977 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1978 ) 1979 1980 @staticmethod 1981 def _is_component(model_value: Any) -> bool: 1982 return isinstance(model_value, dict) and model_value.get("type") is not None 1983 1984 def create_default_stream( 1985 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1986 ) -> AbstractStream: 1987 primary_key = model.primary_key.__root__ if model.primary_key else None 1988 self._migrate_state(model, config) 1989 1990 partition_router = self._build_stream_slicer_from_partition_router( 1991 model.retriever, 1992 config, 1993 stream_name=model.name, 1994 **kwargs, 1995 ) 1996 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1997 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1998 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1999 2000 end_time_option = ( 2001 self._create_component_from_model( 2002 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 2003 ) 2004 if cursor_model.end_time_option 2005 else None 2006 ) 2007 start_time_option = ( 2008 self._create_component_from_model( 2009 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 2010 ) 2011 if cursor_model.start_time_option 2012 else None 2013 ) 2014 2015 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 2016 start_time_option=start_time_option, 2017 end_time_option=end_time_option, 2018 partition_field_start=cursor_model.partition_field_start, 2019 partition_field_end=cursor_model.partition_field_end, 2020 config=config, 2021 parameters=model.parameters or {}, 2022 ) 2023 request_options_provider = ( 2024 datetime_request_options_provider 2025 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 2026 else PerPartitionRequestOptionsProvider( 2027 partition_router, datetime_request_options_provider 2028 ) 2029 ) 2030 elif model.incremental_sync and isinstance( 2031 model.incremental_sync, IncrementingCountCursorModel 2032 ): 2033 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2034 raise ValueError( 
2035 "PerPartition does not support per partition states because switching to global state is time based" 2036 ) 2037 2038 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 2039 2040 start_time_option = ( 2041 self._create_component_from_model( 2042 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2043 config, 2044 parameters=cursor_model.parameters or {}, 2045 ) 2046 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2047 else None 2048 ) 2049 2050 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2051 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2052 partition_field_start = "start" 2053 2054 request_options_provider = DatetimeBasedRequestOptionsProvider( 2055 start_time_option=start_time_option, 2056 partition_field_start=partition_field_start, 2057 config=config, 2058 parameters=model.parameters or {}, 2059 ) 2060 else: 2061 request_options_provider = None 2062 2063 transformations = [] 2064 if model.transformations: 2065 for transformation_model in model.transformations: 2066 transformations.append( 2067 self._create_component_from_model(model=transformation_model, config=config) 2068 ) 2069 file_uploader = None 2070 if model.file_uploader: 2071 file_uploader = self._create_component_from_model( 2072 model=model.file_uploader, config=config 2073 ) 2074 2075 stream_slicer: ConcurrentStreamSlicer = ( 2076 partition_router 2077 if isinstance(concurrent_cursor, FinalStateCursor) 2078 else concurrent_cursor 2079 ) 2080 2081 retriever = self._create_component_from_model( 2082 model=model.retriever, 2083 config=config, 2084 name=model.name, 2085 primary_key=primary_key, 2086 request_options_provider=request_options_provider, 2087 stream_slicer=stream_slicer, 2088 partition_router=partition_router, 2089 has_stop_condition_cursor=self._is_stop_condition_on_cursor(model), 2090 is_client_side_incremental_sync=self._is_client_side_filtering_enabled(model), 2091 cursor=concurrent_cursor, 2092 transformations=transformations, 2093 file_uploader=file_uploader, 2094 incremental_sync=model.incremental_sync, 2095 ) 2096 if isinstance(retriever, AsyncRetriever): 2097 stream_slicer = retriever.stream_slicer 2098 2099 schema_loader: SchemaLoader 2100 if model.schema_loader and isinstance(model.schema_loader, list): 2101 nested_schema_loaders = [ 2102 self._create_component_from_model(model=nested_schema_loader, config=config) 2103 for nested_schema_loader in model.schema_loader 2104 ] 2105 schema_loader = CompositeSchemaLoader( 2106 schema_loaders=nested_schema_loaders, parameters={} 2107 ) 2108 elif model.schema_loader: 2109 schema_loader = self._create_component_from_model( 2110 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2111 config=config, 2112 ) 2113 else: 2114 options = model.parameters or {} 2115 if "name" not in options: 2116 options["name"] = model.name 2117 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2118 schema_loader = CachingSchemaLoaderDecorator(schema_loader) 2119 2120 stream_name = model.name or "" 2121 return DefaultStream( 2122 partition_generator=StreamSlicerPartitionGenerator( 2123 DeclarativePartitionFactory( 2124 stream_name, 2125 schema_loader, 2126 retriever, 2127 self._message_repository, 2128 ), 2129 stream_slicer, 2130 
slice_limit=self._limit_slices_fetched, 2131 ), 2132 name=stream_name, 2133 json_schema=schema_loader.get_json_schema, 2134 primary_key=get_primary_key_from_stream(primary_key), 2135 cursor_field=( 2136 concurrent_cursor.cursor_field 2137 if hasattr(concurrent_cursor, "cursor_field") 2138 else None 2139 ), 2140 logger=logging.getLogger(f"airbyte.{stream_name}"), 2141 cursor=concurrent_cursor, 2142 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2143 ) 2144 2145 def _migrate_state(self, model: DeclarativeStreamModel, config: Config) -> None: 2146 stream_name = model.name or "" 2147 stream_state = self._connector_state_manager.get_stream_state( 2148 stream_name=stream_name, namespace=None 2149 ) 2150 if model.state_migrations: 2151 state_transformations = [ 2152 self._create_component_from_model(state_migration, config, declarative_stream=model) 2153 for state_migration in model.state_migrations 2154 ] 2155 else: 2156 state_transformations = [] 2157 stream_state = self.apply_stream_state_migrations(state_transformations, stream_state) 2158 self._connector_state_manager.update_state_for_stream( 2159 stream_name=stream_name, namespace=None, value=stream_state 2160 ) 2161 2162 def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool: 2163 return bool( 2164 model.incremental_sync 2165 and hasattr(model.incremental_sync, "is_data_feed") 2166 and model.incremental_sync.is_data_feed 2167 ) 2168 2169 def _is_client_side_filtering_enabled(self, model: DeclarativeStreamModel) -> bool: 2170 return bool( 2171 model.incremental_sync 2172 and hasattr(model.incremental_sync, "is_client_side_incremental") 2173 and model.incremental_sync.is_client_side_incremental 2174 ) 2175 2176 def _build_stream_slicer_from_partition_router( 2177 self, 2178 model: Union[ 2179 AsyncRetrieverModel, 2180 CustomRetrieverModel, 2181 SimpleRetrieverModel, 2182 ], 2183 config: Config, 2184 stream_name: Optional[str] = None, 2185 **kwargs: Any, 2186 ) -> PartitionRouter: 2187 if ( 2188 hasattr(model, "partition_router") 2189 and isinstance(model, (SimpleRetrieverModel, AsyncRetrieverModel, CustomRetrieverModel)) 2190 and model.partition_router 2191 ): 2192 stream_slicer_model = model.partition_router 2193 if isinstance(stream_slicer_model, list): 2194 return CartesianProductStreamSlicer( 2195 [ 2196 self._create_component_from_model( 2197 model=slicer, config=config, stream_name=stream_name or "" 2198 ) 2199 for slicer in stream_slicer_model 2200 ], 2201 parameters={}, 2202 ) 2203 elif isinstance(stream_slicer_model, dict): 2204 # partition router comes from CustomRetrieverModel therefore has not been parsed as a model 2205 params = stream_slicer_model.get("$parameters") 2206 if not isinstance(params, dict): 2207 params = {} 2208 stream_slicer_model["$parameters"] = params 2209 2210 if stream_name is not None: 2211 params["stream_name"] = stream_name 2212 2213 return self._create_nested_component( # type: ignore[no-any-return] # There is no guarantee that this will return a stream slicer. 
If not, we expect an AttributeError during the call to `stream_slices` 2214 model, 2215 "partition_router", 2216 stream_slicer_model, 2217 config, 2218 **kwargs, 2219 ) 2220 else: 2221 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 2222 model=stream_slicer_model, config=config, stream_name=stream_name or "" 2223 ) 2224 return SinglePartitionRouter(parameters={}) 2225 2226 def _build_concurrent_cursor( 2227 self, 2228 model: DeclarativeStreamModel, 2229 stream_slicer: Optional[PartitionRouter], 2230 config: Config, 2231 ) -> Cursor: 2232 stream_name = model.name or "" 2233 stream_state = self._connector_state_manager.get_stream_state(stream_name, None) 2234 2235 if ( 2236 model.incremental_sync 2237 and stream_slicer 2238 and not isinstance(stream_slicer, SinglePartitionRouter) 2239 ): 2240 if isinstance(model.incremental_sync, IncrementingCountCursorModel): 2241 # We don't currently support usage of partition routing and IncrementingCountCursor at the 2242 # same time because we didn't solve for design questions like what the lookback window would 2243 # be as well as global cursor fall backs. We have not seen customers that have needed both 2244 # at the same time yet and are currently punting on this until we need to solve it. 2245 raise ValueError( 2246 f"The low-code framework does not currently support usage of a PartitionRouter and an IncrementingCountCursor at the same time. Please specify only one of these options for stream {stream_name}." 2247 ) 2248 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2249 state_manager=self._connector_state_manager, 2250 model_type=DatetimeBasedCursorModel, 2251 component_definition=model.incremental_sync.__dict__, 2252 stream_name=stream_name, 2253 stream_state=stream_state, 2254 stream_namespace=None, 2255 config=config or {}, 2256 partition_router=stream_slicer, 2257 attempt_to_create_cursor_if_not_provided=True, # FIXME can we remove that now? 2258 ) 2259 elif model.incremental_sync: 2260 if type(model.incremental_sync) == IncrementingCountCursorModel: 2261 return self.create_concurrent_cursor_from_incrementing_count_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2262 model_type=IncrementingCountCursorModel, 2263 component_definition=model.incremental_sync.__dict__, 2264 stream_name=stream_name, 2265 stream_namespace=None, 2266 stream_state=stream_state, 2267 config=config or {}, 2268 ) 2269 elif type(model.incremental_sync) == DatetimeBasedCursorModel: 2270 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2271 model_type=type(model.incremental_sync), 2272 component_definition=model.incremental_sync.__dict__, 2273 stream_name=stream_name, 2274 stream_namespace=None, 2275 stream_state=stream_state, 2276 config=config or {}, 2277 attempt_to_create_cursor_if_not_provided=True, 2278 ) 2279 else: 2280 raise ValueError( 2281 f"Incremental sync of type {type(model.incremental_sync)} is not supported" 2282 ) 2283 return FinalStateCursor(stream_name, None, self._message_repository) 2284 2285 def create_default_error_handler( 2286 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2287 ) -> DefaultErrorHandler: 2288 backoff_strategies = [] 2289 if model.backoff_strategies: 2290 for backoff_strategy_model in model.backoff_strategies: 2291 backoff_strategies.append( 2292 self._create_component_from_model(model=backoff_strategy_model, config=config) 2293 ) 2294 2295 response_filters = [] 2296 if model.response_filters: 2297 for response_filter_model in model.response_filters: 2298 response_filters.append( 2299 self._create_component_from_model(model=response_filter_model, config=config) 2300 ) 2301 response_filters.append( 2302 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2303 ) 2304 2305 return DefaultErrorHandler( 2306 backoff_strategies=backoff_strategies, 2307 max_retries=model.max_retries, 2308 response_filters=response_filters, 2309 config=config, 2310 parameters=model.parameters or {}, 2311 ) 2312 2313 def create_default_paginator( 2314 self, 2315 model: DefaultPaginatorModel, 2316 config: Config, 2317 *, 2318 url_base: str, 2319 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2320 decoder: Optional[Decoder] = None, 2321 cursor_used_for_stop_condition: Optional[Cursor] = None, 2322 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2323 if decoder: 2324 if self._is_supported_decoder_for_pagination(decoder): 2325 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2326 else: 2327 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2328 else: 2329 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2330 page_size_option = ( 2331 self._create_component_from_model(model=model.page_size_option, config=config) 2332 if model.page_size_option 2333 else None 2334 ) 2335 page_token_option = ( 2336 self._create_component_from_model(model=model.page_token_option, config=config) 2337 if model.page_token_option 2338 else None 2339 ) 2340 pagination_strategy = self._create_component_from_model( 2341 model=model.pagination_strategy, 2342 config=config, 2343 decoder=decoder_to_use, 2344 extractor_model=extractor_model, 2345 ) 2346 if cursor_used_for_stop_condition: 2347 pagination_strategy = StopConditionPaginationStrategyDecorator( 2348 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2349 ) 2350 paginator = DefaultPaginator( 2351 decoder=decoder_to_use, 2352 page_size_option=page_size_option, 2353 page_token_option=page_token_option, 2354 pagination_strategy=pagination_strategy, 2355 url_base=url_base, 2356 config=config, 2357 parameters=model.parameters or {}, 2358 ) 2359 if self._limit_pages_fetched_per_slice: 2360 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2361 return paginator 2362 2363 def create_dpath_extractor( 2364 self, 2365 model: 
DpathExtractorModel, 2366 config: Config, 2367 decoder: Optional[Decoder] = None, 2368 **kwargs: Any, 2369 ) -> DpathExtractor: 2370 if decoder: 2371 decoder_to_use = decoder 2372 else: 2373 decoder_to_use = JsonDecoder(parameters={}) 2374 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2375 return DpathExtractor( 2376 decoder=decoder_to_use, 2377 field_path=model_field_path, 2378 config=config, 2379 parameters=model.parameters or {}, 2380 ) 2381 2382 @staticmethod 2383 def create_response_to_file_extractor( 2384 model: ResponseToFileExtractorModel, 2385 **kwargs: Any, 2386 ) -> ResponseToFileExtractor: 2387 return ResponseToFileExtractor(parameters=model.parameters or {}) 2388 2389 @staticmethod 2390 def create_exponential_backoff_strategy( 2391 model: ExponentialBackoffStrategyModel, config: Config 2392 ) -> ExponentialBackoffStrategy: 2393 return ExponentialBackoffStrategy( 2394 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2395 ) 2396 2397 @staticmethod 2398 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2399 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2400 2401 def create_http_requester( 2402 self, 2403 model: HttpRequesterModel, 2404 config: Config, 2405 decoder: Decoder = JsonDecoder(parameters={}), 2406 query_properties_key: Optional[str] = None, 2407 use_cache: Optional[bool] = None, 2408 *, 2409 name: str, 2410 ) -> HttpRequester: 2411 authenticator = ( 2412 self._create_component_from_model( 2413 model=model.authenticator, 2414 config=config, 2415 url_base=model.url or model.url_base, 2416 name=name, 2417 decoder=decoder, 2418 ) 2419 if model.authenticator 2420 else None 2421 ) 2422 error_handler = ( 2423 self._create_component_from_model(model=model.error_handler, config=config) 2424 if model.error_handler 2425 else DefaultErrorHandler( 2426 backoff_strategies=[], 2427 response_filters=[], 2428 config=config, 2429 parameters=model.parameters or {}, 2430 ) 2431 ) 2432 2433 api_budget = self._api_budget 2434 2435 request_options_provider = InterpolatedRequestOptionsProvider( 2436 request_body=model.request_body, 2437 request_body_data=model.request_body_data, 2438 request_body_json=model.request_body_json, 2439 request_headers=model.request_headers, 2440 request_parameters=model.request_parameters, # type: ignore # QueryProperties have been removed in `create_simple_retriever` 2441 query_properties_key=query_properties_key, 2442 config=config, 2443 parameters=model.parameters or {}, 2444 ) 2445 2446 assert model.use_cache is not None # for mypy 2447 assert model.http_method is not None # for mypy 2448 2449 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2450 2451 return HttpRequester( 2452 name=name, 2453 url=model.url, 2454 url_base=model.url_base, 2455 path=model.path, 2456 authenticator=authenticator, 2457 error_handler=error_handler, 2458 api_budget=api_budget, 2459 http_method=HttpMethod[model.http_method.value], 2460 request_options_provider=request_options_provider, 2461 config=config, 2462 disable_retries=self._disable_retries, 2463 parameters=model.parameters or {}, 2464 message_repository=self._message_repository, 2465 use_cache=should_use_cache, 2466 decoder=decoder, 2467 stream_response=decoder.is_stream_response() if decoder else False, 2468 ) 2469 2470 @staticmethod 2471 def create_http_response_filter( 2472 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2473 ) -> 
HttpResponseFilter: 2474 if model.action: 2475 action = ResponseAction(model.action.value) 2476 else: 2477 action = None 2478 2479 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2480 2481 http_codes = ( 2482 set(model.http_codes) if model.http_codes else set() 2483 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2484 2485 return HttpResponseFilter( 2486 action=action, 2487 failure_type=failure_type, 2488 error_message=model.error_message or "", 2489 error_message_contains=model.error_message_contains or "", 2490 http_codes=http_codes, 2491 predicate=model.predicate or "", 2492 config=config, 2493 parameters=model.parameters or {}, 2494 ) 2495 2496 @staticmethod 2497 def create_inline_schema_loader( 2498 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2499 ) -> InlineSchemaLoader: 2500 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2501 2502 def create_complex_field_type( 2503 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2504 ) -> ComplexFieldType: 2505 items = ( 2506 self._create_component_from_model(model=model.items, config=config) 2507 if isinstance(model.items, ComplexFieldTypeModel) 2508 else model.items 2509 ) 2510 2511 return ComplexFieldType(field_type=model.field_type, items=items) 2512 2513 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2514 target_type = ( 2515 self._create_component_from_model(model=model.target_type, config=config) 2516 if isinstance(model.target_type, ComplexFieldTypeModel) 2517 else model.target_type 2518 ) 2519 2520 return TypesMap( 2521 target_type=target_type, 2522 current_type=model.current_type, 2523 condition=model.condition if model.condition is not None else "True", 2524 ) 2525 2526 def create_schema_type_identifier( 2527 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2528 ) -> SchemaTypeIdentifier: 2529 types_mapping = [] 2530 if model.types_mapping: 2531 types_mapping.extend( 2532 [ 2533 self._create_component_from_model(types_map, config=config) 2534 for types_map in model.types_mapping 2535 ] 2536 ) 2537 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2538 [x for x in model.schema_pointer] if model.schema_pointer else [] 2539 ) 2540 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2541 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2542 [x for x in model.type_pointer] if model.type_pointer else None 2543 ) 2544 2545 return SchemaTypeIdentifier( 2546 schema_pointer=model_schema_pointer, 2547 key_pointer=model_key_pointer, 2548 type_pointer=model_type_pointer, 2549 types_mapping=types_mapping, 2550 parameters=model.parameters or {}, 2551 ) 2552 2553 def create_dynamic_schema_loader( 2554 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2555 ) -> DynamicSchemaLoader: 2556 schema_transformations = [] 2557 if model.schema_transformations: 2558 for transformation_model in model.schema_transformations: 2559 schema_transformations.append( 2560 self._create_component_from_model(model=transformation_model, config=config) 2561 ) 2562 name = "dynamic_properties" 2563 retriever = self._create_component_from_model( 2564 model=model.retriever, 2565 config=config, 2566 name=name, 2567 primary_key=None, 2568 partition_router=self._build_stream_slicer_from_partition_router( 2569 model.retriever, config 2570 ), 2571 transformations=[], 2572 use_cache=True, 2573 
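# The log_formatter below renders the schema-discovery request via format_http_message and flags it as auxiliary (is_auxiliary=True), presumably so it can be told apart from regular record-fetching requests.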
log_formatter=( 2574 lambda response: format_http_message( 2575 response, 2576 f"Schema loader '{name}' request", 2577 f"Request performed in order to extract schema.", 2578 name, 2579 is_auxiliary=True, 2580 ) 2581 ), 2582 ) 2583 schema_type_identifier = self._create_component_from_model( 2584 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2585 ) 2586 schema_filter = ( 2587 self._create_component_from_model( 2588 model.schema_filter, config=config, parameters=model.parameters or {} 2589 ) 2590 if model.schema_filter is not None 2591 else None 2592 ) 2593 2594 return DynamicSchemaLoader( 2595 retriever=retriever, 2596 config=config, 2597 schema_transformations=schema_transformations, 2598 schema_filter=schema_filter, 2599 schema_type_identifier=schema_type_identifier, 2600 parameters=model.parameters or {}, 2601 ) 2602 2603 @staticmethod 2604 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2605 return JsonDecoder(parameters={}) 2606 2607 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2608 return CompositeRawDecoder( 2609 parser=ModelToComponentFactory._get_parser(model, config), 2610 stream_response=False if self._emit_connector_builder_messages else True, 2611 ) 2612 2613 def create_jsonl_decoder( 2614 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2615 ) -> Decoder: 2616 return CompositeRawDecoder( 2617 parser=ModelToComponentFactory._get_parser(model, config), 2618 stream_response=False if self._emit_connector_builder_messages else True, 2619 ) 2620 2621 def create_gzip_decoder( 2622 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2623 ) -> Decoder: 2624 _compressed_response_types = { 2625 "gzip", 2626 "x-gzip", 2627 "gzip, deflate", 2628 "x-gzip, deflate", 2629 "application/zip", 2630 "application/gzip", 2631 "application/x-gzip", 2632 "application/x-zip-compressed", 2633 } 2634 2635 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2636 2637 if self._emit_connector_builder_messages: 2638 # This is very surprising but if the response is not streamed, 2639 # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw, 2640 # which uses urllib3 directly and does not uncompress the data. 
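# Editor's sketch (illustrative only, not the CDK classes): the GzipParser layering used here
# amounts to "maybe decompress, then hand the bytes to the inner parser". With a streamed
# response the raw bytes are still gzip-compressed; with a non-streamed response `requests`
# has already decompressed them, so only the inner parser is needed.
def _example_gzip_then_jsonl() -> None:
    import gzip
    import json

    records = b'{"id": 1}\n{"id": 2}\n'
    compressed = gzip.compress(records)

    # streamed case: decompress first, then parse each JSON line
    parsed_streamed = [json.loads(line) for line in gzip.decompress(compressed).splitlines()]
    # non-streamed case: the payload is already plain bytes
    parsed_plain = [json.loads(line) for line in records.splitlines()]

    assert parsed_streamed == parsed_plain == [{"id": 1}, {"id": 2}]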
2641 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2642 2643 return CompositeRawDecoder.by_headers( 2644 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2645 stream_response=True, 2646 fallback_parser=gzip_parser.inner_parser, 2647 ) 2648 2649 @staticmethod 2650 def create_iterable_decoder( 2651 model: IterableDecoderModel, config: Config, **kwargs: Any 2652 ) -> IterableDecoder: 2653 return IterableDecoder(parameters={}) 2654 2655 @staticmethod 2656 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2657 return XmlDecoder(parameters={}) 2658 2659 def create_zipfile_decoder( 2660 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2661 ) -> ZipfileDecoder: 2662 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2663 2664 @staticmethod 2665 def _get_parser(model: BaseModel, config: Config) -> Parser: 2666 if isinstance(model, JsonDecoderModel): 2667 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2668 return JsonParser() 2669 elif isinstance(model, JsonlDecoderModel): 2670 return JsonLineParser() 2671 elif isinstance(model, CsvDecoderModel): 2672 return CsvParser( 2673 encoding=model.encoding, 2674 delimiter=model.delimiter, 2675 set_values_to_none=model.set_values_to_none, 2676 ) 2677 elif isinstance(model, GzipDecoderModel): 2678 return GzipParser( 2679 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2680 ) 2681 elif isinstance( 2682 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2683 ): 2684 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2685 2686 raise ValueError(f"Unknown decoder type {model}") 2687 2688 @staticmethod 2689 def create_json_file_schema_loader( 2690 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2691 ) -> JsonFileSchemaLoader: 2692 return JsonFileSchemaLoader( 2693 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2694 ) 2695 2696 def create_jwt_authenticator( 2697 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2698 ) -> JwtAuthenticator: 2699 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2700 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2701 request_option = ( 2702 self._create_component_from_model(model.request_option, config) 2703 if model.request_option 2704 else None 2705 ) 2706 return JwtAuthenticator( 2707 config=config, 2708 parameters=model.parameters or {}, 2709 algorithm=JwtAlgorithm(model.algorithm.value), 2710 secret_key=model.secret_key, 2711 base64_encode_secret_key=model.base64_encode_secret_key, 2712 token_duration=model.token_duration, 2713 header_prefix=model.header_prefix, 2714 kid=jwt_headers.kid, 2715 typ=jwt_headers.typ, 2716 cty=jwt_headers.cty, 2717 iss=jwt_payload.iss, 2718 sub=jwt_payload.sub, 2719 aud=jwt_payload.aud, 2720 additional_jwt_headers=model.additional_jwt_headers, 2721 additional_jwt_payload=model.additional_jwt_payload, 2722 passphrase=model.passphrase, 2723 request_option=request_option, 2724 ) 2725 2726 def create_list_partition_router( 2727 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2728 ) -> ListPartitionRouter: 2729 request_option = ( 2730 self._create_component_from_model(model.request_option, config) 2731 if model.request_option 2732 else None 
2733 ) 2734 return ListPartitionRouter( 2735 cursor_field=model.cursor_field, 2736 request_option=request_option, 2737 values=model.values, 2738 config=config, 2739 parameters=model.parameters or {}, 2740 ) 2741 2742 @staticmethod 2743 def create_min_max_datetime( 2744 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2745 ) -> MinMaxDatetime: 2746 return MinMaxDatetime( 2747 datetime=model.datetime, 2748 datetime_format=model.datetime_format or "", 2749 max_datetime=model.max_datetime or "", 2750 min_datetime=model.min_datetime or "", 2751 parameters=model.parameters or {}, 2752 ) 2753 2754 @staticmethod 2755 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2756 return NoAuth(parameters=model.parameters or {}) 2757 2758 @staticmethod 2759 def create_no_pagination( 2760 model: NoPaginationModel, config: Config, **kwargs: Any 2761 ) -> NoPagination: 2762 return NoPagination(parameters={}) 2763 2764 def create_oauth_authenticator( 2765 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2766 ) -> DeclarativeOauth2Authenticator: 2767 profile_assertion = ( 2768 self._create_component_from_model(model.profile_assertion, config=config) 2769 if model.profile_assertion 2770 else None 2771 ) 2772 2773 refresh_token_error_status_codes, refresh_token_error_key, refresh_token_error_values = ( 2774 self._get_refresh_token_error_information(model) 2775 ) 2776 if model.refresh_token_updater: 2777 # ignore type error because fixing it would have a lot of dependencies, revisit later 2778 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2779 config, 2780 InterpolatedString.create( 2781 model.token_refresh_endpoint, # type: ignore 2782 parameters=model.parameters or {}, 2783 ).eval(config), 2784 access_token_name=InterpolatedString.create( 2785 model.access_token_name or "access_token", parameters=model.parameters or {} 2786 ).eval(config), 2787 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2788 expires_in_name=InterpolatedString.create( 2789 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2790 ).eval(config), 2791 client_id_name=InterpolatedString.create( 2792 model.client_id_name or "client_id", parameters=model.parameters or {} 2793 ).eval(config), 2794 client_id=InterpolatedString.create( 2795 model.client_id, parameters=model.parameters or {} 2796 ).eval(config) 2797 if model.client_id 2798 else model.client_id, 2799 client_secret_name=InterpolatedString.create( 2800 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2801 ).eval(config), 2802 client_secret=InterpolatedString.create( 2803 model.client_secret, parameters=model.parameters or {} 2804 ).eval(config) 2805 if model.client_secret 2806 else model.client_secret, 2807 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2808 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2809 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2810 grant_type_name=InterpolatedString.create( 2811 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2812 ).eval(config), 2813 grant_type=InterpolatedString.create( 2814 model.grant_type or "refresh_token", parameters=model.parameters or {} 2815 ).eval(config), 2816 refresh_request_body=InterpolatedMapping( 2817 model.refresh_request_body or {}, parameters=model.parameters or {} 2818 ).eval(config), 2819 refresh_request_headers=InterpolatedMapping( 
2820 model.refresh_request_headers or {}, parameters=model.parameters or {} 2821 ).eval(config), 2822 scopes=model.scopes, 2823 token_expiry_date_format=model.token_expiry_date_format, 2824 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2825 message_repository=self._message_repository, 2826 refresh_token_error_status_codes=refresh_token_error_status_codes, 2827 refresh_token_error_key=refresh_token_error_key, 2828 refresh_token_error_values=refresh_token_error_values, 2829 ) 2830 # ignore type error because fixing it would have a lot of dependencies, revisit later 2831 return DeclarativeOauth2Authenticator( # type: ignore 2832 access_token_name=model.access_token_name or "access_token", 2833 access_token_value=model.access_token_value, 2834 client_id_name=model.client_id_name or "client_id", 2835 client_id=model.client_id, 2836 client_secret_name=model.client_secret_name or "client_secret", 2837 client_secret=model.client_secret, 2838 expires_in_name=model.expires_in_name or "expires_in", 2839 grant_type_name=model.grant_type_name or "grant_type", 2840 grant_type=model.grant_type or "refresh_token", 2841 refresh_request_body=model.refresh_request_body, 2842 refresh_request_headers=model.refresh_request_headers, 2843 refresh_token_name=model.refresh_token_name or "refresh_token", 2844 refresh_token=model.refresh_token, 2845 scopes=model.scopes, 2846 token_expiry_date=model.token_expiry_date, 2847 token_expiry_date_format=model.token_expiry_date_format, 2848 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2849 token_refresh_endpoint=model.token_refresh_endpoint, 2850 config=config, 2851 parameters=model.parameters or {}, 2852 message_repository=self._message_repository, 2853 profile_assertion=profile_assertion, 2854 use_profile_assertion=model.use_profile_assertion, 2855 refresh_token_error_status_codes=refresh_token_error_status_codes, 2856 refresh_token_error_key=refresh_token_error_key, 2857 refresh_token_error_values=refresh_token_error_values, 2858 ) 2859 2860 @staticmethod 2861 def _get_refresh_token_error_information( 2862 model: OAuthAuthenticatorModel, 2863 ) -> Tuple[Tuple[int, ...], str, Tuple[str, ...]]: 2864 """ 2865 In a previous version of the CDK, raising the auth error as a config_error was only done if a refresh token updater was 2866 defined. As a transition, we added those fields on the OAuthAuthenticatorModel. This method ensures that the 2867 information is defined only once and returns the right fields.
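If neither the OAuthAuthenticatorModel nor the RefreshTokenUpdaterModel defines these fields,
the defaults below are returned, i.e. ((400,), "error", ("invalid_grant", "invalid_permissions")).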
2868 """ 2869 refresh_token_updater = model.refresh_token_updater 2870 is_defined_on_refresh_token_updated = refresh_token_updater and ( 2871 refresh_token_updater.refresh_token_error_status_codes 2872 or refresh_token_updater.refresh_token_error_key 2873 or refresh_token_updater.refresh_token_error_values 2874 ) 2875 is_defined_on_oauth_authenticator = ( 2876 model.refresh_token_error_status_codes 2877 or model.refresh_token_error_key 2878 or model.refresh_token_error_values 2879 ) 2880 if is_defined_on_refresh_token_updated and is_defined_on_oauth_authenticator: 2881 raise ValueError( 2882 "refresh_token_error should either be defined on the OAuthAuthenticatorModel or the RefreshTokenUpdaterModel, not both" 2883 ) 2884 2885 if is_defined_on_refresh_token_updated: 2886 not_optional_refresh_token_updater: RefreshTokenUpdaterModel = refresh_token_updater # type: ignore # we know from the condition that this is not None 2887 return ( 2888 tuple(not_optional_refresh_token_updater.refresh_token_error_status_codes) 2889 if not_optional_refresh_token_updater.refresh_token_error_status_codes 2890 else (), 2891 not_optional_refresh_token_updater.refresh_token_error_key or "", 2892 tuple(not_optional_refresh_token_updater.refresh_token_error_values) 2893 if not_optional_refresh_token_updater.refresh_token_error_values 2894 else (), 2895 ) 2896 elif is_defined_on_oauth_authenticator: 2897 return ( 2898 tuple(model.refresh_token_error_status_codes) 2899 if model.refresh_token_error_status_codes 2900 else (), 2901 model.refresh_token_error_key or "", 2902 tuple(model.refresh_token_error_values) if model.refresh_token_error_values else (), 2903 ) 2904 2905 # returning default values we think cover most cases 2906 return (400,), "error", ("invalid_grant", "invalid_permissions") 2907 2908 def create_offset_increment( 2909 self, 2910 model: OffsetIncrementModel, 2911 config: Config, 2912 decoder: Decoder, 2913 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2914 **kwargs: Any, 2915 ) -> OffsetIncrement: 2916 if isinstance(decoder, PaginationDecoderDecorator): 2917 inner_decoder = decoder.decoder 2918 else: 2919 inner_decoder = decoder 2920 decoder = PaginationDecoderDecorator(decoder=decoder) 2921 2922 if self._is_supported_decoder_for_pagination(inner_decoder): 2923 decoder_to_use = decoder 2924 else: 2925 raise ValueError( 2926 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2927 ) 2928 2929 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2930 # so that it can be shared by OffSetIncrement and RecordSelector. However, due to how we instantiate the 2931 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2932 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2933 # When we have more time to investigate we can look into reusing the same component. 2934 extractor = ( 2935 self._create_component_from_model( 2936 model=extractor_model, config=config, decoder=decoder_to_use 2937 ) 2938 if extractor_model 2939 else None 2940 ) 2941 2942 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 2943 # If page_size is a string that represents an integer (not an interpolation), convert it back. 
2944 page_size = model.page_size 2945 if isinstance(page_size, str) and page_size.isdigit(): 2946 page_size = int(page_size) 2947 2948 return OffsetIncrement( 2949 page_size=page_size, 2950 config=config, 2951 decoder=decoder_to_use, 2952 extractor=extractor, 2953 inject_on_first_request=model.inject_on_first_request or False, 2954 parameters=model.parameters or {}, 2955 ) 2956 2957 @staticmethod 2958 def create_page_increment( 2959 model: PageIncrementModel, config: Config, **kwargs: Any 2960 ) -> PageIncrement: 2961 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 2962 # If page_size is a string that represents an integer (not an interpolation), convert it back. 2963 page_size = model.page_size 2964 if isinstance(page_size, str) and page_size.isdigit(): 2965 page_size = int(page_size) 2966 2967 return PageIncrement( 2968 page_size=page_size, 2969 config=config, 2970 start_from_page=model.start_from_page or 0, 2971 inject_on_first_request=model.inject_on_first_request or False, 2972 parameters=model.parameters or {}, 2973 ) 2974 2975 def create_parent_stream_config( 2976 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2977 ) -> ParentStreamConfig: 2978 declarative_stream = self._create_component_from_model( 2979 model.stream, 2980 config=config, 2981 is_parent=True, 2982 **kwargs, 2983 ) 2984 request_option = ( 2985 self._create_component_from_model(model.request_option, config=config) 2986 if model.request_option 2987 else None 2988 ) 2989 2990 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2991 raise ValueError( 2992 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 2993 ) 2994 2995 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2996 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2997 ) 2998 2999 return ParentStreamConfig( 3000 parent_key=model.parent_key, 3001 request_option=request_option, 3002 stream=declarative_stream, 3003 partition_field=model.partition_field, 3004 config=config, 3005 incremental_dependency=model.incremental_dependency or False, 3006 parameters=model.parameters or {}, 3007 extra_fields=model.extra_fields, 3008 lazy_read_pointer=model_lazy_read_pointer, 3009 ) 3010 3011 def create_properties_from_endpoint( 3012 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 3013 ) -> PropertiesFromEndpoint: 3014 retriever = self._create_component_from_model( 3015 model=model.retriever, 3016 config=config, 3017 name="dynamic_properties", 3018 primary_key=None, 3019 stream_slicer=None, 3020 transformations=[], 3021 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to different 3022 ) 3023 return PropertiesFromEndpoint( 3024 property_field_path=model.property_field_path, 3025 retriever=retriever, 3026 config=config, 3027 parameters=model.parameters or {}, 3028 ) 3029 3030 def create_property_chunking( 3031 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 3032 ) -> PropertyChunking: 3033 record_merge_strategy = ( 3034 self._create_component_from_model( 3035 model=model.record_merge_strategy, config=config, **kwargs 3036 ) 3037 if model.record_merge_strategy 3038 else None 3039 ) 3040 3041 property_limit_type: PropertyLimitType 3042 match model.property_limit_type: 3043 case 
PropertyLimitTypeModel.property_count: 3044 property_limit_type = PropertyLimitType.property_count 3045 case PropertyLimitTypeModel.characters: 3046 property_limit_type = PropertyLimitType.characters 3047 case _: 3048 raise ValueError(f"Invalid PropertyLimitType {property_limit_type}") 3049 3050 return PropertyChunking( 3051 property_limit_type=property_limit_type, 3052 property_limit=model.property_limit, 3053 record_merge_strategy=record_merge_strategy, 3054 config=config, 3055 parameters=model.parameters or {}, 3056 ) 3057 3058 def create_query_properties( 3059 self, model: QueryPropertiesModel, config: Config, *, stream_name: str, **kwargs: Any 3060 ) -> QueryProperties: 3061 if isinstance(model.property_list, list): 3062 property_list = model.property_list 3063 else: 3064 property_list = self._create_component_from_model( 3065 model=model.property_list, config=config, **kwargs 3066 ) 3067 3068 property_chunking = ( 3069 self._create_component_from_model( 3070 model=model.property_chunking, config=config, **kwargs 3071 ) 3072 if model.property_chunking 3073 else None 3074 ) 3075 3076 property_selector = ( 3077 self._create_component_from_model( 3078 model=model.property_selector, config=config, stream_name=stream_name, **kwargs 3079 ) 3080 if model.property_selector 3081 else None 3082 ) 3083 3084 return QueryProperties( 3085 property_list=property_list, 3086 always_include_properties=model.always_include_properties, 3087 property_chunking=property_chunking, 3088 property_selector=property_selector, 3089 config=config, 3090 parameters=model.parameters or {}, 3091 ) 3092 3093 def create_json_schema_property_selector( 3094 self, 3095 model: JsonSchemaPropertySelectorModel, 3096 config: Config, 3097 *, 3098 stream_name: str, 3099 **kwargs: Any, 3100 ) -> JsonSchemaPropertySelector: 3101 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 3102 3103 transformations = [] 3104 if model.transformations: 3105 for transformation_model in model.transformations: 3106 transformations.append( 3107 self._create_component_from_model(model=transformation_model, config=config) 3108 ) 3109 3110 return JsonSchemaPropertySelector( 3111 configured_stream=configured_stream, 3112 properties_transformations=transformations, 3113 config=config, 3114 parameters=model.parameters or {}, 3115 ) 3116 3117 @staticmethod 3118 def create_record_filter( 3119 model: RecordFilterModel, config: Config, **kwargs: Any 3120 ) -> RecordFilter: 3121 return RecordFilter( 3122 condition=model.condition or "", config=config, parameters=model.parameters or {} 3123 ) 3124 3125 @staticmethod 3126 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 3127 return RequestPath(parameters={}) 3128 3129 @staticmethod 3130 def create_request_option( 3131 model: RequestOptionModel, config: Config, **kwargs: Any 3132 ) -> RequestOption: 3133 inject_into = RequestOptionType(model.inject_into.value) 3134 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3135 [ 3136 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3137 for segment in model.field_path 3138 ] 3139 if model.field_path 3140 else None 3141 ) 3142 field_name = ( 3143 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3144 if model.field_name 3145 else None 3146 ) 3147 return RequestOption( 3148 field_name=field_name, 3149 field_path=field_path, 3150 inject_into=inject_into, 3151 parameters=kwargs.get("parameters", {}), 3152 ) 3153 3154 def 
create_record_selector( 3155 self, 3156 model: RecordSelectorModel, 3157 config: Config, 3158 *, 3159 name: str, 3160 transformations: List[RecordTransformation] | None = None, 3161 decoder: Decoder | None = None, 3162 client_side_incremental_sync_cursor: Optional[Cursor] = None, 3163 file_uploader: Optional[DefaultFileUploader] = None, 3164 **kwargs: Any, 3165 ) -> RecordSelector: 3166 extractor = self._create_component_from_model( 3167 model=model.extractor, decoder=decoder, config=config 3168 ) 3169 record_filter = ( 3170 self._create_component_from_model(model.record_filter, config=config) 3171 if model.record_filter 3172 else None 3173 ) 3174 3175 transform_before_filtering = ( 3176 False if model.transform_before_filtering is None else model.transform_before_filtering 3177 ) 3178 if client_side_incremental_sync_cursor: 3179 record_filter = ClientSideIncrementalRecordFilterDecorator( 3180 config=config, 3181 parameters=model.parameters, 3182 condition=model.record_filter.condition 3183 if (model.record_filter and hasattr(model.record_filter, "condition")) 3184 else None, 3185 cursor=client_side_incremental_sync_cursor, 3186 ) 3187 transform_before_filtering = ( 3188 True 3189 if model.transform_before_filtering is None 3190 else model.transform_before_filtering 3191 ) 3192 3193 if model.schema_normalization is None: 3194 # default to no schema normalization if not set 3195 model.schema_normalization = SchemaNormalizationModel.None_ 3196 3197 schema_normalization = ( 3198 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3199 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3200 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3201 ) 3202 3203 return RecordSelector( 3204 extractor=extractor, 3205 name=name, 3206 config=config, 3207 record_filter=record_filter, 3208 transformations=transformations or [], 3209 file_uploader=file_uploader, 3210 schema_normalization=schema_normalization, 3211 parameters=model.parameters or {}, 3212 transform_before_filtering=transform_before_filtering, 3213 ) 3214 3215 @staticmethod 3216 def create_remove_fields( 3217 model: RemoveFieldsModel, config: Config, **kwargs: Any 3218 ) -> RemoveFields: 3219 return RemoveFields( 3220 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 3221 ) 3222 3223 def create_selective_authenticator( 3224 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3225 ) -> DeclarativeAuthenticator: 3226 authenticators = { 3227 name: self._create_component_from_model(model=auth, config=config) 3228 for name, auth in model.authenticators.items() 3229 } 3230 # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error 3231 return SelectiveAuthenticator( # type: ignore[abstract] 3232 config=config, 3233 authenticators=authenticators, 3234 authenticator_selection_path=model.authenticator_selection_path, 3235 **kwargs, 3236 ) 3237 3238 @staticmethod 3239 def create_legacy_session_token_authenticator( 3240 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3241 ) -> LegacySessionTokenAuthenticator: 3242 return LegacySessionTokenAuthenticator( 3243 api_url=url_base, 3244 header=model.header, 3245 login_url=model.login_url, 3246 password=model.password or "", 3247 session_token=model.session_token or "", 3248 session_token_response_key=model.session_token_response_key 
or "", 3249 username=model.username or "", 3250 validate_session_url=model.validate_session_url, 3251 config=config, 3252 parameters=model.parameters or {}, 3253 ) 3254 3255 def create_simple_retriever( 3256 self, 3257 model: SimpleRetrieverModel, 3258 config: Config, 3259 *, 3260 name: str, 3261 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3262 request_options_provider: Optional[RequestOptionsProvider] = None, 3263 cursor: Optional[Cursor] = None, 3264 has_stop_condition_cursor: bool = False, 3265 is_client_side_incremental_sync: bool = False, 3266 transformations: List[RecordTransformation], 3267 file_uploader: Optional[DefaultFileUploader] = None, 3268 incremental_sync: Optional[ 3269 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3270 ] = None, 3271 use_cache: Optional[bool] = None, 3272 log_formatter: Optional[Callable[[Response], Any]] = None, 3273 partition_router: Optional[PartitionRouter] = None, 3274 **kwargs: Any, 3275 ) -> SimpleRetriever: 3276 def _get_url(req: Requester) -> str: 3277 """ 3278 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3279 This is needed because the URL is not set until the requester is created. 3280 """ 3281 3282 _url: str = ( 3283 model.requester.url 3284 if hasattr(model.requester, "url") and model.requester.url is not None 3285 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3286 ) 3287 _url_base: str = ( 3288 model.requester.url_base 3289 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3290 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3291 ) 3292 3293 return _url or _url_base 3294 3295 if cursor is None: 3296 cursor = FinalStateCursor(name, None, self._message_repository) 3297 3298 decoder = ( 3299 self._create_component_from_model(model=model.decoder, config=config) 3300 if model.decoder 3301 else JsonDecoder(parameters={}) 3302 ) 3303 record_selector = self._create_component_from_model( 3304 model=model.record_selector, 3305 name=name, 3306 config=config, 3307 decoder=decoder, 3308 transformations=transformations, 3309 client_side_incremental_sync_cursor=cursor if is_client_side_incremental_sync else None, 3310 file_uploader=file_uploader, 3311 ) 3312 3313 query_properties: Optional[QueryProperties] = None 3314 query_properties_key: Optional[str] = None 3315 self._ensure_query_properties_to_model(model.requester) 3316 if self._has_query_properties_in_request_parameters(model.requester): 3317 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3318 # places instead of default to request_parameters which isn't clearly documented 3319 if ( 3320 hasattr(model.requester, "fetch_properties_from_endpoint") 3321 and model.requester.fetch_properties_from_endpoint 3322 ): 3323 raise ValueError( 3324 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3325 ) 3326 3327 query_properties_definitions = [] 3328 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _has_query_properties_in_request_parameters() 3329 if isinstance(request_parameter, QueryPropertiesModel): 3330 query_properties_key = key 3331 query_properties_definitions.append(request_parameter) 3332 3333 if len(query_properties_definitions) > 1: 
3334 raise ValueError( 3335 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3336 ) 3337 3338 if len(query_properties_definitions) == 1: 3339 query_properties = self._create_component_from_model( 3340 model=query_properties_definitions[0], stream_name=name, config=config 3341 ) 3342 3343 # Removes QueryProperties components from the interpolated mappings because it has been designed 3344 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 3345 # instead of through jinja interpolation 3346 if hasattr(model.requester, "request_parameters") and isinstance( 3347 model.requester.request_parameters, Mapping 3348 ): 3349 model.requester.request_parameters = self._remove_query_properties( 3350 model.requester.request_parameters 3351 ) 3352 elif ( 3353 hasattr(model.requester, "fetch_properties_from_endpoint") 3354 and model.requester.fetch_properties_from_endpoint 3355 ): 3356 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3357 query_properties_definition = QueryPropertiesModel( 3358 type="QueryProperties", 3359 property_list=model.requester.fetch_properties_from_endpoint, 3360 always_include_properties=None, 3361 property_chunking=None, 3362 ) # type: ignore # $parameters has a default value 3363 3364 query_properties = self.create_query_properties( 3365 model=query_properties_definition, 3366 stream_name=name, 3367 config=config, 3368 ) 3369 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3370 query_properties = self.create_query_properties( 3371 model=model.requester.query_properties, 3372 stream_name=name, 3373 config=config, 3374 ) 3375 3376 requester = self._create_component_from_model( 3377 model=model.requester, 3378 decoder=decoder, 3379 name=name, 3380 query_properties_key=query_properties_key, 3381 use_cache=use_cache, 3382 config=config, 3383 ) 3384 3385 if not request_options_provider: 3386 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3387 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3388 partition_router, PartitionRouter 3389 ): 3390 request_options_provider = partition_router 3391 3392 paginator = ( 3393 self._create_component_from_model( 3394 model=model.paginator, 3395 config=config, 3396 url_base=_get_url(requester), 3397 extractor_model=model.record_selector.extractor, 3398 decoder=decoder, 3399 cursor_used_for_stop_condition=cursor if has_stop_condition_cursor else None, 3400 ) 3401 if model.paginator 3402 else NoPagination(parameters={}) 3403 ) 3404 3405 ignore_stream_slicer_parameters_on_paginated_requests = ( 3406 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3407 ) 3408 3409 if ( 3410 model.partition_router 3411 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3412 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3413 and any( 3414 parent_stream_config.lazy_read_pointer 3415 for parent_stream_config in model.partition_router.parent_stream_configs 3416 ) 3417 ): 3418 if incremental_sync: 3419 if incremental_sync.type != "DatetimeBasedCursor": 3420 raise ValueError( 3421 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3422 ) 3423 3424 elif incremental_sync.step or incremental_sync.cursor_granularity: 3425 raise ValueError( 3426 f"Found more that one slice per parent. 
LazySimpleRetriever only supports single slice read for stream - {name}." 3427 ) 3428 3429 if model.decoder and model.decoder.type != "JsonDecoder": 3430 raise ValueError( 3431 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 3432 ) 3433 3434 return LazySimpleRetriever( 3435 name=name, 3436 paginator=paginator, 3437 primary_key=primary_key, 3438 requester=requester, 3439 record_selector=record_selector, 3440 stream_slicer=_NO_STREAM_SLICING, 3441 request_option_provider=request_options_provider, 3442 config=config, 3443 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3444 parameters=model.parameters or {}, 3445 ) 3446 3447 if ( 3448 model.record_selector.record_filter 3449 and model.pagination_reset 3450 and model.pagination_reset.limits 3451 ): 3452 raise ValueError("PaginationResetLimits are not supported while having record filter.") 3453 3454 return SimpleRetriever( 3455 name=name, 3456 paginator=paginator, 3457 primary_key=primary_key, 3458 requester=requester, 3459 record_selector=record_selector, 3460 stream_slicer=_NO_STREAM_SLICING, 3461 request_option_provider=request_options_provider, 3462 config=config, 3463 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3464 additional_query_properties=query_properties, 3465 log_formatter=self._get_log_formatter(log_formatter, name), 3466 pagination_tracker_factory=self._create_pagination_tracker_factory( 3467 model.pagination_reset, cursor 3468 ), 3469 parameters=model.parameters or {}, 3470 ) 3471 3472 def _create_pagination_tracker_factory( 3473 self, model: Optional[PaginationResetModel], cursor: Cursor 3474 ) -> Callable[[], PaginationTracker]: 3475 if model is None: 3476 return lambda: PaginationTracker() 3477 3478 # Until we figure out a way to use any cursor for PaginationTracker, we will have to have this cursor selector logic 3479 cursor_factory: Callable[[], Optional[ConcurrentCursor]] = lambda: None 3480 if model.action == PaginationResetActionModel.RESET: 3481 # in that case, we will let cursor_factory to return None even if the stream has a cursor 3482 pass 3483 elif model.action == PaginationResetActionModel.SPLIT_USING_CURSOR: 3484 if isinstance(cursor, ConcurrentCursor): 3485 cursor_factory = lambda: cursor.copy_without_state() # type: ignore # the if condition validates that it is a ConcurrentCursor 3486 elif isinstance(cursor, ConcurrentPerPartitionCursor): 3487 cursor_factory = lambda: cursor._cursor_factory.create( # type: ignore # if this becomes a problem, we would need to extract the cursor_factory instantiation logic and make it accessible here 3488 {}, datetime.timedelta(0) 3489 ) 3490 elif not isinstance(cursor, FinalStateCursor): 3491 LOGGER.warning( 3492 "Unknown cursor for PaginationTracker. 
Pagination resets might not work properly" 3493 ) 3494 else: 3495 raise ValueError(f"Unknown PaginationReset action: {model.action}") 3496 3497 limit = model.limits.number_of_records if model and model.limits else None 3498 return lambda: PaginationTracker(cursor_factory(), limit) 3499 3500 def _get_log_formatter( 3501 self, log_formatter: Callable[[Response], Any] | None, name: str 3502 ) -> Callable[[Response], Any] | None: 3503 if self._should_limit_slices_fetched(): 3504 return ( 3505 ( 3506 lambda response: format_http_message( 3507 response, 3508 f"Stream '{name}' request", 3509 f"Request performed in order to extract records for stream '{name}'", 3510 name, 3511 ) 3512 ) 3513 if not log_formatter 3514 else log_formatter 3515 ) 3516 return None 3517 3518 def _should_limit_slices_fetched(self) -> bool: 3519 """ 3520 Returns True if the number of slices fetched should be limited, False otherwise. 3521 This is used to limit the number of slices fetched during tests. 3522 """ 3523 return bool(self._limit_slices_fetched or self._emit_connector_builder_messages) 3524 3525 @staticmethod 3526 def _has_query_properties_in_request_parameters( 3527 requester: Union[HttpRequesterModel, CustomRequesterModel], 3528 ) -> bool: 3529 if not hasattr(requester, "request_parameters"): 3530 return False 3531 request_parameters = requester.request_parameters 3532 if request_parameters and isinstance(request_parameters, Mapping): 3533 for request_parameter in request_parameters.values(): 3534 if isinstance(request_parameter, QueryPropertiesModel): 3535 return True 3536 return False 3537 3538 @staticmethod 3539 def _remove_query_properties( 3540 request_parameters: Mapping[str, Union[str, QueryPropertiesModel]], 3541 ) -> Mapping[str, str]: 3542 return { 3543 parameter_field: request_parameter 3544 for parameter_field, request_parameter in request_parameters.items() 3545 if not isinstance(request_parameter, QueryPropertiesModel) 3546 } 3547 3548 def create_state_delegating_stream( 3549 self, 3550 model: StateDelegatingStreamModel, 3551 config: Config, 3552 has_parent_state: Optional[bool] = None, 3553 **kwargs: Any, 3554 ) -> DefaultStream: 3555 if ( 3556 model.full_refresh_stream.name != model.name 3557 or model.name != model.incremental_stream.name 3558 ): 3559 raise ValueError( 3560 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 
3561 ) 3562 3563 stream_model = self._get_state_delegating_stream_model( 3564 False if has_parent_state is None else has_parent_state, model 3565 ) 3566 3567 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel 3568 3569 def _get_state_delegating_stream_model( 3570 self, has_parent_state: bool, model: StateDelegatingStreamModel 3571 ) -> DeclarativeStreamModel: 3572 return ( 3573 model.incremental_stream 3574 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3575 else model.full_refresh_stream 3576 ) 3577 3578 def _create_async_job_status_mapping( 3579 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3580 ) -> Mapping[str, AsyncJobStatus]: 3581 api_status_to_cdk_status = {} 3582 for cdk_status, api_statuses in model.dict().items(): 3583 if cdk_status == "type": 3584 # This is an element of the dict because of the typing of the CDK but it is not a CDK status 3585 continue 3586 3587 for status in api_statuses: 3588 if status in api_status_to_cdk_status: 3589 raise ValueError( 3590 f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once" 3591 ) 3592 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3593 return api_status_to_cdk_status 3594 3595 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3596 match status: 3597 case "running": 3598 return AsyncJobStatus.RUNNING 3599 case "completed": 3600 return AsyncJobStatus.COMPLETED 3601 case "failed": 3602 return AsyncJobStatus.FAILED 3603 case "timeout": 3604 return AsyncJobStatus.TIMED_OUT 3605 case _: 3606 raise ValueError(f"Unsupported CDK status {status}") 3607 3608 def create_async_retriever( 3609 self, 3610 model: AsyncRetrieverModel, 3611 config: Config, 3612 *, 3613 name: str, 3614 primary_key: Optional[ 3615 Union[str, List[str], List[List[str]]] 3616 ], # this seems to be needed to match create_simple_retriever 3617 stream_slicer: Optional[StreamSlicer], 3618 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3619 transformations: List[RecordTransformation], 3620 **kwargs: Any, 3621 ) -> AsyncRetriever: 3622 if model.download_target_requester and not model.download_target_extractor: 3623 raise ValueError( 3624 f"`download_target_extractor` required if using a `download_target_requester`" 3625 ) 3626 3627 def _get_download_retriever( 3628 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3629 ) -> SimpleRetriever: 3630 # We create a record selector for the download retriever 3631 # with no schema normalization and no transformations, neither record filter 3632 # as all this occurs in the record_selector of the AsyncRetriever 3633 record_selector = RecordSelector( 3634 extractor=extractor, 3635 name=name, 3636 record_filter=None, 3637 transformations=[], 3638 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3639 config=config, 3640 parameters={}, 3641 ) 3642 paginator = ( 3643 self._create_component_from_model( 3644 model=model.download_paginator, 3645 decoder=_decoder, 3646 config=config, 3647 url_base="", 3648 ) 3649 if model.download_paginator 3650 else NoPagination(parameters={}) 3651 ) 3652 3653 return SimpleRetriever( 3654 requester=requester, 3655 record_selector=record_selector, 3656 primary_key=None, 3657 name=name, 3658 paginator=paginator, 3659 config=config, 3660 parameters={}, 3661 
log_formatter=self._get_log_formatter(None, name), 3662 ) 3663 3664 def _get_job_timeout() -> datetime.timedelta: 3665 user_defined_timeout: Optional[int] = ( 3666 int( 3667 InterpolatedString.create( 3668 str(model.polling_job_timeout), 3669 parameters={}, 3670 ).eval(config) 3671 ) 3672 if model.polling_job_timeout 3673 else None 3674 ) 3675 3676 # check for user defined timeout during the test read or 15 minutes 3677 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3678 # default value for non-connector builder is 60 minutes. 3679 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3680 3681 return ( 3682 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3683 ) 3684 3685 decoder = ( 3686 self._create_component_from_model(model=model.decoder, config=config) 3687 if model.decoder 3688 else JsonDecoder(parameters={}) 3689 ) 3690 record_selector = self._create_component_from_model( 3691 model=model.record_selector, 3692 config=config, 3693 decoder=decoder, 3694 name=name, 3695 transformations=transformations, 3696 client_side_incremental_sync=client_side_incremental_sync, 3697 ) 3698 3699 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3700 if self._should_limit_slices_fetched(): 3701 stream_slicer = cast( 3702 StreamSlicer, 3703 StreamSlicerTestReadDecorator( 3704 wrapped_slicer=stream_slicer, 3705 maximum_number_of_slices=self._limit_slices_fetched or 5, 3706 ), 3707 ) 3708 3709 creation_requester = self._create_component_from_model( 3710 model=model.creation_requester, 3711 decoder=decoder, 3712 config=config, 3713 name=f"job creation - {name}", 3714 ) 3715 polling_requester = self._create_component_from_model( 3716 model=model.polling_requester, 3717 decoder=decoder, 3718 config=config, 3719 name=f"job polling - {name}", 3720 ) 3721 job_download_components_name = f"job download - {name}" 3722 download_decoder = ( 3723 self._create_component_from_model(model=model.download_decoder, config=config) 3724 if model.download_decoder 3725 else JsonDecoder(parameters={}) 3726 ) 3727 download_extractor = ( 3728 self._create_component_from_model( 3729 model=model.download_extractor, 3730 config=config, 3731 decoder=download_decoder, 3732 parameters=model.parameters, 3733 ) 3734 if model.download_extractor 3735 else DpathExtractor( 3736 [], 3737 config=config, 3738 decoder=download_decoder, 3739 parameters=model.parameters or {}, 3740 ) 3741 ) 3742 download_requester = self._create_component_from_model( 3743 model=model.download_requester, 3744 decoder=download_decoder, 3745 config=config, 3746 name=job_download_components_name, 3747 ) 3748 download_retriever = _get_download_retriever( 3749 download_requester, download_extractor, download_decoder 3750 ) 3751 abort_requester = ( 3752 self._create_component_from_model( 3753 model=model.abort_requester, 3754 decoder=decoder, 3755 config=config, 3756 name=f"job abort - {name}", 3757 ) 3758 if model.abort_requester 3759 else None 3760 ) 3761 delete_requester = ( 3762 self._create_component_from_model( 3763 model=model.delete_requester, 3764 decoder=decoder, 3765 config=config, 3766 name=f"job delete - {name}", 3767 ) 3768 if model.delete_requester 3769 else None 3770 ) 3771 download_target_requester = ( 3772 self._create_component_from_model( 3773 model=model.download_target_requester, 3774 decoder=decoder, 3775 config=config, 3776 name=f"job extract_url - {name}", 3777 ) 3778 if model.download_target_requester 3779 else None 3780 ) 3781 
status_extractor = self._create_component_from_model( 3782 model=model.status_extractor, decoder=decoder, config=config, name=name 3783 ) 3784 download_target_extractor = ( 3785 self._create_component_from_model( 3786 model=model.download_target_extractor, 3787 decoder=decoder, 3788 config=config, 3789 name=name, 3790 ) 3791 if model.download_target_extractor 3792 else None 3793 ) 3794 3795 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3796 creation_requester=creation_requester, 3797 polling_requester=polling_requester, 3798 download_retriever=download_retriever, 3799 download_target_requester=download_target_requester, 3800 abort_requester=abort_requester, 3801 delete_requester=delete_requester, 3802 status_extractor=status_extractor, 3803 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3804 download_target_extractor=download_target_extractor, 3805 job_timeout=_get_job_timeout(), 3806 ) 3807 3808 async_job_partition_router = AsyncJobPartitionRouter( 3809 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3810 job_repository, 3811 stream_slices, 3812 self._job_tracker, 3813 self._message_repository, 3814 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk 3815 has_bulk_parent=False, 3816 # set the `job_max_retry` to 1 for the `Connector Builder`` use-case. 3817 # `None` == default retry is set to 3 attempts, under the hood. 3818 job_max_retry=1 if self._emit_connector_builder_messages else None, 3819 ), 3820 stream_slicer=stream_slicer, 3821 config=config, 3822 parameters=model.parameters or {}, 3823 ) 3824 3825 return AsyncRetriever( 3826 record_selector=record_selector, 3827 stream_slicer=async_job_partition_router, 3828 config=config, 3829 parameters=model.parameters or {}, 3830 ) 3831 3832 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3833 config_migrations = [ 3834 self._create_component_from_model(migration, config) 3835 for migration in ( 3836 model.config_normalization_rules.config_migrations 3837 if ( 3838 model.config_normalization_rules 3839 and model.config_normalization_rules.config_migrations 3840 ) 3841 else [] 3842 ) 3843 ] 3844 config_transformations = [ 3845 self._create_component_from_model(transformation, config) 3846 for transformation in ( 3847 model.config_normalization_rules.transformations 3848 if ( 3849 model.config_normalization_rules 3850 and model.config_normalization_rules.transformations 3851 ) 3852 else [] 3853 ) 3854 ] 3855 config_validations = [ 3856 self._create_component_from_model(validation, config) 3857 for validation in ( 3858 model.config_normalization_rules.validations 3859 if ( 3860 model.config_normalization_rules 3861 and model.config_normalization_rules.validations 3862 ) 3863 else [] 3864 ) 3865 ] 3866 3867 return Spec( 3868 connection_specification=model.connection_specification, 3869 documentation_url=model.documentation_url, 3870 advanced_auth=model.advanced_auth, 3871 parameters={}, 3872 config_migrations=config_migrations, 3873 config_transformations=config_transformations, 3874 config_validations=config_validations, 3875 ) 3876 3877 def create_substream_partition_router( 3878 self, 3879 model: SubstreamPartitionRouterModel, 3880 config: Config, 3881 *, 3882 stream_name: str, 3883 **kwargs: Any, 3884 ) -> SubstreamPartitionRouter: 3885 parent_stream_configs = [] 3886 if model.parent_stream_configs: 3887 parent_stream_configs.extend( 3888 [ 3889 
self.create_parent_stream_config_with_substream_wrapper( 3890 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3891 ) 3892 for parent_stream_config in model.parent_stream_configs 3893 ] 3894 ) 3895 3896 return SubstreamPartitionRouter( 3897 parent_stream_configs=parent_stream_configs, 3898 parameters=model.parameters or {}, 3899 config=config, 3900 ) 3901 3902 def create_parent_stream_config_with_substream_wrapper( 3903 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 3904 ) -> Any: 3905 # getting the parent state 3906 child_state = self._connector_state_manager.get_stream_state(stream_name, None) 3907 3908 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3909 has_parent_state = bool( 3910 self._connector_state_manager.get_stream_state(stream_name, None) 3911 if model.incremental_dependency 3912 else False 3913 ) 3914 connector_state_manager = self._instantiate_parent_stream_state_manager( 3915 child_state, config, model, has_parent_state 3916 ) 3917 3918 substream_factory = ModelToComponentFactory( 3919 connector_state_manager=connector_state_manager, 3920 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3921 limit_slices_fetched=self._limit_slices_fetched, 3922 emit_connector_builder_messages=self._emit_connector_builder_messages, 3923 disable_retries=self._disable_retries, 3924 disable_cache=self._disable_cache, 3925 message_repository=StateFilteringMessageRepository( 3926 LogAppenderMessageRepositoryDecorator( 3927 { 3928 "airbyte_cdk": {"stream": {"is_substream": True}}, 3929 "http": {"is_auxiliary": True}, 3930 }, 3931 self._message_repository, 3932 self._evaluate_log_level(self._emit_connector_builder_messages), 3933 ), 3934 ), 3935 api_budget=self._api_budget, 3936 ) 3937 3938 return substream_factory.create_parent_stream_config( 3939 model=model, config=config, stream_name=stream_name, **kwargs 3940 ) 3941 3942 def _instantiate_parent_stream_state_manager( 3943 self, 3944 child_state: MutableMapping[str, Any], 3945 config: Config, 3946 model: ParentStreamConfigModel, 3947 has_parent_state: bool, 3948 ) -> ConnectorStateManager: 3949 """ 3950 With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the 3951 `set_initial_state` flow that existed for the declarative cursors. This state is taken from 3952 self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account 3953 for the MessageRepository being different). So we need to pass a ConnectorStateManager to the 3954 ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if 3955 incremental_dependency is set. 3956 """ 3957 if model.incremental_dependency and child_state: 3958 parent_stream_name = model.stream.name or "" 3959 parent_state = ConcurrentPerPartitionCursor.get_parent_state( 3960 child_state, parent_stream_name 3961 ) 3962 3963 if not parent_state: 3964 # there are two migration cases: state value from child stream or from global state 3965 parent_state = ConcurrentPerPartitionCursor.get_global_state( 3966 child_state, parent_stream_name 3967 ) 3968 3969 if not parent_state and not isinstance(parent_state, dict): 3970 cursor_values = child_state.values() 3971 if cursor_values and len(cursor_values) == 1: 3972 # We assume the child state is a pair `{<cursor_field>: <cursor_value>}` and we will use the 3973 # cursor value as a parent state. 
3974 incremental_sync_model: Union[ 3975 DatetimeBasedCursorModel, 3976 IncrementingCountCursorModel, 3977 ] = ( 3978 model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream 3979 if isinstance(model.stream, DeclarativeStreamModel) 3980 else self._get_state_delegating_stream_model( 3981 has_parent_state, model.stream 3982 ).incremental_sync 3983 ) 3984 cursor_field = InterpolatedString.create( 3985 incremental_sync_model.cursor_field, 3986 parameters=incremental_sync_model.parameters or {}, 3987 ).eval(config) 3988 parent_state = AirbyteStateMessage( 3989 type=AirbyteStateType.STREAM, 3990 stream=AirbyteStreamState( 3991 stream_descriptor=StreamDescriptor( 3992 name=parent_stream_name, namespace=None 3993 ), 3994 stream_state=AirbyteStateBlob( 3995 {cursor_field: list(cursor_values)[0]} 3996 ), 3997 ), 3998 ) 3999 return ConnectorStateManager([parent_state] if parent_state else []) 4000 4001 return ConnectorStateManager([]) 4002 4003 @staticmethod 4004 def create_wait_time_from_header( 4005 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 4006 ) -> WaitTimeFromHeaderBackoffStrategy: 4007 return WaitTimeFromHeaderBackoffStrategy( 4008 header=model.header, 4009 parameters=model.parameters or {}, 4010 config=config, 4011 regex=model.regex, 4012 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 4013 if model.max_waiting_time_in_seconds is not None 4014 else None, 4015 ) 4016 4017 @staticmethod 4018 def create_wait_until_time_from_header( 4019 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 4020 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 4021 return WaitUntilTimeFromHeaderBackoffStrategy( 4022 header=model.header, 4023 parameters=model.parameters or {}, 4024 config=config, 4025 min_wait=model.min_wait, 4026 regex=model.regex, 4027 ) 4028 4029 def get_message_repository(self) -> MessageRepository: 4030 return self._message_repository 4031 4032 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 4033 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 4034 4035 @staticmethod 4036 def create_components_mapping_definition( 4037 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 4038 ) -> ComponentMappingDefinition: 4039 interpolated_value = InterpolatedString.create( 4040 model.value, parameters=model.parameters or {} 4041 ) 4042 field_path = [ 4043 InterpolatedString.create(path, parameters=model.parameters or {}) 4044 for path in model.field_path 4045 ] 4046 return ComponentMappingDefinition( 4047 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 4048 value=interpolated_value, 4049 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 4050 create_or_update=model.create_or_update, 4051 condition=model.condition, 4052 parameters=model.parameters or {}, 4053 ) 4054 4055 def create_http_components_resolver( 4056 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 4057 ) -> Any: 4058 retriever = self._create_component_from_model( 4059 model=model.retriever, 4060 config=config, 4061 name=f"{stream_name if stream_name else '__http_components_resolver'}", 4062 primary_key=None, 4063 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4064 transformations=[], 4065 ) 4066 4067 components_mapping = [] 4068 for 
component_mapping_definition_model in model.components_mapping: 4069 if component_mapping_definition_model.condition: 4070 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4071 components_mapping.append( 4072 self._create_component_from_model( 4073 model=component_mapping_definition_model, 4074 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4075 component_mapping_definition_model.value_type 4076 ), 4077 config=config, 4078 ) 4079 ) 4080 4081 return HttpComponentsResolver( 4082 retriever=retriever, 4083 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4084 config=config, 4085 components_mapping=components_mapping, 4086 parameters=model.parameters or {}, 4087 ) 4088 4089 @staticmethod 4090 def create_stream_config( 4091 model: StreamConfigModel, config: Config, **kwargs: Any 4092 ) -> StreamConfig: 4093 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 4094 [x for x in model.configs_pointer] if model.configs_pointer else [] 4095 ) 4096 4097 return StreamConfig( 4098 configs_pointer=model_configs_pointer, 4099 default_values=model.default_values, 4100 parameters=model.parameters or {}, 4101 ) 4102 4103 def create_config_components_resolver( 4104 self, 4105 model: ConfigComponentsResolverModel, 4106 config: Config, 4107 ) -> Any: 4108 model_stream_configs = ( 4109 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 4110 ) 4111 4112 stream_configs = [ 4113 self._create_component_from_model( 4114 stream_config, config=config, parameters=model.parameters or {} 4115 ) 4116 for stream_config in model_stream_configs 4117 ] 4118 4119 components_mapping = [ 4120 self._create_component_from_model( 4121 model=components_mapping_definition_model, 4122 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4123 components_mapping_definition_model.value_type 4124 ), 4125 config=config, 4126 parameters=model.parameters, 4127 ) 4128 for components_mapping_definition_model in model.components_mapping 4129 ] 4130 4131 return ConfigComponentsResolver( 4132 stream_configs=stream_configs, 4133 config=config, 4134 components_mapping=components_mapping, 4135 parameters=model.parameters or {}, 4136 ) 4137 4138 def create_parametrized_components_resolver( 4139 self, 4140 model: ParametrizedComponentsResolverModel, 4141 config: Config, 4142 ) -> ParametrizedComponentsResolver: 4143 stream_parameters = StreamParametersDefinition( 4144 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 4145 ) 4146 4147 components_mapping = [] 4148 for components_mapping_definition_model in model.components_mapping: 4149 if components_mapping_definition_model.condition: 4150 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4151 components_mapping.append( 4152 self._create_component_from_model( 4153 model=components_mapping_definition_model, 4154 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4155 components_mapping_definition_model.value_type 4156 ), 4157 config=config, 4158 ) 4159 ) 4160 return ParametrizedComponentsResolver( 4161 stream_parameters=stream_parameters, 4162 config=config, 4163 components_mapping=components_mapping, 4164 parameters=model.parameters or {}, 4165 ) 4166 4167 _UNSUPPORTED_DECODER_ERROR = ( 4168 "Specified decoder of {decoder_type} is not supported for pagination." 
4169 "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead." 4170 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 4171 ) 4172 4173 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 4174 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 4175 return True 4176 elif isinstance(decoder, CompositeRawDecoder): 4177 return self._is_supported_parser_for_pagination(decoder.parser) 4178 else: 4179 return False 4180 4181 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 4182 if isinstance(parser, JsonParser): 4183 return True 4184 elif isinstance(parser, GzipParser): 4185 return isinstance(parser.inner_parser, JsonParser) 4186 else: 4187 return False 4188 4189 def create_http_api_budget( 4190 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4191 ) -> HttpAPIBudget: 4192 policies = [ 4193 self._create_component_from_model(model=policy, config=config) 4194 for policy in model.policies 4195 ] 4196 4197 return HttpAPIBudget( 4198 policies=policies, 4199 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4200 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4201 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4202 ) 4203 4204 def create_fixed_window_call_rate_policy( 4205 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4206 ) -> FixedWindowCallRatePolicy: 4207 matchers = [ 4208 self._create_component_from_model(model=matcher, config=config) 4209 for matcher in model.matchers 4210 ] 4211 4212 # Set the initial reset timestamp to 10 days from now. 4213 # This value will be updated by the first request. 
4214 return FixedWindowCallRatePolicy( 4215 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4216 period=parse_duration(model.period), 4217 call_limit=model.call_limit, 4218 matchers=matchers, 4219 ) 4220 4221 def create_file_uploader( 4222 self, model: FileUploaderModel, config: Config, **kwargs: Any 4223 ) -> FileUploader: 4224 name = "File Uploader" 4225 requester = self._create_component_from_model( 4226 model=model.requester, 4227 config=config, 4228 name=name, 4229 **kwargs, 4230 ) 4231 download_target_extractor = self._create_component_from_model( 4232 model=model.download_target_extractor, 4233 config=config, 4234 name=name, 4235 **kwargs, 4236 ) 4237 emit_connector_builder_messages = self._emit_connector_builder_messages 4238 file_uploader = DefaultFileUploader( 4239 requester=requester, 4240 download_target_extractor=download_target_extractor, 4241 config=config, 4242 file_writer=NoopFileWriter() 4243 if emit_connector_builder_messages 4244 else LocalFileSystemFileWriter(), 4245 parameters=model.parameters or {}, 4246 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4247 ) 4248 4249 return ( 4250 ConnectorBuilderFileUploader(file_uploader) 4251 if emit_connector_builder_messages 4252 else file_uploader 4253 ) 4254 4255 def create_moving_window_call_rate_policy( 4256 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4257 ) -> MovingWindowCallRatePolicy: 4258 rates = [ 4259 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4260 ] 4261 matchers = [ 4262 self._create_component_from_model(model=matcher, config=config) 4263 for matcher in model.matchers 4264 ] 4265 return MovingWindowCallRatePolicy( 4266 rates=rates, 4267 matchers=matchers, 4268 ) 4269 4270 def create_unlimited_call_rate_policy( 4271 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4272 ) -> UnlimitedCallRatePolicy: 4273 matchers = [ 4274 self._create_component_from_model(model=matcher, config=config) 4275 for matcher in model.matchers 4276 ] 4277 4278 return UnlimitedCallRatePolicy( 4279 matchers=matchers, 4280 ) 4281 4282 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 4283 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 4284 return Rate( 4285 limit=int(interpolated_limit.eval(config=config)), 4286 interval=parse_duration(model.interval), 4287 ) 4288 4289 def create_http_request_matcher( 4290 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4291 ) -> HttpRequestRegexMatcher: 4292 return HttpRequestRegexMatcher( 4293 method=model.method, 4294 url_base=model.url_base, 4295 url_path_pattern=model.url_path_pattern, 4296 params=model.params, 4297 headers=model.headers, 4298 ) 4299 4300 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 4301 self._api_budget = self.create_component( 4302 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 4303 ) 4304 4305 def create_grouping_partition_router( 4306 self, 4307 model: GroupingPartitionRouterModel, 4308 config: Config, 4309 *, 4310 stream_name: str, 4311 **kwargs: Any, 4312 ) -> GroupingPartitionRouter: 4313 underlying_router = self._create_component_from_model( 4314 model=model.underlying_partition_router, 4315 config=config, 4316 stream_name=stream_name, 4317 **kwargs, 4318 ) 4319 if model.group_size < 1: 4320 raise ValueError(f"Group size must be greater than 0, got 
{model.group_size}") 4321 4322 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4323 # because they are specific to individual partitions and cannot be aggregated or handled 4324 # when grouping, potentially leading to incorrect API calls. Any request customization 4325 # should be managed at the stream level through the requester's configuration. 4326 if isinstance(underlying_router, SubstreamPartitionRouter): 4327 if any( 4328 parent_config.request_option 4329 for parent_config in underlying_router.parent_stream_configs 4330 ): 4331 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4332 4333 if isinstance(underlying_router, ListPartitionRouter): 4334 if underlying_router.request_option: 4335 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4336 4337 return GroupingPartitionRouter( 4338 group_size=model.group_size, 4339 underlying_partition_router=underlying_router, 4340 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4341 config=config, 4342 ) 4343 4344 def _ensure_query_properties_to_model( 4345 self, requester: Union[HttpRequesterModel, CustomRequesterModel] 4346 ) -> None: 4347 """ 4348 For some reason, it seems like CustomRequesterModel request_parameters stay as dictionaries, which means that 4349 the other conditions relying on them being QueryPropertiesModel instead of a dict fail. Here, we migrate them to 4350 the proper model. 4351 """ 4352 if not hasattr(requester, "request_parameters"): 4353 return 4354 4355 request_parameters = requester.request_parameters 4356 if request_parameters and isinstance(request_parameters, Dict): 4357 for request_parameter_key in request_parameters.keys(): 4358 request_parameter = request_parameters[request_parameter_key] 4359 if ( 4360 isinstance(request_parameter, Dict) 4361 and request_parameter.get("type") == "QueryProperties" 4362 ): 4363 request_parameters[request_parameter_key] = QueryPropertiesModel.parse_obj( 4364 request_parameter 4365 ) 4366 4367 def _get_catalog_defined_cursor_field( 4368 self, stream_name: str, allow_catalog_defined_cursor_field: bool 4369 ) -> Optional[CursorField]: 4370 if not allow_catalog_defined_cursor_field: 4371 return None 4372 4373 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 4374 4375 # Depending on the operation being performed, there may not be a configured stream yet. In this 4376 # case we return None which will then use the default cursor field defined on the cursor model. 4377 # We also treat cursor_field: [""] (list with empty string) as no cursor field, since this can 4378 # occur when the platform serializes "no cursor configured" streams incorrectly. 4379 if ( 4380 not configured_stream 4381 or not configured_stream.cursor_field 4382 or not configured_stream.cursor_field[0] 4383 ): 4384 return None 4385 elif len(configured_stream.cursor_field) > 1: 4386 raise ValueError( 4387 f"The `{stream_name}` stream does not support nested cursor_field. Please specify only a single cursor_field for the stream in the configured catalog." 4388 ) 4389 else: 4390 return CursorField( 4391 cursor_field_key=configured_stream.cursor_field[0], 4392 supports_catalog_defined_cursor_field=allow_catalog_defined_cursor_field, 4393 )
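As an illustration of the grouping behavior validated in create_grouping_partition_router above, the following is a minimal, hypothetical manifest-style definition (all field values are examples, not taken from any real connector). It groups partitions produced by a ListPartitionRouter into batches of 10 and leaves request customization to the requester, since request options on the underlying router are rejected.

# Hypothetical GroupingPartitionRouter definition; values are illustrative only.
grouping_partition_router_definition = {
    "type": "GroupingPartitionRouter",
    "group_size": 10,
    "deduplicate": True,
    "underlying_partition_router": {
        "type": "ListPartitionRouter",
        "values": ["EU", "US", "APAC"],
        "cursor_field": "region",
    },
}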
675 def __init__( 676 self, 677 limit_pages_fetched_per_slice: Optional[int] = None, 678 limit_slices_fetched: Optional[int] = None, 679 emit_connector_builder_messages: bool = False, 680 disable_retries: bool = False, 681 disable_cache: bool = False, 682 message_repository: Optional[MessageRepository] = None, 683 connector_state_manager: Optional[ConnectorStateManager] = None, 684 max_concurrent_async_job_count: Optional[int] = None, 685 configured_catalog: Optional[ConfiguredAirbyteCatalog] = None, 686 api_budget: Optional[APIBudget] = None, 687 ): 688 self._init_mappings() 689 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 690 self._limit_slices_fetched = limit_slices_fetched 691 self._emit_connector_builder_messages = emit_connector_builder_messages 692 self._disable_retries = disable_retries 693 self._disable_cache = disable_cache 694 self._message_repository = message_repository or InMemoryMessageRepository( 695 self._evaluate_log_level(emit_connector_builder_messages) 696 ) 697 self._stream_name_to_configured_stream = self._create_stream_name_to_configured_stream( 698 configured_catalog 699 ) 700 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 701 self._api_budget: Optional[Union[APIBudget]] = api_budget 702 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 703 # placeholder for deprecation warnings 704 self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
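A minimal sketch of constructing the factory for a connector-builder style test read. Only the module path from this page's title is assumed; every argument value below is illustrative.

from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

# Illustrative limits: cap the number of pages and slices read per stream,
# emit connector builder log messages, and skip retries/caching for fast feedback.
factory = ModelToComponentFactory(
    limit_pages_fetched_per_slice=5,
    limit_slices_fetched=5,
    emit_connector_builder_messages=True,
    disable_retries=True,
    disable_cache=True,
)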
827 def create_component( 828 self, 829 model_type: Type[BaseModel], 830 component_definition: ComponentDefinition, 831 config: Config, 832 **kwargs: Any, 833 ) -> Any: 834 """ 835 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 836 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating 837 declarative components from that model. 838 839 :param model_type: The type of declarative component that is being initialized 840 :param component_definition: The mapping that represents a declarative component 841 :param config: The connector config that is provided by the customer 842 :return: The declarative component to be used at runtime 843 """ 844 845 component_type = component_definition.get("type") 846 if component_definition.get("type") != model_type.__name__: 847 raise ValueError( 848 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 849 ) 850 851 declarative_component_model = model_type.parse_obj(component_definition) 852 853 if not isinstance(declarative_component_model, model_type): 854 raise ValueError( 855 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 856 ) 857 858 return self._create_component_from_model( 859 model=declarative_component_model, config=config, **kwargs 860 )
Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating declarative components from that model.
Parameters
- model_type: The type of declarative component that is being initialized
- component_definition: The mapping that represents a declarative component
- config: The connector config that is provided by the customer
Returns
The declarative component to be used at runtime
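A hedged usage sketch of create_component, reusing the factory instance from the constructor sketch above. The model import path and the connector config values are assumptions for illustration; the "type" key must match the model class name, as enforced by the check in the source.

# Assumed import path for the generated Pydantic models; adjust to your CDK version.
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CheckStream as CheckStreamModel,
)

definition = {"type": "CheckStream", "stream_names": ["customers"]}  # illustrative
config = {"api_key": "example-token"}  # illustrative connector config

# "factory" is the ModelToComponentFactory built in the earlier sketch.
check_stream = factory.create_component(
    model_type=CheckStreamModel,
    component_definition=definition,
    config=config,
)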
877 def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]: 878 """ 879 Returns the deprecation warnings that were collected during the creation of components. 880 """ 881 return self._collected_deprecation_logs
Returns the deprecation warnings that were collected during the creation of components.
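For example, a caller could drain the collected warnings after building components (sketch only, reusing the factory from the earlier sketch):

# Surface any deprecation warnings gathered while components were created.
for deprecation_log in factory.get_model_deprecations():
    print(deprecation_log)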
898 def create_config_migration( 899 self, model: ConfigMigrationModel, config: Config 900 ) -> ConfigMigration: 901 transformations: List[ConfigTransformation] = [ 902 self._create_component_from_model(transformation, config) 903 for transformation in model.transformations 904 ] 905 906 return ConfigMigration( 907 description=model.description, 908 transformations=transformations, 909 )
911 def create_config_add_fields( 912 self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any 913 ) -> ConfigAddFields: 914 fields = [self._create_component_from_model(field, config) for field in model.fields] 915 return ConfigAddFields( 916 fields=fields, 917 condition=model.condition or "", 918 )
967 @staticmethod 968 def create_added_field_definition( 969 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 970 ) -> AddedFieldDefinition: 971 interpolated_value = InterpolatedString.create( 972 model.value, parameters=model.parameters or {} 973 ) 974 return AddedFieldDefinition( 975 path=model.path, 976 value=interpolated_value, 977 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 978 parameters=model.parameters or {}, 979 )
981 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 982 added_field_definitions = [ 983 self._create_component_from_model( 984 model=added_field_definition_model, 985 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 986 added_field_definition_model.value_type 987 ), 988 config=config, 989 ) 990 for added_field_definition_model in model.fields 991 ] 992 return AddFields( 993 fields=added_field_definitions, 994 condition=model.condition or "", 995 parameters=model.parameters or {}, 996 )
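For reference, a minimal, hypothetical AddFields definition matching the fields read above (path, value, optional value_type and condition); the field names and the Jinja expression are illustrative only.

# Hypothetical AddFields definition: copy the record's "id" into a new field.
add_fields_definition = {
    "type": "AddFields",
    "fields": [
        {
            "type": "AddedFieldDefinition",
            "path": ["customer_id"],
            "value": "{{ record['id'] }}",
        }
    ],
}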
1022 def create_dpath_flatten_fields( 1023 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 1024 ) -> DpathFlattenFields: 1025 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 1026 key_transformation = ( 1027 KeyTransformation( 1028 config=config, 1029 prefix=model.key_transformation.prefix, 1030 suffix=model.key_transformation.suffix, 1031 parameters=model.parameters or {}, 1032 ) 1033 if model.key_transformation is not None 1034 else None 1035 ) 1036 return DpathFlattenFields( 1037 config=config, 1038 field_path=model_field_path, 1039 delete_origin_value=model.delete_origin_value 1040 if model.delete_origin_value is not None 1041 else False, 1042 replace_record=model.replace_record if model.replace_record is not None else False, 1043 key_transformation=key_transformation, 1044 parameters=model.parameters or {}, 1045 )
1059 def create_api_key_authenticator( 1060 self, 1061 model: ApiKeyAuthenticatorModel, 1062 config: Config, 1063 token_provider: Optional[TokenProvider] = None, 1064 **kwargs: Any, 1065 ) -> ApiKeyAuthenticator: 1066 if model.inject_into is None and model.header is None: 1067 raise ValueError( 1068 "Expected either inject_into or header to be set for ApiKeyAuthenticator" 1069 ) 1070 1071 if model.inject_into is not None and model.header is not None: 1072 raise ValueError( 1073 "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" 1074 ) 1075 1076 if token_provider is not None and model.api_token != "": 1077 raise ValueError( 1078 "If token_provider is set, api_token is ignored and has to be set to empty string." 1079 ) 1080 1081 request_option = ( 1082 self._create_component_from_model( 1083 model.inject_into, config, parameters=model.parameters or {} 1084 ) 1085 if model.inject_into 1086 else RequestOption( 1087 inject_into=RequestOptionType.header, 1088 field_name=model.header or "", 1089 parameters=model.parameters or {}, 1090 ) 1091 ) 1092 1093 return ApiKeyAuthenticator( 1094 token_provider=( 1095 token_provider 1096 if token_provider is not None 1097 else InterpolatedStringTokenProvider( 1098 api_token=model.api_token or "", 1099 config=config, 1100 parameters=model.parameters or {}, 1101 ) 1102 ), 1103 request_option=request_option, 1104 config=config, 1105 parameters=model.parameters or {}, 1106 )
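A minimal, hypothetical ApiKeyAuthenticator definition consistent with the validation above: it sets inject_into (rather than the deprecated header option) and reads the token from the connector config; the header name and config key are illustrative.

# Hypothetical ApiKeyAuthenticator definition; values are illustrative only.
api_key_authenticator_definition = {
    "type": "ApiKeyAuthenticator",
    "api_token": "{{ config['api_key'] }}",
    "inject_into": {
        "type": "RequestOption",
        "inject_into": "header",
        "field_name": "X-API-Key",
    },
}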
1108 def create_legacy_to_per_partition_state_migration( 1109 self, 1110 model: LegacyToPerPartitionStateMigrationModel, 1111 config: Mapping[str, Any], 1112 declarative_stream: DeclarativeStreamModel, 1113 ) -> LegacyToPerPartitionStateMigration: 1114 retriever = declarative_stream.retriever 1115 if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)): 1116 raise ValueError( 1117 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}" 1118 ) 1119 partition_router = retriever.partition_router 1120 if not isinstance( 1121 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 1122 ): 1123 raise ValueError( 1124 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 1125 ) 1126 if not hasattr(partition_router, "parent_stream_configs"): 1127 raise ValueError( 1128 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 1129 ) 1130 1131 if not hasattr(declarative_stream, "incremental_sync"): 1132 raise ValueError( 1133 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 1134 ) 1135 1136 return LegacyToPerPartitionStateMigration( 1137 partition_router, # type: ignore # was already checked above 1138 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 1139 config, 1140 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 1141 )
1143 def create_session_token_authenticator( 1144 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 1145 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 1146 decoder = ( 1147 self._create_component_from_model(model=model.decoder, config=config) 1148 if model.decoder 1149 else JsonDecoder(parameters={}) 1150 ) 1151 login_requester = self._create_component_from_model( 1152 model=model.login_requester, 1153 config=config, 1154 name=f"{name}_login_requester", 1155 decoder=decoder, 1156 ) 1157 token_provider = SessionTokenProvider( 1158 login_requester=login_requester, 1159 session_token_path=model.session_token_path, 1160 expiration_duration=parse_duration(model.expiration_duration) 1161 if model.expiration_duration 1162 else None, 1163 parameters=model.parameters or {}, 1164 message_repository=self._message_repository, 1165 decoder=decoder, 1166 ) 1167 if model.request_authentication.type == "Bearer": 1168 return ModelToComponentFactory.create_bearer_authenticator( 1169 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 1170 config, 1171 token_provider=token_provider, 1172 ) 1173 else: 1174 # Get the api_token template if specified, default to just the session token 1175 api_token_template = ( 1176 getattr(model.request_authentication, "api_token", None) or "{{ session_token }}" 1177 ) 1178 final_token_provider: TokenProvider = InterpolatedSessionTokenProvider( 1179 config=config, 1180 api_token=api_token_template, 1181 session_token_provider=token_provider, 1182 parameters=model.parameters or {}, 1183 ) 1184 return self.create_api_key_authenticator( 1185 ApiKeyAuthenticatorModel( 1186 type="ApiKeyAuthenticator", 1187 api_token="", 1188 inject_into=model.request_authentication.inject_into, 1189 ), # type: ignore # $parameters and headers default to None 1190 config=config, 1191 token_provider=final_token_provider, 1192 )
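To make the branching above concrete, here is a hypothetical SessionTokenAuthenticator definition that logs in once, extracts the token at session_token_path, and then authenticates with a plain Bearer header; URLs, paths, and config keys are illustrative assumptions.

# Hypothetical SessionTokenAuthenticator definition; values are illustrative only.
session_token_authenticator_definition = {
    "type": "SessionTokenAuthenticator",
    "login_requester": {
        "type": "HttpRequester",
        "url_base": "https://api.example.com",
        "path": "/login",
        "http_method": "POST",
        "request_body_json": {
            "username": "{{ config['username'] }}",
            "password": "{{ config['password'] }}",
        },
    },
    "session_token_path": ["token"],
    "expiration_duration": "PT1H",
    "request_authentication": {"type": "Bearer"},
}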
1194 @staticmethod 1195 def create_basic_http_authenticator( 1196 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 1197 ) -> BasicHttpAuthenticator: 1198 return BasicHttpAuthenticator( 1199 password=model.password or "", 1200 username=model.username, 1201 config=config, 1202 parameters=model.parameters or {}, 1203 )
1205 @staticmethod 1206 def create_bearer_authenticator( 1207 model: BearerAuthenticatorModel, 1208 config: Config, 1209 token_provider: Optional[TokenProvider] = None, 1210 **kwargs: Any, 1211 ) -> BearerAuthenticator: 1212 if token_provider is not None and model.api_token != "": 1213 raise ValueError( 1214 "If token_provider is set, api_token is ignored and has to be set to empty string." 1215 ) 1216 return BearerAuthenticator( 1217 token_provider=( 1218 token_provider 1219 if token_provider is not None 1220 else InterpolatedStringTokenProvider( 1221 api_token=model.api_token or "", 1222 config=config, 1223 parameters=model.parameters or {}, 1224 ) 1225 ), 1226 config=config, 1227 parameters=model.parameters or {}, 1228 )
1230 @staticmethod 1231 def create_dynamic_stream_check_config( 1232 model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any 1233 ) -> DynamicStreamCheckConfig: 1234 return DynamicStreamCheckConfig( 1235 dynamic_stream_name=model.dynamic_stream_name, 1236 stream_count=model.stream_count or 0, 1237 )
1239 def create_check_stream( 1240 self, model: CheckStreamModel, config: Config, **kwargs: Any 1241 ) -> CheckStream: 1242 if model.dynamic_streams_check_configs is None and model.stream_names is None: 1243 raise ValueError( 1244 "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream" 1245 ) 1246 1247 dynamic_streams_check_configs = ( 1248 [ 1249 self._create_component_from_model(model=dynamic_stream_check_config, config=config) 1250 for dynamic_stream_check_config in model.dynamic_streams_check_configs 1251 ] 1252 if model.dynamic_streams_check_configs 1253 else [] 1254 ) 1255 1256 return CheckStream( 1257 stream_names=model.stream_names or [], 1258 dynamic_streams_check_configs=dynamic_streams_check_configs, 1259 parameters={}, 1260 )
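Either stream_names or dynamic_streams_check_configs must be provided, as enforced above. Two hypothetical definitions (the stream and dynamic-stream names are illustrative):

# Check explicitly named streams.
check_stream_definition = {
    "type": "CheckStream",
    "stream_names": ["customers"],
}

# Check one stream produced by a dynamic stream definition.
dynamic_check_stream_definition = {
    "type": "CheckStream",
    "dynamic_streams_check_configs": [
        {
            "type": "DynamicStreamCheckConfig",
            "dynamic_stream_name": "products",
            "stream_count": 1,
        }
    ],
}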
1262 @staticmethod 1263 def create_check_dynamic_stream( 1264 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 1265 ) -> CheckDynamicStream: 1266 assert model.use_check_availability is not None # for mypy 1267 1268 use_check_availability = model.use_check_availability 1269 1270 return CheckDynamicStream( 1271 stream_count=model.stream_count, 1272 use_check_availability=use_check_availability, 1273 parameters={}, 1274 )
1276 def create_composite_error_handler( 1277 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 1278 ) -> CompositeErrorHandler: 1279 error_handlers = [ 1280 self._create_component_from_model(model=error_handler_model, config=config) 1281 for error_handler_model in model.error_handlers 1282 ] 1283 return CompositeErrorHandler( 1284 error_handlers=error_handlers, parameters=model.parameters or {} 1285 )
1287 @staticmethod 1288 def create_concurrency_level( 1289 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 1290 ) -> ConcurrencyLevel: 1291 return ConcurrencyLevel( 1292 default_concurrency=model.default_concurrency, 1293 max_concurrency=model.max_concurrency, 1294 config=config, 1295 parameters={}, 1296 )
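A hypothetical ConcurrencyLevel definition: default_concurrency may be interpolated from the connector config while max_concurrency caps it (all values are illustrative).

# Hypothetical ConcurrencyLevel definition; values are illustrative only.
concurrency_level_definition = {
    "type": "ConcurrencyLevel",
    "default_concurrency": "{{ config['num_workers'] }}",
    "max_concurrency": 25,
}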
1298 @staticmethod 1299 def apply_stream_state_migrations( 1300 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 1301 ) -> MutableMapping[str, Any]: 1302 if stream_state_migrations: 1303 for state_migration in stream_state_migrations: 1304 if state_migration.should_migrate(stream_state): 1305 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 1306 stream_state = dict(state_migration.migrate(stream_state)) 1307 return stream_state
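A minimal sketch of the contract this helper relies on: each migration exposes should_migrate(state) and migrate(state). The migration class below is purely illustrative and not part of the CDK.

from typing import Any, Mapping, MutableMapping

from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)


class RenameCursorKeyMigration:
    """Illustrative migration that renames a legacy cursor key."""

    def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
        return "updated" in stream_state

    def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
        migrated = dict(stream_state)
        migrated["updated_at"] = migrated.pop("updated")
        return migrated


state: MutableMapping[str, Any] = {"updated": "2024-01-01T00:00:00Z"}
state = ModelToComponentFactory.apply_stream_state_migrations(
    [RenameCursorKeyMigration()], state
)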
1309 def create_concurrent_cursor_from_datetime_based_cursor( 1310 self, 1311 model_type: Type[BaseModel], 1312 component_definition: ComponentDefinition, 1313 stream_name: str, 1314 stream_namespace: Optional[str], 1315 stream_state: MutableMapping[str, Any], 1316 config: Config, 1317 message_repository: Optional[MessageRepository] = None, 1318 runtime_lookback_window: Optional[datetime.timedelta] = None, 1319 **kwargs: Any, 1320 ) -> ConcurrentCursor: 1321 component_type = component_definition.get("type") 1322 if component_definition.get("type") != model_type.__name__: 1323 raise ValueError( 1324 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1325 ) 1326 1327 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1328 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1329 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1330 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1331 if "$parameters" not in component_definition and "parameters" in component_definition: 1332 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1333 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1334 1335 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1336 raise ValueError( 1337 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1338 ) 1339 1340 model_parameters = datetime_based_cursor_model.parameters or {} 1341 1342 cursor_field = self._get_catalog_defined_cursor_field( 1343 stream_name=stream_name, 1344 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1345 or False, 1346 ) 1347 1348 if not cursor_field: 1349 interpolated_cursor_field = InterpolatedString.create( 1350 datetime_based_cursor_model.cursor_field, 1351 parameters=model_parameters, 1352 ) 1353 cursor_field = CursorField( 1354 cursor_field_key=interpolated_cursor_field.eval(config=config), 1355 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1356 or False, 1357 ) 1358 1359 interpolated_partition_field_start = InterpolatedString.create( 1360 datetime_based_cursor_model.partition_field_start or "start_time", 1361 parameters=model_parameters, 1362 ) 1363 interpolated_partition_field_end = InterpolatedString.create( 1364 datetime_based_cursor_model.partition_field_end or "end_time", 1365 parameters=model_parameters, 1366 ) 1367 1368 slice_boundary_fields = ( 1369 interpolated_partition_field_start.eval(config=config), 1370 interpolated_partition_field_end.eval(config=config), 1371 ) 1372 1373 datetime_format = datetime_based_cursor_model.datetime_format 1374 1375 cursor_granularity = ( 1376 parse_duration(datetime_based_cursor_model.cursor_granularity) 1377 if datetime_based_cursor_model.cursor_granularity 1378 else None 1379 ) 1380 1381 
lookback_window = None 1382 interpolated_lookback_window = ( 1383 InterpolatedString.create( 1384 datetime_based_cursor_model.lookback_window, 1385 parameters=model_parameters, 1386 ) 1387 if datetime_based_cursor_model.lookback_window 1388 else None 1389 ) 1390 if interpolated_lookback_window: 1391 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1392 if evaluated_lookback_window: 1393 lookback_window = parse_duration(evaluated_lookback_window) 1394 1395 connector_state_converter: DateTimeStreamStateConverter 1396 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1397 datetime_format=datetime_format, 1398 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1399 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1400 cursor_granularity=cursor_granularity, 1401 ) 1402 1403 # Adjusts the stream state by applying the runtime lookback window. 1404 # This is used to ensure correct state handling in case of failed partitions. 1405 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1406 if runtime_lookback_window and stream_state_value: 1407 new_stream_state = ( 1408 connector_state_converter.parse_timestamp(stream_state_value) 1409 - runtime_lookback_window 1410 ) 1411 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1412 new_stream_state 1413 ) 1414 1415 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1416 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1417 start_date_runtime_value = self.create_min_max_datetime( 1418 model=datetime_based_cursor_model.start_datetime, config=config 1419 ) 1420 else: 1421 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1422 1423 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1424 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1425 end_date_runtime_value = self.create_min_max_datetime( 1426 model=datetime_based_cursor_model.end_datetime, config=config 1427 ) 1428 else: 1429 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1430 1431 interpolated_start_date = MinMaxDatetime.create( 1432 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1433 parameters=datetime_based_cursor_model.parameters, 1434 ) 1435 interpolated_end_date = ( 1436 None 1437 if not end_date_runtime_value 1438 else MinMaxDatetime.create( 1439 end_date_runtime_value, datetime_based_cursor_model.parameters 1440 ) 1441 ) 1442 1443 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1444 if not interpolated_start_date.datetime_format: 1445 interpolated_start_date.datetime_format = datetime_format 1446 if interpolated_end_date and not interpolated_end_date.datetime_format: 1447 interpolated_end_date.datetime_format = datetime_format 1448 1449 start_date = interpolated_start_date.get_datetime(config=config) 1450 end_date_provider = ( 1451 partial(interpolated_end_date.get_datetime, config) 1452 if interpolated_end_date 1453 else connector_state_converter.get_end_provider() 1454 ) 1455 1456 if ( 1457 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1458 ) or ( 1459 not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity 1460 ): 1461 raise ValueError( 1462 f"If step is defined, cursor_granularity should be as well and vice-versa. 
" 1463 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1464 ) 1465 1466 # When step is not defined, default to a step size from the starting date to the present moment 1467 step_length = datetime.timedelta.max 1468 interpolated_step = ( 1469 InterpolatedString.create( 1470 datetime_based_cursor_model.step, 1471 parameters=model_parameters, 1472 ) 1473 if datetime_based_cursor_model.step 1474 else None 1475 ) 1476 if interpolated_step: 1477 evaluated_step = interpolated_step.eval(config) 1478 if evaluated_step: 1479 step_length = parse_duration(evaluated_step) 1480 1481 clamping_strategy: ClampingStrategy = NoClamping() 1482 if datetime_based_cursor_model.clamping: 1483 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1484 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1485 # object which we want to keep agnostic of being low-code 1486 target = InterpolatedString( 1487 string=datetime_based_cursor_model.clamping.target, 1488 parameters=model_parameters, 1489 ) 1490 evaluated_target = target.eval(config=config) 1491 match evaluated_target: 1492 case "DAY": 1493 clamping_strategy = DayClampingStrategy() 1494 end_date_provider = ClampingEndProvider( 1495 DayClampingStrategy(is_ceiling=False), 1496 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1497 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1498 ) 1499 case "WEEK": 1500 if ( 1501 not datetime_based_cursor_model.clamping.target_details 1502 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1503 ): 1504 raise ValueError( 1505 "Given WEEK clamping, weekday needs to be provided as target_details" 1506 ) 1507 weekday = self._assemble_weekday( 1508 datetime_based_cursor_model.clamping.target_details["weekday"] 1509 ) 1510 clamping_strategy = WeekClampingStrategy(weekday) 1511 end_date_provider = ClampingEndProvider( 1512 WeekClampingStrategy(weekday, is_ceiling=False), 1513 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1514 granularity=cursor_granularity or datetime.timedelta(days=1), 1515 ) 1516 case "MONTH": 1517 clamping_strategy = MonthClampingStrategy() 1518 end_date_provider = ClampingEndProvider( 1519 MonthClampingStrategy(is_ceiling=False), 1520 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1521 granularity=cursor_granularity or datetime.timedelta(days=1), 1522 ) 1523 case _: 1524 raise ValueError( 1525 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1526 ) 1527 1528 return ConcurrentCursor( 1529 stream_name=stream_name, 1530 stream_namespace=stream_namespace, 1531 stream_state=stream_state, 1532 message_repository=message_repository or self._message_repository, 1533 connector_state_manager=self._connector_state_manager, 1534 connector_state_converter=connector_state_converter, 1535 cursor_field=cursor_field, 1536 slice_boundary_fields=slice_boundary_fields, 1537 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1538 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1539 lookback_window=lookback_window, 1540 slice_range=step_length, 1541 cursor_granularity=cursor_granularity, 1542 clamping_strategy=clamping_strategy, 1543 )
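A hypothetical DatetimeBasedCursor definition consistent with the validation above: step and cursor_granularity are provided together, and the date formats and config key are illustrative assumptions.

# Hypothetical DatetimeBasedCursor definition; values are illustrative only.
datetime_based_cursor_definition = {
    "type": "DatetimeBasedCursor",
    "cursor_field": "updated_at",
    "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
    "start_datetime": "{{ config['start_date'] }}",
    "step": "P30D",
    "cursor_granularity": "PT1S",
    "lookback_window": "P1D",
}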
1545 def create_concurrent_cursor_from_incrementing_count_cursor( 1546 self, 1547 model_type: Type[BaseModel], 1548 component_definition: ComponentDefinition, 1549 stream_name: str, 1550 stream_namespace: Optional[str], 1551 stream_state: MutableMapping[str, Any], 1552 config: Config, 1553 message_repository: Optional[MessageRepository] = None, 1554 **kwargs: Any, 1555 ) -> ConcurrentCursor: 1556 component_type = component_definition.get("type") 1557 if component_definition.get("type") != model_type.__name__: 1558 raise ValueError( 1559 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1560 ) 1561 1562 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1563 1564 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1565 raise ValueError( 1566 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1567 ) 1568 1569 start_value: Union[int, str, None] = incrementing_count_cursor_model.start_value 1570 # Pydantic Union type coercion can convert int 0 to string '0' depending on Union order. 1571 # We need to handle both int and str representations of numeric values. 1572 # Evaluate the InterpolatedString and convert to int for the ConcurrentCursor. 1573 if start_value is not None: 1574 interpolated_start_value = InterpolatedString.create( 1575 str(start_value), # Ensure we pass a string to InterpolatedString.create 1576 parameters=incrementing_count_cursor_model.parameters or {}, 1577 ) 1578 evaluated_start_value: int = int(interpolated_start_value.eval(config=config)) 1579 else: 1580 evaluated_start_value = 0 1581 1582 cursor_field = self._get_catalog_defined_cursor_field( 1583 stream_name=stream_name, 1584 allow_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1585 or False, 1586 ) 1587 1588 if not cursor_field: 1589 interpolated_cursor_field = InterpolatedString.create( 1590 incrementing_count_cursor_model.cursor_field, 1591 parameters=incrementing_count_cursor_model.parameters or {}, 1592 ) 1593 cursor_field = CursorField( 1594 cursor_field_key=interpolated_cursor_field.eval(config=config), 1595 supports_catalog_defined_cursor_field=incrementing_count_cursor_model.allow_catalog_defined_cursor_field 1596 or False, 1597 ) 1598 1599 connector_state_converter = IncrementingCountStreamStateConverter( 1600 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1601 ) 1602 1603 return ConcurrentCursor( 1604 stream_name=stream_name, 1605 stream_namespace=stream_namespace, 1606 stream_state=stream_state, 1607 message_repository=message_repository or self._message_repository, 1608 connector_state_manager=self._connector_state_manager, 1609 connector_state_converter=connector_state_converter, 1610 cursor_field=cursor_field, 1611 slice_boundary_fields=None, 1612 start=evaluated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1613 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1614 )
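A hypothetical IncrementingCountCursor definition; as noted in the code above, start_value may arrive as an int or a string due to Pydantic union coercion, and both are handled (values are illustrative).

# Hypothetical IncrementingCountCursor definition; values are illustrative only.
incrementing_count_cursor_definition = {
    "type": "IncrementingCountCursor",
    "cursor_field": "id",
    "start_value": 0,
}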
1635 def create_concurrent_cursor_from_perpartition_cursor( 1636 self, 1637 state_manager: ConnectorStateManager, 1638 model_type: Type[BaseModel], 1639 component_definition: ComponentDefinition, 1640 stream_name: str, 1641 stream_namespace: Optional[str], 1642 config: Config, 1643 stream_state: MutableMapping[str, Any], 1644 partition_router: PartitionRouter, 1645 attempt_to_create_cursor_if_not_provided: bool = False, 1646 **kwargs: Any, 1647 ) -> ConcurrentPerPartitionCursor: 1648 component_type = component_definition.get("type") 1649 if component_definition.get("type") != model_type.__name__: 1650 raise ValueError( 1651 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1652 ) 1653 1654 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1655 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1656 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1657 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1658 if "$parameters" not in component_definition and "parameters" in component_definition: 1659 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1660 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1661 1662 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1663 raise ValueError( 1664 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1665 ) 1666 1667 cursor_field = self._get_catalog_defined_cursor_field( 1668 stream_name=stream_name, 1669 allow_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1670 or False, 1671 ) 1672 1673 if not cursor_field: 1674 interpolated_cursor_field = InterpolatedString.create( 1675 datetime_based_cursor_model.cursor_field, 1676 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1677 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1678 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1679 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 
1680 parameters=datetime_based_cursor_model.parameters or {}, 1681 ) 1682 cursor_field = CursorField( 1683 cursor_field_key=interpolated_cursor_field.eval(config=config), 1684 supports_catalog_defined_cursor_field=datetime_based_cursor_model.allow_catalog_defined_cursor_field 1685 or False, 1686 ) 1687 1688 datetime_format = datetime_based_cursor_model.datetime_format 1689 1690 cursor_granularity = ( 1691 parse_duration(datetime_based_cursor_model.cursor_granularity) 1692 if datetime_based_cursor_model.cursor_granularity 1693 else None 1694 ) 1695 1696 connector_state_converter: DateTimeStreamStateConverter 1697 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1698 datetime_format=datetime_format, 1699 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1700 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1701 cursor_granularity=cursor_granularity, 1702 ) 1703 1704 # Create the cursor factory 1705 cursor_factory = ConcurrentCursorFactory( 1706 partial( 1707 self.create_concurrent_cursor_from_datetime_based_cursor, 1708 state_manager=state_manager, 1709 model_type=model_type, 1710 component_definition=component_definition, 1711 stream_name=stream_name, 1712 stream_namespace=stream_namespace, 1713 config=config, 1714 message_repository=NoopMessageRepository(), 1715 ) 1716 ) 1717 1718 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1719 use_global_cursor = isinstance( 1720 partition_router, GroupingPartitionRouter 1721 ) or component_definition.get("global_substream_cursor", False) 1722 1723 # Return the concurrent cursor and state converter 1724 return ConcurrentPerPartitionCursor( 1725 cursor_factory=cursor_factory, 1726 partition_router=partition_router, 1727 stream_name=stream_name, 1728 stream_namespace=stream_namespace, 1729 stream_state=stream_state, 1730 message_repository=self._message_repository, # type: ignore 1731 connector_state_manager=state_manager, 1732 connector_state_converter=connector_state_converter, 1733 cursor_field=cursor_field, 1734 use_global_cursor=use_global_cursor, 1735 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1736 )
1738 @staticmethod 1739 def create_constant_backoff_strategy( 1740 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1741 ) -> ConstantBackoffStrategy: 1742 return ConstantBackoffStrategy( 1743 backoff_time_in_seconds=model.backoff_time_in_seconds, 1744 config=config, 1745 parameters=model.parameters or {}, 1746 )
1748 def create_cursor_pagination( 1749 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1750 ) -> CursorPaginationStrategy: 1751 if isinstance(decoder, PaginationDecoderDecorator): 1752 inner_decoder = decoder.decoder 1753 else: 1754 inner_decoder = decoder 1755 decoder = PaginationDecoderDecorator(decoder=decoder) 1756 1757 if self._is_supported_decoder_for_pagination(inner_decoder): 1758 decoder_to_use = decoder 1759 else: 1760 raise ValueError( 1761 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1762 ) 1763 1764 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 1765 # If page_size is a string that represents an integer (not an interpolation), convert it back. 1766 page_size = model.page_size 1767 if isinstance(page_size, str) and page_size.isdigit(): 1768 page_size = int(page_size) 1769 1770 return CursorPaginationStrategy( 1771 cursor_value=model.cursor_value, 1772 decoder=decoder_to_use, 1773 page_size=page_size, 1774 stop_condition=model.stop_condition, 1775 config=config, 1776 parameters=model.parameters or {}, 1777 )
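The page_size guard above (also used by the offset and page-increment strategies below) only converts literal digit strings, so jinja interpolation expressions pass through untouched. A small illustrative sketch; the helper name is made up for illustration:

    def _normalize_page_size(page_size):
        # mirror of the guard in create_cursor_pagination: only plain digit strings
        # are converted back to int; interpolation expressions are left as-is
        if isinstance(page_size, str) and page_size.isdigit():
            return int(page_size)
        return page_size


    assert _normalize_page_size("50") == 50
    assert _normalize_page_size(50) == 50
    assert _normalize_page_size("{{ config['page_size'] }}") == "{{ config['page_size'] }}"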
1779 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1780 """ 1781 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1782 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1783 :param model: The Pydantic model of the custom component being created 1784 :param config: The custom defined connector config 1785 :return: The declarative component built from the Pydantic model to be used at runtime 1786 """ 1787 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1788 component_fields = get_type_hints(custom_component_class) 1789 model_args = model.dict() 1790 model_args["config"] = config 1791 1792 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1793 # we defer to these arguments over the component's definition 1794 for key, arg in kwargs.items(): 1795 model_args[key] = arg 1796 1797 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1798 # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to 1799 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1800 for model_field, model_value in model_args.items(): 1801 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1802 if ( 1803 isinstance(model_value, dict) 1804 and "type" not in model_value 1805 and model_field in component_fields 1806 ): 1807 derived_type = self._derive_component_type_from_type_hints( 1808 component_fields.get(model_field) 1809 ) 1810 if derived_type: 1811 model_value["type"] = derived_type 1812 1813 if self._is_component(model_value): 1814 model_args[model_field] = self._create_nested_component( 1815 model, 1816 model_field, 1817 model_value, 1818 config, 1819 **kwargs, 1820 ) 1821 elif isinstance(model_value, list): 1822 vals = [] 1823 for v in model_value: 1824 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1825 derived_type = self._derive_component_type_from_type_hints( 1826 component_fields.get(model_field) 1827 ) 1828 if derived_type: 1829 v["type"] = derived_type 1830 if self._is_component(v): 1831 vals.append( 1832 self._create_nested_component( 1833 model, 1834 model_field, 1835 v, 1836 config, 1837 **kwargs, 1838 ) 1839 ) 1840 else: 1841 vals.append(v) 1842 model_args[model_field] = vals 1843 1844 kwargs = { 1845 class_field: model_args[class_field] 1846 for class_field in component_fields.keys() 1847 if class_field in model_args 1848 } 1849 return custom_component_class(**kwargs)
Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor.
Parameters
- model: The Pydantic model of the custom component being created
- config: The custom defined connector config
Returns
The declarative component built from the Pydantic model to be used at runtime
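As a rough illustration of the field filtering described above: only the keys that appear in the custom class's type hints survive the final constructor call. A sketch with a hypothetical custom class (not part of the CDK):

    from dataclasses import dataclass
    from typing import Any, Dict, List, get_type_hints


    @dataclass
    class MyCustomExtractor:
        # hypothetical user-defined component referenced by class_name in a manifest
        field_path: List[str]
        config: Dict[str, Any]


    model_args = {
        "class_name": "source_example.MyCustomExtractor",
        "field_path": ["data"],
        "config": {},
        "unused_option": True,
    }
    component_fields = get_type_hints(MyCustomExtractor)
    kwargs = {key: value for key, value in model_args.items() if key in component_fields}
    component = MyCustomExtractor(**kwargs)  # "class_name" and "unused_option" are dropped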
1984 def create_default_stream( 1985 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1986 ) -> AbstractStream: 1987 primary_key = model.primary_key.__root__ if model.primary_key else None 1988 self._migrate_state(model, config) 1989 1990 partition_router = self._build_stream_slicer_from_partition_router( 1991 model.retriever, 1992 config, 1993 stream_name=model.name, 1994 **kwargs, 1995 ) 1996 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1997 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1998 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1999 2000 end_time_option = ( 2001 self._create_component_from_model( 2002 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 2003 ) 2004 if cursor_model.end_time_option 2005 else None 2006 ) 2007 start_time_option = ( 2008 self._create_component_from_model( 2009 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 2010 ) 2011 if cursor_model.start_time_option 2012 else None 2013 ) 2014 2015 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 2016 start_time_option=start_time_option, 2017 end_time_option=end_time_option, 2018 partition_field_start=cursor_model.partition_field_start, 2019 partition_field_end=cursor_model.partition_field_end, 2020 config=config, 2021 parameters=model.parameters or {}, 2022 ) 2023 request_options_provider = ( 2024 datetime_request_options_provider 2025 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 2026 else PerPartitionRequestOptionsProvider( 2027 partition_router, datetime_request_options_provider 2028 ) 2029 ) 2030 elif model.incremental_sync and isinstance( 2031 model.incremental_sync, IncrementingCountCursorModel 2032 ): 2033 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2034 raise ValueError( 2035 "PerPartition does not support per partition states because switching to global state is time based" 2036 ) 2037 2038 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 2039 2040 start_time_option = ( 2041 self._create_component_from_model( 2042 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2043 config, 2044 parameters=cursor_model.parameters or {}, 2045 ) 2046 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2047 else None 2048 ) 2049 2050 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2051 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2052 partition_field_start = "start" 2053 2054 request_options_provider = DatetimeBasedRequestOptionsProvider( 2055 start_time_option=start_time_option, 2056 partition_field_start=partition_field_start, 2057 config=config, 2058 parameters=model.parameters or {}, 2059 ) 2060 else: 2061 request_options_provider = None 2062 2063 transformations = [] 2064 if model.transformations: 2065 for transformation_model in model.transformations: 2066 transformations.append( 2067 self._create_component_from_model(model=transformation_model, config=config) 2068 ) 2069 file_uploader = None 2070 if model.file_uploader: 2071 file_uploader = self._create_component_from_model( 2072 model=model.file_uploader, config=config 2073 ) 2074 2075 stream_slicer: ConcurrentStreamSlicer = ( 2076 partition_router 2077 if 
isinstance(concurrent_cursor, FinalStateCursor) 2078 else concurrent_cursor 2079 ) 2080 2081 retriever = self._create_component_from_model( 2082 model=model.retriever, 2083 config=config, 2084 name=model.name, 2085 primary_key=primary_key, 2086 request_options_provider=request_options_provider, 2087 stream_slicer=stream_slicer, 2088 partition_router=partition_router, 2089 has_stop_condition_cursor=self._is_stop_condition_on_cursor(model), 2090 is_client_side_incremental_sync=self._is_client_side_filtering_enabled(model), 2091 cursor=concurrent_cursor, 2092 transformations=transformations, 2093 file_uploader=file_uploader, 2094 incremental_sync=model.incremental_sync, 2095 ) 2096 if isinstance(retriever, AsyncRetriever): 2097 stream_slicer = retriever.stream_slicer 2098 2099 schema_loader: SchemaLoader 2100 if model.schema_loader and isinstance(model.schema_loader, list): 2101 nested_schema_loaders = [ 2102 self._create_component_from_model(model=nested_schema_loader, config=config) 2103 for nested_schema_loader in model.schema_loader 2104 ] 2105 schema_loader = CompositeSchemaLoader( 2106 schema_loaders=nested_schema_loaders, parameters={} 2107 ) 2108 elif model.schema_loader: 2109 schema_loader = self._create_component_from_model( 2110 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2111 config=config, 2112 ) 2113 else: 2114 options = model.parameters or {} 2115 if "name" not in options: 2116 options["name"] = model.name 2117 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2118 schema_loader = CachingSchemaLoaderDecorator(schema_loader) 2119 2120 stream_name = model.name or "" 2121 return DefaultStream( 2122 partition_generator=StreamSlicerPartitionGenerator( 2123 DeclarativePartitionFactory( 2124 stream_name, 2125 schema_loader, 2126 retriever, 2127 self._message_repository, 2128 ), 2129 stream_slicer, 2130 slice_limit=self._limit_slices_fetched, 2131 ), 2132 name=stream_name, 2133 json_schema=schema_loader.get_json_schema, 2134 primary_key=get_primary_key_from_stream(primary_key), 2135 cursor_field=( 2136 concurrent_cursor.cursor_field 2137 if hasattr(concurrent_cursor, "cursor_field") 2138 else None 2139 ), 2140 logger=logging.getLogger(f"airbyte.{stream_name}"), 2141 cursor=concurrent_cursor, 2142 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2143 )
2285 def create_default_error_handler( 2286 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2287 ) -> DefaultErrorHandler: 2288 backoff_strategies = [] 2289 if model.backoff_strategies: 2290 for backoff_strategy_model in model.backoff_strategies: 2291 backoff_strategies.append( 2292 self._create_component_from_model(model=backoff_strategy_model, config=config) 2293 ) 2294 2295 response_filters = [] 2296 if model.response_filters: 2297 for response_filter_model in model.response_filters: 2298 response_filters.append( 2299 self._create_component_from_model(model=response_filter_model, config=config) 2300 ) 2301 response_filters.append( 2302 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2303 ) 2304 2305 return DefaultErrorHandler( 2306 backoff_strategies=backoff_strategies, 2307 max_retries=model.max_retries, 2308 response_filters=response_filters, 2309 config=config, 2310 parameters=model.parameters or {}, 2311 )
2313 def create_default_paginator( 2314 self, 2315 model: DefaultPaginatorModel, 2316 config: Config, 2317 *, 2318 url_base: str, 2319 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2320 decoder: Optional[Decoder] = None, 2321 cursor_used_for_stop_condition: Optional[Cursor] = None, 2322 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2323 if decoder: 2324 if self._is_supported_decoder_for_pagination(decoder): 2325 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2326 else: 2327 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2328 else: 2329 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2330 page_size_option = ( 2331 self._create_component_from_model(model=model.page_size_option, config=config) 2332 if model.page_size_option 2333 else None 2334 ) 2335 page_token_option = ( 2336 self._create_component_from_model(model=model.page_token_option, config=config) 2337 if model.page_token_option 2338 else None 2339 ) 2340 pagination_strategy = self._create_component_from_model( 2341 model=model.pagination_strategy, 2342 config=config, 2343 decoder=decoder_to_use, 2344 extractor_model=extractor_model, 2345 ) 2346 if cursor_used_for_stop_condition: 2347 pagination_strategy = StopConditionPaginationStrategyDecorator( 2348 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2349 ) 2350 paginator = DefaultPaginator( 2351 decoder=decoder_to_use, 2352 page_size_option=page_size_option, 2353 page_token_option=page_token_option, 2354 pagination_strategy=pagination_strategy, 2355 url_base=url_base, 2356 config=config, 2357 parameters=model.parameters or {}, 2358 ) 2359 if self._limit_pages_fetched_per_slice: 2360 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2361 return paginator
2363 def create_dpath_extractor( 2364 self, 2365 model: DpathExtractorModel, 2366 config: Config, 2367 decoder: Optional[Decoder] = None, 2368 **kwargs: Any, 2369 ) -> DpathExtractor: 2370 if decoder: 2371 decoder_to_use = decoder 2372 else: 2373 decoder_to_use = JsonDecoder(parameters={}) 2374 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2375 return DpathExtractor( 2376 decoder=decoder_to_use, 2377 field_path=model_field_path, 2378 config=config, 2379 parameters=model.parameters or {}, 2380 )
2401 def create_http_requester( 2402 self, 2403 model: HttpRequesterModel, 2404 config: Config, 2405 decoder: Decoder = JsonDecoder(parameters={}), 2406 query_properties_key: Optional[str] = None, 2407 use_cache: Optional[bool] = None, 2408 *, 2409 name: str, 2410 ) -> HttpRequester: 2411 authenticator = ( 2412 self._create_component_from_model( 2413 model=model.authenticator, 2414 config=config, 2415 url_base=model.url or model.url_base, 2416 name=name, 2417 decoder=decoder, 2418 ) 2419 if model.authenticator 2420 else None 2421 ) 2422 error_handler = ( 2423 self._create_component_from_model(model=model.error_handler, config=config) 2424 if model.error_handler 2425 else DefaultErrorHandler( 2426 backoff_strategies=[], 2427 response_filters=[], 2428 config=config, 2429 parameters=model.parameters or {}, 2430 ) 2431 ) 2432 2433 api_budget = self._api_budget 2434 2435 request_options_provider = InterpolatedRequestOptionsProvider( 2436 request_body=model.request_body, 2437 request_body_data=model.request_body_data, 2438 request_body_json=model.request_body_json, 2439 request_headers=model.request_headers, 2440 request_parameters=model.request_parameters, # type: ignore # QueryProperties have been removed in `create_simple_retriever` 2441 query_properties_key=query_properties_key, 2442 config=config, 2443 parameters=model.parameters or {}, 2444 ) 2445 2446 assert model.use_cache is not None # for mypy 2447 assert model.http_method is not None # for mypy 2448 2449 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2450 2451 return HttpRequester( 2452 name=name, 2453 url=model.url, 2454 url_base=model.url_base, 2455 path=model.path, 2456 authenticator=authenticator, 2457 error_handler=error_handler, 2458 api_budget=api_budget, 2459 http_method=HttpMethod[model.http_method.value], 2460 request_options_provider=request_options_provider, 2461 config=config, 2462 disable_retries=self._disable_retries, 2463 parameters=model.parameters or {}, 2464 message_repository=self._message_repository, 2465 use_cache=should_use_cache, 2466 decoder=decoder, 2467 stream_response=decoder.is_stream_response() if decoder else False, 2468 )
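The caching decision above combines three signals: the requester model, the calling component, and the factory-wide disable flag. A small sketch of the same boolean logic (the helper name is illustrative):

    def _should_use_cache(model_use_cache: bool, caller_use_cache, disable_cache: bool) -> bool:
        # either the requester model or the calling component can opt in,
        # but the factory-wide disable flag always wins
        return (model_use_cache or bool(caller_use_cache)) and not disable_cache


    assert _should_use_cache(False, True, disable_cache=False) is True
    assert _should_use_cache(True, None, disable_cache=False) is True
    assert _should_use_cache(True, True, disable_cache=True) is False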
2470 @staticmethod 2471 def create_http_response_filter( 2472 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2473 ) -> HttpResponseFilter: 2474 if model.action: 2475 action = ResponseAction(model.action.value) 2476 else: 2477 action = None 2478 2479 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2480 2481 http_codes = ( 2482 set(model.http_codes) if model.http_codes else set() 2483 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2484 2485 return HttpResponseFilter( 2486 action=action, 2487 failure_type=failure_type, 2488 error_message=model.error_message or "", 2489 error_message_contains=model.error_message_contains or "", 2490 http_codes=http_codes, 2491 predicate=model.predicate or "", 2492 config=config, 2493 parameters=model.parameters or {}, 2494 )
2502 def create_complex_field_type( 2503 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2504 ) -> ComplexFieldType: 2505 items = ( 2506 self._create_component_from_model(model=model.items, config=config) 2507 if isinstance(model.items, ComplexFieldTypeModel) 2508 else model.items 2509 ) 2510 2511 return ComplexFieldType(field_type=model.field_type, items=items)
2513 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2514 target_type = ( 2515 self._create_component_from_model(model=model.target_type, config=config) 2516 if isinstance(model.target_type, ComplexFieldTypeModel) 2517 else model.target_type 2518 ) 2519 2520 return TypesMap( 2521 target_type=target_type, 2522 current_type=model.current_type, 2523 condition=model.condition if model.condition is not None else "True", 2524 )
2526 def create_schema_type_identifier( 2527 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2528 ) -> SchemaTypeIdentifier: 2529 types_mapping = [] 2530 if model.types_mapping: 2531 types_mapping.extend( 2532 [ 2533 self._create_component_from_model(types_map, config=config) 2534 for types_map in model.types_mapping 2535 ] 2536 ) 2537 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2538 [x for x in model.schema_pointer] if model.schema_pointer else [] 2539 ) 2540 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2541 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2542 [x for x in model.type_pointer] if model.type_pointer else None 2543 ) 2544 2545 return SchemaTypeIdentifier( 2546 schema_pointer=model_schema_pointer, 2547 key_pointer=model_key_pointer, 2548 type_pointer=model_type_pointer, 2549 types_mapping=types_mapping, 2550 parameters=model.parameters or {}, 2551 )
2553 def create_dynamic_schema_loader( 2554 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2555 ) -> DynamicSchemaLoader: 2556 schema_transformations = [] 2557 if model.schema_transformations: 2558 for transformation_model in model.schema_transformations: 2559 schema_transformations.append( 2560 self._create_component_from_model(model=transformation_model, config=config) 2561 ) 2562 name = "dynamic_properties" 2563 retriever = self._create_component_from_model( 2564 model=model.retriever, 2565 config=config, 2566 name=name, 2567 primary_key=None, 2568 partition_router=self._build_stream_slicer_from_partition_router( 2569 model.retriever, config 2570 ), 2571 transformations=[], 2572 use_cache=True, 2573 log_formatter=( 2574 lambda response: format_http_message( 2575 response, 2576 f"Schema loader '{name}' request", 2577 f"Request performed in order to extract schema.", 2578 name, 2579 is_auxiliary=True, 2580 ) 2581 ), 2582 ) 2583 schema_type_identifier = self._create_component_from_model( 2584 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2585 ) 2586 schema_filter = ( 2587 self._create_component_from_model( 2588 model.schema_filter, config=config, parameters=model.parameters or {} 2589 ) 2590 if model.schema_filter is not None 2591 else None 2592 ) 2593 2594 return DynamicSchemaLoader( 2595 retriever=retriever, 2596 config=config, 2597 schema_transformations=schema_transformations, 2598 schema_filter=schema_filter, 2599 schema_type_identifier=schema_type_identifier, 2600 parameters=model.parameters or {}, 2601 )
2621 def create_gzip_decoder( 2622 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2623 ) -> Decoder: 2624 _compressed_response_types = { 2625 "gzip", 2626 "x-gzip", 2627 "gzip, deflate", 2628 "x-gzip, deflate", 2629 "application/zip", 2630 "application/gzip", 2631 "application/x-gzip", 2632 "application/x-zip-compressed", 2633 } 2634 2635 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2636 2637 if self._emit_connector_builder_messages: 2638 # This is very surprising, but if the response is not streamed, 2639 # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data, as opposed to response.raw, 2640 # which uses urllib3 directly and does not uncompress the data. 2641 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2642 2643 return CompositeRawDecoder.by_headers( 2644 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2645 stream_response=True, 2646 fallback_parser=gzip_parser.inner_parser, 2647 )
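The comment above refers to a well-known difference between requests' two response access paths. A sketch of that difference against a hypothetical gzip-encoded endpoint:

    import requests

    # With the default (non-streamed) read, requests honours Content-Encoding: gzip and
    # response.content is already decompressed.
    eager = requests.get("https://example.com/data.json")  # hypothetical endpoint
    decompressed_body = eager.content

    # With stream=True, response.raw is the underlying urllib3 stream and, unless
    # decode_content is requested explicitly, it yields the still-compressed bytes.
    streamed = requests.get("https://example.com/data.json", stream=True)
    compressed_body = streamed.raw.read()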
2696 def create_jwt_authenticator( 2697 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2698 ) -> JwtAuthenticator: 2699 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2700 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2701 request_option = ( 2702 self._create_component_from_model(model.request_option, config) 2703 if model.request_option 2704 else None 2705 ) 2706 return JwtAuthenticator( 2707 config=config, 2708 parameters=model.parameters or {}, 2709 algorithm=JwtAlgorithm(model.algorithm.value), 2710 secret_key=model.secret_key, 2711 base64_encode_secret_key=model.base64_encode_secret_key, 2712 token_duration=model.token_duration, 2713 header_prefix=model.header_prefix, 2714 kid=jwt_headers.kid, 2715 typ=jwt_headers.typ, 2716 cty=jwt_headers.cty, 2717 iss=jwt_payload.iss, 2718 sub=jwt_payload.sub, 2719 aud=jwt_payload.aud, 2720 additional_jwt_headers=model.additional_jwt_headers, 2721 additional_jwt_payload=model.additional_jwt_payload, 2722 passphrase=model.passphrase, 2723 request_option=request_option, 2724 )
2726 def create_list_partition_router( 2727 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2728 ) -> ListPartitionRouter: 2729 request_option = ( 2730 self._create_component_from_model(model.request_option, config) 2731 if model.request_option 2732 else None 2733 ) 2734 return ListPartitionRouter( 2735 cursor_field=model.cursor_field, 2736 request_option=request_option, 2737 values=model.values, 2738 config=config, 2739 parameters=model.parameters or {}, 2740 )
2742 @staticmethod 2743 def create_min_max_datetime( 2744 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2745 ) -> MinMaxDatetime: 2746 return MinMaxDatetime( 2747 datetime=model.datetime, 2748 datetime_format=model.datetime_format or "", 2749 max_datetime=model.max_datetime or "", 2750 min_datetime=model.min_datetime or "", 2751 parameters=model.parameters or {}, 2752 )
2764 def create_oauth_authenticator( 2765 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2766 ) -> DeclarativeOauth2Authenticator: 2767 profile_assertion = ( 2768 self._create_component_from_model(model.profile_assertion, config=config) 2769 if model.profile_assertion 2770 else None 2771 ) 2772 2773 refresh_token_error_status_codes, refresh_token_error_key, refresh_token_error_values = ( 2774 self._get_refresh_token_error_information(model) 2775 ) 2776 if model.refresh_token_updater: 2777 # ignore type error because fixing it would have a lot of dependencies, revisit later 2778 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2779 config, 2780 InterpolatedString.create( 2781 model.token_refresh_endpoint, # type: ignore 2782 parameters=model.parameters or {}, 2783 ).eval(config), 2784 access_token_name=InterpolatedString.create( 2785 model.access_token_name or "access_token", parameters=model.parameters or {} 2786 ).eval(config), 2787 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2788 expires_in_name=InterpolatedString.create( 2789 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2790 ).eval(config), 2791 client_id_name=InterpolatedString.create( 2792 model.client_id_name or "client_id", parameters=model.parameters or {} 2793 ).eval(config), 2794 client_id=InterpolatedString.create( 2795 model.client_id, parameters=model.parameters or {} 2796 ).eval(config) 2797 if model.client_id 2798 else model.client_id, 2799 client_secret_name=InterpolatedString.create( 2800 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2801 ).eval(config), 2802 client_secret=InterpolatedString.create( 2803 model.client_secret, parameters=model.parameters or {} 2804 ).eval(config) 2805 if model.client_secret 2806 else model.client_secret, 2807 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2808 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2809 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2810 grant_type_name=InterpolatedString.create( 2811 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2812 ).eval(config), 2813 grant_type=InterpolatedString.create( 2814 model.grant_type or "refresh_token", parameters=model.parameters or {} 2815 ).eval(config), 2816 refresh_request_body=InterpolatedMapping( 2817 model.refresh_request_body or {}, parameters=model.parameters or {} 2818 ).eval(config), 2819 refresh_request_headers=InterpolatedMapping( 2820 model.refresh_request_headers or {}, parameters=model.parameters or {} 2821 ).eval(config), 2822 scopes=model.scopes, 2823 token_expiry_date_format=model.token_expiry_date_format, 2824 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2825 message_repository=self._message_repository, 2826 refresh_token_error_status_codes=refresh_token_error_status_codes, 2827 refresh_token_error_key=refresh_token_error_key, 2828 refresh_token_error_values=refresh_token_error_values, 2829 ) 2830 # ignore type error because fixing it would have a lot of dependencies, revisit later 2831 return DeclarativeOauth2Authenticator( # type: ignore 2832 access_token_name=model.access_token_name or "access_token", 2833 access_token_value=model.access_token_value, 2834 client_id_name=model.client_id_name or "client_id", 2835 client_id=model.client_id, 2836 client_secret_name=model.client_secret_name or "client_secret", 2837 
client_secret=model.client_secret, 2838 expires_in_name=model.expires_in_name or "expires_in", 2839 grant_type_name=model.grant_type_name or "grant_type", 2840 grant_type=model.grant_type or "refresh_token", 2841 refresh_request_body=model.refresh_request_body, 2842 refresh_request_headers=model.refresh_request_headers, 2843 refresh_token_name=model.refresh_token_name or "refresh_token", 2844 refresh_token=model.refresh_token, 2845 scopes=model.scopes, 2846 token_expiry_date=model.token_expiry_date, 2847 token_expiry_date_format=model.token_expiry_date_format, 2848 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2849 token_refresh_endpoint=model.token_refresh_endpoint, 2850 config=config, 2851 parameters=model.parameters or {}, 2852 message_repository=self._message_repository, 2853 profile_assertion=profile_assertion, 2854 use_profile_assertion=model.use_profile_assertion, 2855 refresh_token_error_status_codes=refresh_token_error_status_codes, 2856 refresh_token_error_key=refresh_token_error_key, 2857 refresh_token_error_values=refresh_token_error_values, 2858 )
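All of the interpolated OAuth fields above follow the same InterpolatedString.create(...).eval(config) pattern. A minimal sketch resolving a hypothetical client_id expression against a connector config:

    from airbyte_cdk.sources.declarative.interpolation import InterpolatedString

    client_id = InterpolatedString.create(
        "{{ config['credentials']['client_id'] }}",  # hypothetical manifest value
        parameters={},
    ).eval({"credentials": {"client_id": "my-client-id"}})
    assert client_id == "my-client-id"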
2908 def create_offset_increment( 2909 self, 2910 model: OffsetIncrementModel, 2911 config: Config, 2912 decoder: Decoder, 2913 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2914 **kwargs: Any, 2915 ) -> OffsetIncrement: 2916 if isinstance(decoder, PaginationDecoderDecorator): 2917 inner_decoder = decoder.decoder 2918 else: 2919 inner_decoder = decoder 2920 decoder = PaginationDecoderDecorator(decoder=decoder) 2921 2922 if self._is_supported_decoder_for_pagination(inner_decoder): 2923 decoder_to_use = decoder 2924 else: 2925 raise ValueError( 2926 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2927 ) 2928 2929 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2930 # so that it can be shared by OffSetIncrement and RecordSelector. However, due to how we instantiate the 2931 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2932 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2933 # When we have more time to investigate we can look into reusing the same component. 2934 extractor = ( 2935 self._create_component_from_model( 2936 model=extractor_model, config=config, decoder=decoder_to_use 2937 ) 2938 if extractor_model 2939 else None 2940 ) 2941 2942 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 2943 # If page_size is a string that represents an integer (not an interpolation), convert it back. 2944 page_size = model.page_size 2945 if isinstance(page_size, str) and page_size.isdigit(): 2946 page_size = int(page_size) 2947 2948 return OffsetIncrement( 2949 page_size=page_size, 2950 config=config, 2951 decoder=decoder_to_use, 2952 extractor=extractor, 2953 inject_on_first_request=model.inject_on_first_request or False, 2954 parameters=model.parameters or {}, 2955 )
2957 @staticmethod 2958 def create_page_increment( 2959 model: PageIncrementModel, config: Config, **kwargs: Any 2960 ) -> PageIncrement: 2961 # Pydantic v1 Union type coercion can convert int to string depending on Union order. 2962 # If page_size is a string that represents an integer (not an interpolation), convert it back. 2963 page_size = model.page_size 2964 if isinstance(page_size, str) and page_size.isdigit(): 2965 page_size = int(page_size) 2966 2967 return PageIncrement( 2968 page_size=page_size, 2969 config=config, 2970 start_from_page=model.start_from_page or 0, 2971 inject_on_first_request=model.inject_on_first_request or False, 2972 parameters=model.parameters or {}, 2973 )
2975 def create_parent_stream_config( 2976 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2977 ) -> ParentStreamConfig: 2978 declarative_stream = self._create_component_from_model( 2979 model.stream, 2980 config=config, 2981 is_parent=True, 2982 **kwargs, 2983 ) 2984 request_option = ( 2985 self._create_component_from_model(model.request_option, config=config) 2986 if model.request_option 2987 else None 2988 ) 2989 2990 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2991 raise ValueError( 2992 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 2993 ) 2994 2995 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2996 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2997 ) 2998 2999 return ParentStreamConfig( 3000 parent_key=model.parent_key, 3001 request_option=request_option, 3002 stream=declarative_stream, 3003 partition_field=model.partition_field, 3004 config=config, 3005 incremental_dependency=model.incremental_dependency or False, 3006 parameters=model.parameters or {}, 3007 extra_fields=model.extra_fields, 3008 lazy_read_pointer=model_lazy_read_pointer, 3009 )
3011 def create_properties_from_endpoint( 3012 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 3013 ) -> PropertiesFromEndpoint: 3014 retriever = self._create_component_from_model( 3015 model=model.retriever, 3016 config=config, 3017 name="dynamic_properties", 3018 primary_key=None, 3019 stream_slicer=None, 3020 transformations=[], 3021 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ 3022 ) 3023 return PropertiesFromEndpoint( 3024 property_field_path=model.property_field_path, 3025 retriever=retriever, 3026 config=config, 3027 parameters=model.parameters or {}, 3028 )
3030 def create_property_chunking( 3031 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 3032 ) -> PropertyChunking: 3033 record_merge_strategy = ( 3034 self._create_component_from_model( 3035 model=model.record_merge_strategy, config=config, **kwargs 3036 ) 3037 if model.record_merge_strategy 3038 else None 3039 ) 3040 3041 property_limit_type: PropertyLimitType 3042 match model.property_limit_type: 3043 case PropertyLimitTypeModel.property_count: 3044 property_limit_type = PropertyLimitType.property_count 3045 case PropertyLimitTypeModel.characters: 3046 property_limit_type = PropertyLimitType.characters 3047 case _: 3048 raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}") 3049 3050 return PropertyChunking( 3051 property_limit_type=property_limit_type, 3052 property_limit=model.property_limit, 3053 record_merge_strategy=record_merge_strategy, 3054 config=config, 3055 parameters=model.parameters or {}, 3056 )
3058 def create_query_properties( 3059 self, model: QueryPropertiesModel, config: Config, *, stream_name: str, **kwargs: Any 3060 ) -> QueryProperties: 3061 if isinstance(model.property_list, list): 3062 property_list = model.property_list 3063 else: 3064 property_list = self._create_component_from_model( 3065 model=model.property_list, config=config, **kwargs 3066 ) 3067 3068 property_chunking = ( 3069 self._create_component_from_model( 3070 model=model.property_chunking, config=config, **kwargs 3071 ) 3072 if model.property_chunking 3073 else None 3074 ) 3075 3076 property_selector = ( 3077 self._create_component_from_model( 3078 model=model.property_selector, config=config, stream_name=stream_name, **kwargs 3079 ) 3080 if model.property_selector 3081 else None 3082 ) 3083 3084 return QueryProperties( 3085 property_list=property_list, 3086 always_include_properties=model.always_include_properties, 3087 property_chunking=property_chunking, 3088 property_selector=property_selector, 3089 config=config, 3090 parameters=model.parameters or {}, 3091 )
3093 def create_json_schema_property_selector( 3094 self, 3095 model: JsonSchemaPropertySelectorModel, 3096 config: Config, 3097 *, 3098 stream_name: str, 3099 **kwargs: Any, 3100 ) -> JsonSchemaPropertySelector: 3101 configured_stream = self._stream_name_to_configured_stream.get(stream_name) 3102 3103 transformations = [] 3104 if model.transformations: 3105 for transformation_model in model.transformations: 3106 transformations.append( 3107 self._create_component_from_model(model=transformation_model, config=config) 3108 ) 3109 3110 return JsonSchemaPropertySelector( 3111 configured_stream=configured_stream, 3112 properties_transformations=transformations, 3113 config=config, 3114 parameters=model.parameters or {}, 3115 )
3129 @staticmethod 3130 def create_request_option( 3131 model: RequestOptionModel, config: Config, **kwargs: Any 3132 ) -> RequestOption: 3133 inject_into = RequestOptionType(model.inject_into.value) 3134 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3135 [ 3136 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3137 for segment in model.field_path 3138 ] 3139 if model.field_path 3140 else None 3141 ) 3142 field_name = ( 3143 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3144 if model.field_name 3145 else None 3146 ) 3147 return RequestOption( 3148 field_name=field_name, 3149 field_path=field_path, 3150 inject_into=inject_into, 3151 parameters=kwargs.get("parameters", {}), 3152 )
3154 def create_record_selector( 3155 self, 3156 model: RecordSelectorModel, 3157 config: Config, 3158 *, 3159 name: str, 3160 transformations: List[RecordTransformation] | None = None, 3161 decoder: Decoder | None = None, 3162 client_side_incremental_sync_cursor: Optional[Cursor] = None, 3163 file_uploader: Optional[DefaultFileUploader] = None, 3164 **kwargs: Any, 3165 ) -> RecordSelector: 3166 extractor = self._create_component_from_model( 3167 model=model.extractor, decoder=decoder, config=config 3168 ) 3169 record_filter = ( 3170 self._create_component_from_model(model.record_filter, config=config) 3171 if model.record_filter 3172 else None 3173 ) 3174 3175 transform_before_filtering = ( 3176 False if model.transform_before_filtering is None else model.transform_before_filtering 3177 ) 3178 if client_side_incremental_sync_cursor: 3179 record_filter = ClientSideIncrementalRecordFilterDecorator( 3180 config=config, 3181 parameters=model.parameters, 3182 condition=model.record_filter.condition 3183 if (model.record_filter and hasattr(model.record_filter, "condition")) 3184 else None, 3185 cursor=client_side_incremental_sync_cursor, 3186 ) 3187 transform_before_filtering = ( 3188 True 3189 if model.transform_before_filtering is None 3190 else model.transform_before_filtering 3191 ) 3192 3193 if model.schema_normalization is None: 3194 # default to no schema normalization if not set 3195 model.schema_normalization = SchemaNormalizationModel.None_ 3196 3197 schema_normalization = ( 3198 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3199 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3200 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3201 ) 3202 3203 return RecordSelector( 3204 extractor=extractor, 3205 name=name, 3206 config=config, 3207 record_filter=record_filter, 3208 transformations=transformations or [], 3209 file_uploader=file_uploader, 3210 schema_normalization=schema_normalization, 3211 parameters=model.parameters or {}, 3212 transform_before_filtering=transform_before_filtering, 3213 )
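The transform_before_filtering resolution above reduces to a simple rule: an explicit manifest value always wins, and the default flips to True only when a client-side incremental cursor is injected. A small sketch of that rule (the helper name is illustrative):

    def _resolve_transform_before_filtering(explicit_value, has_client_side_cursor: bool) -> bool:
        # explicit manifest value wins; otherwise default to True only when client-side
        # incremental filtering wraps the record filter
        if explicit_value is not None:
            return explicit_value
        return has_client_side_cursor


    assert _resolve_transform_before_filtering(None, has_client_side_cursor=False) is False
    assert _resolve_transform_before_filtering(None, has_client_side_cursor=True) is True
    assert _resolve_transform_before_filtering(False, has_client_side_cursor=True) is False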
3223 def create_selective_authenticator( 3224 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3225 ) -> DeclarativeAuthenticator: 3226 authenticators = { 3227 name: self._create_component_from_model(model=auth, config=config) 3228 for name, auth in model.authenticators.items() 3229 } 3230 # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error 3231 return SelectiveAuthenticator( # type: ignore[abstract] 3232 config=config, 3233 authenticators=authenticators, 3234 authenticator_selection_path=model.authenticator_selection_path, 3235 **kwargs, 3236 )
3238 @staticmethod 3239 def create_legacy_session_token_authenticator( 3240 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3241 ) -> LegacySessionTokenAuthenticator: 3242 return LegacySessionTokenAuthenticator( 3243 api_url=url_base, 3244 header=model.header, 3245 login_url=model.login_url, 3246 password=model.password or "", 3247 session_token=model.session_token or "", 3248 session_token_response_key=model.session_token_response_key or "", 3249 username=model.username or "", 3250 validate_session_url=model.validate_session_url, 3251 config=config, 3252 parameters=model.parameters or {}, 3253 )
3255 def create_simple_retriever( 3256 self, 3257 model: SimpleRetrieverModel, 3258 config: Config, 3259 *, 3260 name: str, 3261 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3262 request_options_provider: Optional[RequestOptionsProvider] = None, 3263 cursor: Optional[Cursor] = None, 3264 has_stop_condition_cursor: bool = False, 3265 is_client_side_incremental_sync: bool = False, 3266 transformations: List[RecordTransformation], 3267 file_uploader: Optional[DefaultFileUploader] = None, 3268 incremental_sync: Optional[ 3269 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3270 ] = None, 3271 use_cache: Optional[bool] = None, 3272 log_formatter: Optional[Callable[[Response], Any]] = None, 3273 partition_router: Optional[PartitionRouter] = None, 3274 **kwargs: Any, 3275 ) -> SimpleRetriever: 3276 def _get_url(req: Requester) -> str: 3277 """ 3278 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3279 This is needed because the URL is not set until the requester is created. 3280 """ 3281 3282 _url: str = ( 3283 model.requester.url 3284 if hasattr(model.requester, "url") and model.requester.url is not None 3285 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3286 ) 3287 _url_base: str = ( 3288 model.requester.url_base 3289 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3290 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3291 ) 3292 3293 return _url or _url_base 3294 3295 if cursor is None: 3296 cursor = FinalStateCursor(name, None, self._message_repository) 3297 3298 decoder = ( 3299 self._create_component_from_model(model=model.decoder, config=config) 3300 if model.decoder 3301 else JsonDecoder(parameters={}) 3302 ) 3303 record_selector = self._create_component_from_model( 3304 model=model.record_selector, 3305 name=name, 3306 config=config, 3307 decoder=decoder, 3308 transformations=transformations, 3309 client_side_incremental_sync_cursor=cursor if is_client_side_incremental_sync else None, 3310 file_uploader=file_uploader, 3311 ) 3312 3313 query_properties: Optional[QueryProperties] = None 3314 query_properties_key: Optional[str] = None 3315 self._ensure_query_properties_to_model(model.requester) 3316 if self._has_query_properties_in_request_parameters(model.requester): 3317 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3318 # places instead of default to request_parameters which isn't clearly documented 3319 if ( 3320 hasattr(model.requester, "fetch_properties_from_endpoint") 3321 and model.requester.fetch_properties_from_endpoint 3322 ): 3323 raise ValueError( 3324 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3325 ) 3326 3327 query_properties_definitions = [] 3328 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _has_query_properties_in_request_parameters() 3329 if isinstance(request_parameter, QueryPropertiesModel): 3330 query_properties_key = key 3331 query_properties_definitions.append(request_parameter) 3332 3333 if len(query_properties_definitions) > 1: 3334 raise ValueError( 3335 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3336 ) 3337 3338 if 
len(query_properties_definitions) == 1: 3339 query_properties = self._create_component_from_model( 3340 model=query_properties_definitions[0], stream_name=name, config=config 3341 ) 3342 3343 # Removes QueryProperties components from the interpolated mappings because it has been designed 3344 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 3345 # instead of through jinja interpolation 3346 if hasattr(model.requester, "request_parameters") and isinstance( 3347 model.requester.request_parameters, Mapping 3348 ): 3349 model.requester.request_parameters = self._remove_query_properties( 3350 model.requester.request_parameters 3351 ) 3352 elif ( 3353 hasattr(model.requester, "fetch_properties_from_endpoint") 3354 and model.requester.fetch_properties_from_endpoint 3355 ): 3356 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3357 query_properties_definition = QueryPropertiesModel( 3358 type="QueryProperties", 3359 property_list=model.requester.fetch_properties_from_endpoint, 3360 always_include_properties=None, 3361 property_chunking=None, 3362 ) # type: ignore # $parameters has a default value 3363 3364 query_properties = self.create_query_properties( 3365 model=query_properties_definition, 3366 stream_name=name, 3367 config=config, 3368 ) 3369 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3370 query_properties = self.create_query_properties( 3371 model=model.requester.query_properties, 3372 stream_name=name, 3373 config=config, 3374 ) 3375 3376 requester = self._create_component_from_model( 3377 model=model.requester, 3378 decoder=decoder, 3379 name=name, 3380 query_properties_key=query_properties_key, 3381 use_cache=use_cache, 3382 config=config, 3383 ) 3384 3385 if not request_options_provider: 3386 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3387 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3388 partition_router, PartitionRouter 3389 ): 3390 request_options_provider = partition_router 3391 3392 paginator = ( 3393 self._create_component_from_model( 3394 model=model.paginator, 3395 config=config, 3396 url_base=_get_url(requester), 3397 extractor_model=model.record_selector.extractor, 3398 decoder=decoder, 3399 cursor_used_for_stop_condition=cursor if has_stop_condition_cursor else None, 3400 ) 3401 if model.paginator 3402 else NoPagination(parameters={}) 3403 ) 3404 3405 ignore_stream_slicer_parameters_on_paginated_requests = ( 3406 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3407 ) 3408 3409 if ( 3410 model.partition_router 3411 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3412 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3413 and any( 3414 parent_stream_config.lazy_read_pointer 3415 for parent_stream_config in model.partition_router.parent_stream_configs 3416 ) 3417 ): 3418 if incremental_sync: 3419 if incremental_sync.type != "DatetimeBasedCursor": 3420 raise ValueError( 3421 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3422 ) 3423 3424 elif incremental_sync.step or incremental_sync.cursor_granularity: 3425 raise ValueError( 3426 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 
3427 ) 3428 3429 if model.decoder and model.decoder.type != "JsonDecoder": 3430 raise ValueError( 3431 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 3432 ) 3433 3434 return LazySimpleRetriever( 3435 name=name, 3436 paginator=paginator, 3437 primary_key=primary_key, 3438 requester=requester, 3439 record_selector=record_selector, 3440 stream_slicer=_NO_STREAM_SLICING, 3441 request_option_provider=request_options_provider, 3442 config=config, 3443 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3444 parameters=model.parameters or {}, 3445 ) 3446 3447 if ( 3448 model.record_selector.record_filter 3449 and model.pagination_reset 3450 and model.pagination_reset.limits 3451 ): 3452 raise ValueError("PaginationResetLimits are not supported while having record filter.") 3453 3454 return SimpleRetriever( 3455 name=name, 3456 paginator=paginator, 3457 primary_key=primary_key, 3458 requester=requester, 3459 record_selector=record_selector, 3460 stream_slicer=_NO_STREAM_SLICING, 3461 request_option_provider=request_options_provider, 3462 config=config, 3463 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3464 additional_query_properties=query_properties, 3465 log_formatter=self._get_log_formatter(log_formatter, name), 3466 pagination_tracker_factory=self._create_pagination_tracker_factory( 3467 model.pagination_reset, cursor 3468 ), 3469 parameters=model.parameters or {}, 3470 )
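The query-properties handling above enforces a single QueryProperties entry in request_parameters and then strips it from the interpolated mapping so the SimpleRetriever can inject the chunked property values itself. An illustrative sketch of that dictionary manipulation (keys and values are hypothetical):

    request_parameters = {
        "fields": {"type": "QueryProperties", "property_list": ["id", "name"]},  # hypothetical
        "per_page": "100",
    }

    query_properties_keys = [
        key
        for key, value in request_parameters.items()
        if isinstance(value, dict) and value.get("type") == "QueryProperties"
    ]
    if len(query_properties_keys) > 1:
        raise ValueError("request_parameters only supports defining one QueryProperties field")

    # the surviving parameters are what actually gets interpolated into the request
    remaining_parameters = {
        key: value for key, value in request_parameters.items() if key not in query_properties_keys
    }
    assert remaining_parameters == {"per_page": "100"}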
3548 def create_state_delegating_stream( 3549 self, 3550 model: StateDelegatingStreamModel, 3551 config: Config, 3552 has_parent_state: Optional[bool] = None, 3553 **kwargs: Any, 3554 ) -> DefaultStream: 3555 if ( 3556 model.full_refresh_stream.name != model.name 3557 or model.name != model.incremental_stream.name 3558 ): 3559 raise ValueError( 3560 f"The state_delegating_stream, full_refresh_stream and incremental_stream must have equal names. Instead got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3561 ) 3562 3563 stream_model = self._get_state_delegating_stream_model( 3564 False if has_parent_state is None else has_parent_state, model 3565 ) 3566 3567 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is always a DeclarativeStreamModel
3608 def create_async_retriever( 3609 self, 3610 model: AsyncRetrieverModel, 3611 config: Config, 3612 *, 3613 name: str, 3614 primary_key: Optional[ 3615 Union[str, List[str], List[List[str]]] 3616 ], # this seems to be needed to match create_simple_retriever 3617 stream_slicer: Optional[StreamSlicer], 3618 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3619 transformations: List[RecordTransformation], 3620 **kwargs: Any, 3621 ) -> AsyncRetriever: 3622 if model.download_target_requester and not model.download_target_extractor: 3623 raise ValueError( 3624 f"`download_target_extractor` required if using a `download_target_requester`" 3625 ) 3626 3627 def _get_download_retriever( 3628 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3629 ) -> SimpleRetriever: 3630 # We create a record selector for the download retriever 3631 # with no schema normalization and no transformations, neither record filter 3632 # as all this occurs in the record_selector of the AsyncRetriever 3633 record_selector = RecordSelector( 3634 extractor=extractor, 3635 name=name, 3636 record_filter=None, 3637 transformations=[], 3638 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3639 config=config, 3640 parameters={}, 3641 ) 3642 paginator = ( 3643 self._create_component_from_model( 3644 model=model.download_paginator, 3645 decoder=_decoder, 3646 config=config, 3647 url_base="", 3648 ) 3649 if model.download_paginator 3650 else NoPagination(parameters={}) 3651 ) 3652 3653 return SimpleRetriever( 3654 requester=requester, 3655 record_selector=record_selector, 3656 primary_key=None, 3657 name=name, 3658 paginator=paginator, 3659 config=config, 3660 parameters={}, 3661 log_formatter=self._get_log_formatter(None, name), 3662 ) 3663 3664 def _get_job_timeout() -> datetime.timedelta: 3665 user_defined_timeout: Optional[int] = ( 3666 int( 3667 InterpolatedString.create( 3668 str(model.polling_job_timeout), 3669 parameters={}, 3670 ).eval(config) 3671 ) 3672 if model.polling_job_timeout 3673 else None 3674 ) 3675 3676 # check for user defined timeout during the test read or 15 minutes 3677 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3678 # default value for non-connector builder is 60 minutes. 
3679 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3680 3681 return ( 3682 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3683 ) 3684 3685 decoder = ( 3686 self._create_component_from_model(model=model.decoder, config=config) 3687 if model.decoder 3688 else JsonDecoder(parameters={}) 3689 ) 3690 record_selector = self._create_component_from_model( 3691 model=model.record_selector, 3692 config=config, 3693 decoder=decoder, 3694 name=name, 3695 transformations=transformations, 3696 client_side_incremental_sync=client_side_incremental_sync, 3697 ) 3698 3699 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3700 if self._should_limit_slices_fetched(): 3701 stream_slicer = cast( 3702 StreamSlicer, 3703 StreamSlicerTestReadDecorator( 3704 wrapped_slicer=stream_slicer, 3705 maximum_number_of_slices=self._limit_slices_fetched or 5, 3706 ), 3707 ) 3708 3709 creation_requester = self._create_component_from_model( 3710 model=model.creation_requester, 3711 decoder=decoder, 3712 config=config, 3713 name=f"job creation - {name}", 3714 ) 3715 polling_requester = self._create_component_from_model( 3716 model=model.polling_requester, 3717 decoder=decoder, 3718 config=config, 3719 name=f"job polling - {name}", 3720 ) 3721 job_download_components_name = f"job download - {name}" 3722 download_decoder = ( 3723 self._create_component_from_model(model=model.download_decoder, config=config) 3724 if model.download_decoder 3725 else JsonDecoder(parameters={}) 3726 ) 3727 download_extractor = ( 3728 self._create_component_from_model( 3729 model=model.download_extractor, 3730 config=config, 3731 decoder=download_decoder, 3732 parameters=model.parameters, 3733 ) 3734 if model.download_extractor 3735 else DpathExtractor( 3736 [], 3737 config=config, 3738 decoder=download_decoder, 3739 parameters=model.parameters or {}, 3740 ) 3741 ) 3742 download_requester = self._create_component_from_model( 3743 model=model.download_requester, 3744 decoder=download_decoder, 3745 config=config, 3746 name=job_download_components_name, 3747 ) 3748 download_retriever = _get_download_retriever( 3749 download_requester, download_extractor, download_decoder 3750 ) 3751 abort_requester = ( 3752 self._create_component_from_model( 3753 model=model.abort_requester, 3754 decoder=decoder, 3755 config=config, 3756 name=f"job abort - {name}", 3757 ) 3758 if model.abort_requester 3759 else None 3760 ) 3761 delete_requester = ( 3762 self._create_component_from_model( 3763 model=model.delete_requester, 3764 decoder=decoder, 3765 config=config, 3766 name=f"job delete - {name}", 3767 ) 3768 if model.delete_requester 3769 else None 3770 ) 3771 download_target_requester = ( 3772 self._create_component_from_model( 3773 model=model.download_target_requester, 3774 decoder=decoder, 3775 config=config, 3776 name=f"job extract_url - {name}", 3777 ) 3778 if model.download_target_requester 3779 else None 3780 ) 3781 status_extractor = self._create_component_from_model( 3782 model=model.status_extractor, decoder=decoder, config=config, name=name 3783 ) 3784 download_target_extractor = ( 3785 self._create_component_from_model( 3786 model=model.download_target_extractor, 3787 decoder=decoder, 3788 config=config, 3789 name=name, 3790 ) 3791 if model.download_target_extractor 3792 else None 3793 ) 3794 3795 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3796 creation_requester=creation_requester, 3797 polling_requester=polling_requester, 3798 
download_retriever=download_retriever,
3799 download_target_requester=download_target_requester,
3800 abort_requester=abort_requester,
3801 delete_requester=delete_requester,
3802 status_extractor=status_extractor,
3803 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
3804 download_target_extractor=download_target_extractor,
3805 job_timeout=_get_job_timeout(),
3806 )
3807
3808 async_job_partition_router = AsyncJobPartitionRouter(
3809 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
3810 job_repository,
3811 stream_slices,
3812 self._job_tracker,
3813 self._message_repository,
3814 # FIXME: work is needed here to detect whether a stream has a parent stream that is bulk
3815 has_bulk_parent=False,
3816 # set `job_max_retry` to 1 for the Connector Builder use-case;
3817 # `None` means the default of 3 retry attempts is used under the hood.
3818 job_max_retry=1 if self._emit_connector_builder_messages else None,
3819 ),
3820 stream_slicer=stream_slicer,
3821 config=config,
3822 parameters=model.parameters or {},
3823 )
3824
3825 return AsyncRetriever(
3826 record_selector=record_selector,
3827 stream_slicer=async_job_partition_router,
3828 config=config,
3829 parameters=model.parameters or {},
3830 )
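# A hedged manifest sketch of an AsyncRetriever definition, limited to the fields this
# factory method actually reads; the values are hypothetical placeholders.
#
#   type: AsyncRetriever
#   creation_requester: { ... }         # starts the asynchronous job
#   polling_requester: { ... }          # polls the job status
#   status_extractor: { ... }           # pulls the status value out of the polling response
#   status_mapping: { ... }             # maps API statuses to AsyncJobStatus values
#   download_requester: { ... }         # downloads the finished job's results
#   download_target_extractor: { ... }  # optional; required when download_target_requester is set
#   polling_job_timeout: 30             # optional; interpreted as minutes (see _get_job_timeout)
#   record_selector: { ... }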
3832 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3833 config_migrations = [ 3834 self._create_component_from_model(migration, config) 3835 for migration in ( 3836 model.config_normalization_rules.config_migrations 3837 if ( 3838 model.config_normalization_rules 3839 and model.config_normalization_rules.config_migrations 3840 ) 3841 else [] 3842 ) 3843 ] 3844 config_transformations = [ 3845 self._create_component_from_model(transformation, config) 3846 for transformation in ( 3847 model.config_normalization_rules.transformations 3848 if ( 3849 model.config_normalization_rules 3850 and model.config_normalization_rules.transformations 3851 ) 3852 else [] 3853 ) 3854 ] 3855 config_validations = [ 3856 self._create_component_from_model(validation, config) 3857 for validation in ( 3858 model.config_normalization_rules.validations 3859 if ( 3860 model.config_normalization_rules 3861 and model.config_normalization_rules.validations 3862 ) 3863 else [] 3864 ) 3865 ] 3866 3867 return Spec( 3868 connection_specification=model.connection_specification, 3869 documentation_url=model.documentation_url, 3870 advanced_auth=model.advanced_auth, 3871 parameters={}, 3872 config_migrations=config_migrations, 3873 config_transformations=config_transformations, 3874 config_validations=config_validations, 3875 )
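# A hedged manifest sketch of a Spec with config normalization rules, using only the
# fields read above; the connection_specification body and URL are hypothetical placeholders.
#
#   type: Spec
#   documentation_url: https://docs.airbyte.com/integrations/sources/example
#   connection_specification:
#     type: object
#     properties: { ... }
#   config_normalization_rules:
#     config_migrations: [ ... ]
#     transformations: [ ... ]
#     validations: [ ... ]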
3877 def create_substream_partition_router( 3878 self, 3879 model: SubstreamPartitionRouterModel, 3880 config: Config, 3881 *, 3882 stream_name: str, 3883 **kwargs: Any, 3884 ) -> SubstreamPartitionRouter: 3885 parent_stream_configs = [] 3886 if model.parent_stream_configs: 3887 parent_stream_configs.extend( 3888 [ 3889 self.create_parent_stream_config_with_substream_wrapper( 3890 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3891 ) 3892 for parent_stream_config in model.parent_stream_configs 3893 ] 3894 ) 3895 3896 return SubstreamPartitionRouter( 3897 parent_stream_configs=parent_stream_configs, 3898 parameters=model.parameters or {}, 3899 config=config, 3900 )
3902 def create_parent_stream_config_with_substream_wrapper( 3903 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 3904 ) -> Any: 3905 # getting the parent state 3906 child_state = self._connector_state_manager.get_stream_state(stream_name, None) 3907 3908 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3909 has_parent_state = bool( 3910 self._connector_state_manager.get_stream_state(stream_name, None) 3911 if model.incremental_dependency 3912 else False 3913 ) 3914 connector_state_manager = self._instantiate_parent_stream_state_manager( 3915 child_state, config, model, has_parent_state 3916 ) 3917 3918 substream_factory = ModelToComponentFactory( 3919 connector_state_manager=connector_state_manager, 3920 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3921 limit_slices_fetched=self._limit_slices_fetched, 3922 emit_connector_builder_messages=self._emit_connector_builder_messages, 3923 disable_retries=self._disable_retries, 3924 disable_cache=self._disable_cache, 3925 message_repository=StateFilteringMessageRepository( 3926 LogAppenderMessageRepositoryDecorator( 3927 { 3928 "airbyte_cdk": {"stream": {"is_substream": True}}, 3929 "http": {"is_auxiliary": True}, 3930 }, 3931 self._message_repository, 3932 self._evaluate_log_level(self._emit_connector_builder_messages), 3933 ), 3934 ), 3935 api_budget=self._api_budget, 3936 ) 3937 3938 return substream_factory.create_parent_stream_config( 3939 model=model, config=config, stream_name=stream_name, **kwargs 3940 )
4003 @staticmethod
4004 def create_wait_time_from_header(
4005 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
4006 ) -> WaitTimeFromHeaderBackoffStrategy:
4007 return WaitTimeFromHeaderBackoffStrategy(
4008 header=model.header,
4009 parameters=model.parameters or {},
4010 config=config,
4011 regex=model.regex,
4012 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
4013 )
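# A hedged manifest sketch for this backoff strategy; field names mirror the model
# attributes used above, and the header, regex, and limit values are hypothetical.
#
#   type: WaitTimeFromHeader
#   header: Retry-After
#   regex: "(\\d+)"
#   max_waiting_time_in_seconds: 3600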
4017 @staticmethod 4018 def create_wait_until_time_from_header( 4019 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 4020 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 4021 return WaitUntilTimeFromHeaderBackoffStrategy( 4022 header=model.header, 4023 parameters=model.parameters or {}, 4024 config=config, 4025 min_wait=model.min_wait, 4026 regex=model.regex, 4027 )
4035 @staticmethod 4036 def create_components_mapping_definition( 4037 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 4038 ) -> ComponentMappingDefinition: 4039 interpolated_value = InterpolatedString.create( 4040 model.value, parameters=model.parameters or {} 4041 ) 4042 field_path = [ 4043 InterpolatedString.create(path, parameters=model.parameters or {}) 4044 for path in model.field_path 4045 ] 4046 return ComponentMappingDefinition( 4047 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 4048 value=interpolated_value, 4049 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 4050 create_or_update=model.create_or_update, 4051 condition=model.condition, 4052 parameters=model.parameters or {}, 4053 )
4055 def create_http_components_resolver( 4056 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 4057 ) -> Any: 4058 retriever = self._create_component_from_model( 4059 model=model.retriever, 4060 config=config, 4061 name=f"{stream_name if stream_name else '__http_components_resolver'}", 4062 primary_key=None, 4063 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4064 transformations=[], 4065 ) 4066 4067 components_mapping = [] 4068 for component_mapping_definition_model in model.components_mapping: 4069 if component_mapping_definition_model.condition: 4070 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4071 components_mapping.append( 4072 self._create_component_from_model( 4073 model=component_mapping_definition_model, 4074 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4075 component_mapping_definition_model.value_type 4076 ), 4077 config=config, 4078 ) 4079 ) 4080 4081 return HttpComponentsResolver( 4082 retriever=retriever, 4083 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 4084 config=config, 4085 components_mapping=components_mapping, 4086 parameters=model.parameters or {}, 4087 )
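# A hedged manifest sketch of an HttpComponentsResolver; only fields consumed above are
# shown, and the retriever and value details are hypothetical placeholders. Note that
# `condition` is rejected here and only supported for ConfigComponentsResolver.
#
#   type: HttpComponentsResolver
#   retriever: { ... }
#   components_mapping:
#     - type: ComponentMappingDefinition
#       field_path: ["name"]
#       value: "{{ ... }}"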
4089 @staticmethod
4090 def create_stream_config(
4091 model: StreamConfigModel, config: Config, **kwargs: Any
4092 ) -> StreamConfig:
4093 model_configs_pointer: List[Union[InterpolatedString, str]] = (
4094 list(model.configs_pointer) if model.configs_pointer else []
4095 )
4096
4097 return StreamConfig(
4098 configs_pointer=model_configs_pointer,
4099 default_values=model.default_values,
4100 parameters=model.parameters or {},
4101 )
4103 def create_config_components_resolver( 4104 self, 4105 model: ConfigComponentsResolverModel, 4106 config: Config, 4107 ) -> Any: 4108 model_stream_configs = ( 4109 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 4110 ) 4111 4112 stream_configs = [ 4113 self._create_component_from_model( 4114 stream_config, config=config, parameters=model.parameters or {} 4115 ) 4116 for stream_config in model_stream_configs 4117 ] 4118 4119 components_mapping = [ 4120 self._create_component_from_model( 4121 model=components_mapping_definition_model, 4122 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4123 components_mapping_definition_model.value_type 4124 ), 4125 config=config, 4126 parameters=model.parameters, 4127 ) 4128 for components_mapping_definition_model in model.components_mapping 4129 ] 4130 4131 return ConfigComponentsResolver( 4132 stream_configs=stream_configs, 4133 config=config, 4134 components_mapping=components_mapping, 4135 parameters=model.parameters or {}, 4136 )
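# A hedged manifest sketch of a ConfigComponentsResolver; stream_config may be a single
# StreamConfig or a list of them (both are normalized to a list above). The pointer,
# defaults, and mapping values are hypothetical placeholders.
#
#   type: ConfigComponentsResolver
#   stream_config:
#     - type: StreamConfig
#       configs_pointer: ["streams"]
#       default_values: [ ... ]
#   components_mapping:
#     - type: ComponentMappingDefinition
#       field_path: ["name"]
#       value: "{{ ... }}"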
4138 def create_parametrized_components_resolver( 4139 self, 4140 model: ParametrizedComponentsResolverModel, 4141 config: Config, 4142 ) -> ParametrizedComponentsResolver: 4143 stream_parameters = StreamParametersDefinition( 4144 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 4145 ) 4146 4147 components_mapping = [] 4148 for components_mapping_definition_model in model.components_mapping: 4149 if components_mapping_definition_model.condition: 4150 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 4151 components_mapping.append( 4152 self._create_component_from_model( 4153 model=components_mapping_definition_model, 4154 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 4155 components_mapping_definition_model.value_type 4156 ), 4157 config=config, 4158 ) 4159 ) 4160 return ParametrizedComponentsResolver( 4161 stream_parameters=stream_parameters, 4162 config=config, 4163 components_mapping=components_mapping, 4164 parameters=model.parameters or {}, 4165 )
4189 def create_http_api_budget( 4190 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4191 ) -> HttpAPIBudget: 4192 policies = [ 4193 self._create_component_from_model(model=policy, config=config) 4194 for policy in model.policies 4195 ] 4196 4197 return HttpAPIBudget( 4198 policies=policies, 4199 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4200 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4201 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4202 )
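# A hedged manifest sketch of an HTTPAPIBudget; the header names and status codes shown
# match the fallbacks applied above, and the policy entry is a hypothetical placeholder.
#
#   type: HTTPAPIBudget
#   ratelimit_reset_header: ratelimit-reset
#   ratelimit_remaining_header: ratelimit-remaining
#   status_codes_for_ratelimit_hit: [429]
#   policies:
#     - type: MovingWindowCallRatePolicy
#       ...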
4204 def create_fixed_window_call_rate_policy( 4205 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4206 ) -> FixedWindowCallRatePolicy: 4207 matchers = [ 4208 self._create_component_from_model(model=matcher, config=config) 4209 for matcher in model.matchers 4210 ] 4211 4212 # Set the initial reset timestamp to 10 days from now. 4213 # This value will be updated by the first request. 4214 return FixedWindowCallRatePolicy( 4215 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4216 period=parse_duration(model.period), 4217 call_limit=model.call_limit, 4218 matchers=matchers, 4219 )
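# A hedged manifest sketch of a FixedWindowCallRatePolicy; `period` is parsed with
# isodate.parse_duration above, so it must be an ISO 8601 duration. The matcher entry
# and limits are hypothetical placeholders.
#
#   type: FixedWindowCallRatePolicy
#   period: PT1H
#   call_limit: 1000
#   matchers:
#     - type: HttpRequestRegexMatcher
#       ...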
4221 def create_file_uploader( 4222 self, model: FileUploaderModel, config: Config, **kwargs: Any 4223 ) -> FileUploader: 4224 name = "File Uploader" 4225 requester = self._create_component_from_model( 4226 model=model.requester, 4227 config=config, 4228 name=name, 4229 **kwargs, 4230 ) 4231 download_target_extractor = self._create_component_from_model( 4232 model=model.download_target_extractor, 4233 config=config, 4234 name=name, 4235 **kwargs, 4236 ) 4237 emit_connector_builder_messages = self._emit_connector_builder_messages 4238 file_uploader = DefaultFileUploader( 4239 requester=requester, 4240 download_target_extractor=download_target_extractor, 4241 config=config, 4242 file_writer=NoopFileWriter() 4243 if emit_connector_builder_messages 4244 else LocalFileSystemFileWriter(), 4245 parameters=model.parameters or {}, 4246 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4247 ) 4248 4249 return ( 4250 ConnectorBuilderFileUploader(file_uploader) 4251 if emit_connector_builder_messages 4252 else file_uploader 4253 )
4255 def create_moving_window_call_rate_policy( 4256 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4257 ) -> MovingWindowCallRatePolicy: 4258 rates = [ 4259 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4260 ] 4261 matchers = [ 4262 self._create_component_from_model(model=matcher, config=config) 4263 for matcher in model.matchers 4264 ] 4265 return MovingWindowCallRatePolicy( 4266 rates=rates, 4267 matchers=matchers, 4268 )
4270 def create_unlimited_call_rate_policy( 4271 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4272 ) -> UnlimitedCallRatePolicy: 4273 matchers = [ 4274 self._create_component_from_model(model=matcher, config=config) 4275 for matcher in model.matchers 4276 ] 4277 4278 return UnlimitedCallRatePolicy( 4279 matchers=matchers, 4280 )
4289 def create_http_request_matcher( 4290 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4291 ) -> HttpRequestRegexMatcher: 4292 return HttpRequestRegexMatcher( 4293 method=model.method, 4294 url_base=model.url_base, 4295 url_path_pattern=model.url_path_pattern, 4296 params=model.params, 4297 headers=model.headers, 4298 )
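# A hedged manifest sketch of an HttpRequestRegexMatcher; every field below maps directly
# to a constructor argument above, and the concrete values are hypothetical.
#
#   type: HttpRequestRegexMatcher
#   method: GET
#   url_base: https://api.example.com
#   url_path_pattern: "/v1/items"
#   params: { page: "1" }
#   headers: { X-Tenant: "acme" }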
4305 def create_grouping_partition_router( 4306 self, 4307 model: GroupingPartitionRouterModel, 4308 config: Config, 4309 *, 4310 stream_name: str, 4311 **kwargs: Any, 4312 ) -> GroupingPartitionRouter: 4313 underlying_router = self._create_component_from_model( 4314 model=model.underlying_partition_router, 4315 config=config, 4316 stream_name=stream_name, 4317 **kwargs, 4318 ) 4319 if model.group_size < 1: 4320 raise ValueError(f"Group size must be greater than 0, got {model.group_size}") 4321 4322 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4323 # because they are specific to individual partitions and cannot be aggregated or handled 4324 # when grouping, potentially leading to incorrect API calls. Any request customization 4325 # should be managed at the stream level through the requester's configuration. 4326 if isinstance(underlying_router, SubstreamPartitionRouter): 4327 if any( 4328 parent_config.request_option 4329 for parent_config in underlying_router.parent_stream_configs 4330 ): 4331 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4332 4333 if isinstance(underlying_router, ListPartitionRouter): 4334 if underlying_router.request_option: 4335 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4336 4337 return GroupingPartitionRouter( 4338 group_size=model.group_size, 4339 underlying_partition_router=underlying_router, 4340 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4341 config=config, 4342 )
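# A hedged manifest sketch of a GroupingPartitionRouter; the underlying router is a
# hypothetical placeholder, and it must not declare request options (see the checks
# above). `deduplicate` defaults to True when omitted.
#
#   type: GroupingPartitionRouter
#   group_size: 10
#   deduplicate: true
#   underlying_partition_router:
#     type: ListPartitionRouter
#     ...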