Skip to content

Commit

Permalink
✨ Source Instagram: Migrate Instagram sources Media, User, UserLiftim…
Browse files Browse the repository at this point in the history
…elnsights and Stories to low-code (#39504)

Co-authored-by: Natik Gadzhi <[email protected]>
  • Loading branch information
2 people authored and xiaohansong committed Jul 2, 2024
1 parent f2ad05d commit a4e0a9e
Show file tree
Hide file tree
Showing 34 changed files with 4,470 additions and 671 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_strictness_level: high
acceptance_tests:
spec:
tests:
- spec_path: "integration_tests/spec.json"
- spec_path: "source_instagram/spec.json"
connection:
tests:
- config_path: "secrets/config.json"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
{"stream":"user_lifetime_insights","data":{"page_id":"144706962067225","breakdown":"city","business_account_id":"17841408147298757","metric":"follower_demographics"},"emitted_at":1704378481116}
{"stream":"user_lifetime_insights","data":{"page_id":"144706962067225","breakdown":"country","business_account_id":"17841408147298757","metric":"follower_demographics"},"emitted_at":1704378481343}
{"stream":"user_lifetime_insights","data":{"page_id":"144706962067225","breakdown":"age,gender","business_account_id":"17841408147298757","metric":"follower_demographics"},"emitted_at":1704378481574}
{"stream": "Api", "data": {"id": "144706962067225", "account": {"business_account_id": "17841408147298757", "page_id": "144706962067225"}}, "emitted_at": 1718280847529}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 6acf6b55-4f1e-4fca-944e-1a3caef8aba8
dockerImageTag: 3.0.11
dockerImageTag: 3.0.12
dockerRepository: airbyte/source-instagram
githubIssueLabel: source-instagram
icon: instagram.svg
Expand Down Expand Up @@ -48,7 +48,7 @@ data:
documentationUrl: https://docs.airbyte.com/integrations/sources/instagram
tags:
- language:python
- cdk:python
- cdk:low-code
ab_internal:
sl: 200
ql: 400
Expand Down
330 changes: 312 additions & 18 deletions airbyte-integrations/connectors/source-instagram/poetry.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
version = "3.0.11"
version = "3.0.12"
name = "source-instagram"
description = "Source implementation for Instagram."
authors = [ "Airbyte <[email protected]>",]
Expand All @@ -18,7 +18,7 @@ include = "source_instagram"
[tool.poetry.dependencies]
python = "^3.9,<3.12"
facebook-business = "==19.0.3"
airbyte-cdk = "0.80.0"
airbyte-cdk = "^1"
cached-property = "==1.5.2"

[tool.poetry.scripts]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
import logging
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, MutableMapping, Optional

import requests
from airbyte_cdk.connector_builder.connector_builder_handler import resolve_manifest
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
from airbyte_cdk.sources.declarative.types import Config
from source_instagram import SourceInstagram

from .common import remove_params_from_url

# Base URL for direct Graph API calls made by this module. It is read from the connector's resolved
# manifest (definitions.base_requester.url_base) rather than being hard-coded here.
# NOTE(review): this resolves the manifest and instantiates SourceInstagram at import time — confirm
# that importing `source_instagram` from this module cannot produce a circular import.
GRAPH_URL = resolve_manifest(source=SourceInstagram()).record.data["manifest"]["definitions"]["base_requester"]["url_base"]


def get_http_response(
    path: str, request_params: Dict, config: Config, timeout: float = 60.0
) -> Optional[MutableMapping[str, Any]]:
    """
    Send an authenticated GET request to ``{GRAPH_URL}/{path}`` and return the decoded JSON body.

    :param path: path appended to ``GRAPH_URL`` (the caller in this module passes a media ID)
    :param request_params: query-string parameters sent with the request
    :param config: connector configuration; ``config["access_token"]`` is used as the bearer token
    :param timeout: seconds to wait for the server before giving up; without a timeout ``requests``
        waits indefinitely and a single stalled connection would hang the caller
    :return: the parsed JSON payload when the response status is 200, otherwise ``None``
    :raises requests.exceptions.RequestException: if the request cannot be completed (including on timeout)
    """
    url = f"{GRAPH_URL}/{path}"
    token = config["access_token"]
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    # Copy the mapping so the caller's dictionary is never shared with the HTTP layer.
    params = dict(request_params)
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    # Any non-200 answer is reported as "no data"; callers must handle None.
    return None


@dataclass
class InstagramClearUrlTransformation(RecordTransformation):
    def transform(self, record: MutableMapping[str, Any], config: Optional[Config] = None, **kwargs) -> MutableMapping[str, Any]:
        """
        Strip volatile query parameters from the URL fields of a record, in place.

        Two fields are cleaned when they hold a non-empty value:
          * `media_url` loses its `_nc_rid` parameter, which is regenerated on every request;
          * `profile_picture_url` loses its `ccb` parameter, whose value can change between requests.

        Dropping them keeps records comparable between runs (tests compare records for identity),
        and the resulting links stay valid: the media or picture can still be opened.

        :param record: record to clean; it is modified in place
        :param config: unused
        :return: the same record object
        """
        # (field name, query parameters to drop), processed in this order.
        volatile_params = (
            ("media_url", ["_nc_rid"]),
            ("profile_picture_url", ["ccb"]),
        )
        for field_name, params_to_drop in volatile_params:
            current_url = record.get(field_name)
            if current_url:
                record[field_name] = remove_params_from_url(current_url, params=params_to_drop)

        return record


@dataclass
class InstagramMediaChildrenTransformation(RecordTransformation):
    def transform(self, record: MutableMapping[str, Any], config: Optional[Config] = None, **kwargs) -> MutableMapping[str, Any]:
        """
        Transforms the 'children' field in the record, which is an array of Media IDs with a common Media parent.
        This transformation fetches detailed information for each Media ID from the /media endpoint and updates the 'children' array
        with this information. Records without children are returned unchanged.

        Example input:
            "children": {
                "data": [
                    {
                        "id": "7608776690540"
                    },
                    {
                        "id": "2896800415362"
                    }
                ]
            }

        After fetching information for each Media ID:
            "children": [
                {
                    "id": "7608776690540",
                    "ig_id": "2521545917836833225",
                    "media_type": "IMAGE",
                    "media_url": "https://fake_url?_nc_cat=...",
                    // more fields
                },
                {
                    "id": "2896800415362",
                    "ig_id": "2521545917736276706",
                    "media_type": "IMAGE",
                    "media_url": "https://fake_url?_nc_cat=...",
                    // more fields
                }
            ]

        :param record: parent media record; it is modified in place
        :param config: connector configuration, forwarded to the HTTP helper that reads the access token from it
        :return: the same record object
        """
        children = record.get("children")
        if not children:
            return record

        fields = "id,ig_id,media_type,media_url,owner,permalink,shortcode,thumbnail_url,timestamp,username"
        children_fetched = []
        # A "children" object without a "data" list is treated as having no children.
        for child in children.get("data") or []:
            child_id = child.get("id")
            media_data = get_http_response(child_id, {"fields": fields}, config=config)
            if media_data is None:
                # The helper returns None for any non-200 response. Skip this child instead of
                # failing the whole record with an AttributeError on None.
                logging.getLogger("airbyte").warning("Could not fetch details for child media %s; skipping it.", child_id)
                continue

            media_data = InstagramClearUrlTransformation().transform(media_data)
            if media_data.get("timestamp"):
                # Re-emit the timestamp with a colon in the UTC offset (e.g. +0000 -> +00:00).
                dt = datetime.strptime(media_data["timestamp"], "%Y-%m-%dT%H:%M:%S%z")
                media_data["timestamp"] = dt.isoformat()
            children_fetched.append(media_data)

        record["children"] = children_fetched
        return record


@dataclass
class InstagramBreakDownResultsTransformation(RecordTransformation):
    """
    The transformation flattens a nested array of breakdown results located at total_value.breakdowns[0].results into a single object
    (dictionary). In this transformation, each key-value pair in the resulting object represents a dimension and its corresponding value.
    The 'total_value' key is removed from the record.

    Example input:
        {
            "total_value": {
                "breakdowns": [
                    {
                        "dimension_keys": [
                            "city"
                        ],
                        "results": [
                            {
                                "dimension_values": [
                                    "London, England"
                                ],
                                "value": 263
                            },
                            {
                                "dimension_values": [
                                    "Sydney, New South Wales"
                                ],
                                "value": 467
                            }
                        ]
                    }
                ]
            },
            "id": "id/insights/follower_demographics/lifetime"
        }

    Example output:
        {
            "value": {
                "London, England": 263,
                "Sydney, New South Wales": 467
            },
            "id": "id/insights/follower_demographics/lifetime"
        }

    The nested 'results' array is transformed into a 'value' dictionary where each key is a dimension and each value is the corresponding value.
    A record with no 'total_value', no breakdowns or no results gets an empty 'value' dictionary.
    """

    def transform(self, record: MutableMapping[str, Any], **kwargs) -> MutableMapping[str, Any]:
        """
        Replace the record's 'total_value' with a flat 'value' dictionary, in place.

        :param record: insight record to flatten; it is modified in place
        :return: the same record object
        """
        # Every level of the payload may be absent or empty; fall back to "no results"
        # rather than raising KeyError/IndexError.
        total_value = record.pop("total_value", None) or {}
        breakdowns = total_value.get("breakdowns") or [{}]
        results = breakdowns[0].get("results") or []

        # NOTE(review): only the first dimension value is used as the key, so results that share it
        # (possible for a multi-dimension breakdown) overwrite each other — confirm this is intended.
        record["value"] = {(res.get("dimension_values") or [""])[0]: res.get("value") for res in results}
        return record
Loading

0 comments on commit a4e0a9e

Please sign in to comment.