generated from moj-analytical-services/data-engineering-template
-
Notifications
You must be signed in to change notification settings - Fork 1
/
__main__.py
78 lines (67 loc) · 2.5 KB
/
__main__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from data_engineering_pulumi_components.aws import Bucket
from data_engineering_pulumi_components.utils import Tagger
from pulumi import ResourceOptions, get_stack, export, Output
from pulumi_aws.iam import RolePolicy
from pulumi_aws.s3 import BucketPolicy
import data_engineering_exports.pull as pull
import data_engineering_exports.push as push
import data_engineering_exports.utils as utils
# PUSH INFRASTRUCTURE
# Files that land in the export bucket are moved or copied on to their target bucket.
stack = get_stack()
tagger = Tagger(environment_name=stack)

# Shared landing bucket for all push exports; its ARN is published as a stack output.
export_bucket = Bucket(name="mojap-hub-exports", tagger=tagger)
export("export_bucket", export_bucket._bucket.arn)

# Collect every YAML config listed for "push_datasets" and build the AWS
# resources from them.
# NOTE(review): the build_* calls presumably rely on the datasets having been
# loaded first, so the call order below is kept exactly as it was.
push_datasets = push.PushExportDatasets(
    utils.list_yaml_files("push_datasets"),
    export_bucket,
    tagger,
)
push_datasets.load_datasets_and_users()
push_datasets.build_lambda_functions()
push_datasets.build_role_policies()

# A bucket can only have one BucketNotification, so a single combined
# notification is created for the export bucket.
bucket_notification = push.make_combined_bucket_notification(
    name="export-bucket-notification",
    export_bucket=export_bucket,
    datasets=push_datasets,
)
# PULL INFRASTRUCTURE
# Let an external role get files from a bucket
pull_config_files = utils.list_yaml_files("pull_datasets")

# For each config, create a bucket, a bucket policy for the external ARNs,
# and one role policy per listed user.
for file in pull_config_files:
    dataset = utils.load_yaml(file)
    name = dataset["name"]
    pull_arns = dataset["pull_arns"]
    users = dataset["users"]
    # "allow_push" is optional in the config; when absent the bucket policy
    # is built with allow_push=False.
    writable = dataset.get("allow_push", False)

    pull_bucket = Bucket(
        name=f"mojap-{name}",
        tagger=tagger,
    )

    # Add bucket policy allowing the specified arns to read.
    # Output.all with keyword arguments resolves to a dict keyed by those
    # names, which is what the policy builder receives once the bucket ARN
    # is known.
    bucket_policy = Output.all(
        bucket_arn=pull_bucket.arn,
        pull_arns=pull_arns,
        allow_push=writable,
    ).apply(pull.create_pull_bucket_policy)
    BucketPolicy(
        resource_name=f"{name}-bucket-policy",
        bucket=pull_bucket.id,
        policy=bucket_policy,
        opts=ResourceOptions(parent=pull_bucket),
    )

    # Add role policy for each user. The policy document is built once per
    # bucket and shared by every user in this config.
    role_policy = Output.all(bucket_arn=pull_bucket.arn).apply(
        pull.create_read_write_role_policy
    )
    for user in users:
        # NOTE(review): the Pulumi resource name depends on the user only,
        # so a user listed in more than one pull config would produce two
        # resources with the same name - confirm configs never share users
        # before relying on this.
        RolePolicy(
            resource_name=user + "_exports_pull",
            policy=role_policy.json,
            role=user,
            name=f"hub-exports-pull-{name}",
        )