diff --git a/Infra/locals.tf b/Infra/locals.tf index 3603f32..ffc2102 100644 --- a/Infra/locals.tf +++ b/Infra/locals.tf @@ -1,17 +1,17 @@ locals { - + #buckets lambda_layer_bucket_name = "my-lambda-layer-bucket-001" - lambda_layer = "lambda_layer" - rapid_api_host = "zillow56.p.rapidapi.com" - rapid_api_key = "XXXX" - bucket_name = "real-estate-etl-101" - raw_repertory = "raw_data" - std_repertory = "std_data" - aws_region = "eu-west-3" + lambda_layer = "lambda_layer" + rapid_api_host = "zillow56.p.rapidapi.com" + rapid_api_key = "XXXX" + bucket_name = "real-estate-etl-101" + raw_repertory = "raw_data" + std_repertory = "std_data" + aws_region = "eu-west-3" - utils_bucket = "real-estate-etl-utils" - glue_script_key = "script/glue_etl_script.py" + utils_bucket = "real-estate-etl-utils" + glue_script_key = "script/glue_etl_script.py" glue_local_script_path = "../etl/glue_etl_job/transform_data.py" # first method layer @@ -20,19 +20,19 @@ locals { requirements_path = "../requirements.txt" path_to_system_folder = "../etl/extract/System" - + compatible_layer_runtimes = ["python3.10"] compatible_architectures = ["x86_64"] # lambda path_to_source_folder = "../etl/extract" #path_to_source_file = "../etl/extract" - path_to_output = "lambda_function_extract_data.zip" - function_name = "lambda_extract_fromAPI" - function_handler = "extract_data.lambda_handler" - memory_size = 512 - timeout = 300 - runtime = "python3.10" + path_to_output = "lambda_function_extract_data.zip" + function_name = "lambda_extract_fromAPI" + function_handler = "extract_data.lambda_handler" + memory_size = 512 + timeout = 300 + runtime = "python3.10" # Glue catalog glue_catalog_database_name = "real-estate-database" @@ -40,39 +40,39 @@ locals { # iam # Glue Crawler - glue_Crawler_Name = "real_estate_crawler" - houston_crawler_name = "real_estate_houston_crawler" + glue_Crawler_Name = "real_estate_crawler" + houston_crawler_name = "real_estate_houston_crawler" panamera_crawler_name = "real_estate_panamera_crawler" - houston = "houston" - panamera = "pasadena" + houston = "houston" + panamera = "pasadena" # Glue Classifier classifier_name = "real_estate_classifier" - json_path = "$[*]" + json_path = "$[*]" # Glue Job - glue_job_name = "real_estate_job" - glue_version = "4.0" - worker_type = "G.1X" - number_of_workers = 2 - time_out = 2880 - script_location = "" - class = "GlueApp" - enable-job-insights = "true" - enable-auto-scaling = "false" + glue_job_name = "real_estate_job" + glue_version = "4.0" + worker_type = "G.1X" + number_of_workers = 2 + time_out = 2880 + script_location = "" + class = "GlueApp" + enable-job-insights = "true" + enable-auto-scaling = "false" enable-glue-datacatalog = "true" - job-language = "python" - job-bookmark-option = "job-bookmark-disable" - datalake-formats = "iceberg" - conf = "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions --conf spark.sql.catalog.glue_catalog=org.apache.iceberg.spark.SparkCatalog --conf spark.sql.catalog.glue_catalog.warehouse=s3://tnt-erp-sql/ --conf spark.sql.catalog.glue_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog --conf spark.sql.catalog.glue_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO" - + job-language = "python" + job-bookmark-option = "job-bookmark-disable" + datalake-formats = "iceberg" + conf = "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions --conf spark.sql.catalog.glue_catalog=org.apache.iceberg.spark.SparkCatalog --conf spark.sql.catalog.glue_catalog.warehouse=s3://tnt-erp-sql/ --conf spark.sql.catalog.glue_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog --conf spark.sql.catalog.glue_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO" + # cloudwatch - schedule_name = "schedule" + schedule_name = "schedule" schedule_value = "cron(0 8 ? * MON-FRI *)" # Glue Trigger - glue_trigger_name = "realestate-glue-job-trigger" - glue_trigger_schedule_type = "SCHEDULED" + glue_trigger_name = "realestate-glue-job-trigger" + glue_trigger_schedule_type = "SCHEDULED" glue_trigger_schedule_value = "cron(15 12 * * ? *)" - + } \ No newline at end of file diff --git a/Infra/main.tf b/Infra/main.tf index 3ba4a57..358b725 100644 --- a/Infra/main.tf +++ b/Infra/main.tf @@ -1,71 +1,71 @@ -module "s3bucket"{ +module "s3bucket" { source = "./modules/s3" - bucket_name = local.bucket_name + bucket_name = local.bucket_name raw_repertory = local.raw_repertory std_repertory = local.std_repertory - utils_bucket_name = local.utils_bucket - glue_script_key = local.glue_script_key + utils_bucket_name = local.utils_bucket + glue_script_key = local.glue_script_key glue_local_script_path = local.glue_local_script_path } -module "lambdaLayer"{ +module "lambdaLayer" { source = "./modules/request_layer" requirements_path = local.requirements_path - layer_zip_path = local.layer_zip_path - layer_name = local.layer_name + layer_zip_path = local.layer_zip_path + layer_name = local.layer_name path_to_system_folder = local.path_to_system_folder lambda_layer_bucket_name = local.lambda_layer_bucket_name - lambda_layer = local.lambda_layer + lambda_layer = local.lambda_layer #path_to_request_layer_source = local.path_to_request_layer_source #path_to_request_layer_artifact = local.path_to_request_layer_artifact - + #path_to_request_layer_filename = local.path_to_request_layer_filename #request_layer_name = local.request_layer_name #path_to_request_layer_source = local.path_to_request_layer_source #path_to_request_layer_artifact = local.path_to_request_layer_artifact - + #path_to_request_layer_filename = local.path_to_request_layer_filename #request_layer_name = local.request_layer_name compatible_layer_runtimes = local.compatible_layer_runtimes - compatible_architectures = local.compatible_architectures + compatible_architectures = local.compatible_architectures } module "lambdaFunction" { source = "./modules/lambda" - path_to_source_folder = local.path_to_source_folder - path_to_output = local.path_to_output - function_name = local.function_name - function_handler = local.function_handler - memory_size = local.memory_size - timeout = local.timeout - runtime = local.runtime - rapid_api_host = local.rapid_api_host - rapid_api_key = local.rapid_api_key - bucket_name = local.bucket_name - raw_repertory = local.raw_repertory - lambda_layer_arns = [module.lambdaLayer.lamnda_layer_arn] - aws_region = local.aws_region - s3_bucket_arn = module.s3bucket.s3_etl_bucket_arn - -} - -module "cloudwatch_schedule_module"{ - source = "./modules/eventbridge" - schedule_name = local.schedule_name - schedule_value = local.schedule_value - aws_lambda_arn = module.lambdaFunction.lambda_function_arn + path_to_source_folder = local.path_to_source_folder + path_to_output = local.path_to_output + function_name = local.function_name + function_handler = local.function_handler + memory_size = local.memory_size + timeout = local.timeout + runtime = local.runtime + rapid_api_host = local.rapid_api_host + rapid_api_key = local.rapid_api_key + bucket_name = local.bucket_name + raw_repertory = local.raw_repertory + lambda_layer_arns = [module.lambdaLayer.lamnda_layer_arn] + aws_region = local.aws_region + s3_bucket_arn = module.s3bucket.s3_etl_bucket_arn + +} + +module "cloudwatch_schedule_module" { + source = "./modules/eventbridge" + schedule_name = local.schedule_name + schedule_value = local.schedule_value + aws_lambda_arn = module.lambdaFunction.lambda_function_arn aws_lambda_function_name = module.lambdaFunction.lambda_function_name } @@ -81,58 +81,58 @@ module "glueIamRole" { } module "glueClassifier" { - source = "./modules/glue_classifier" + source = "./modules/glue_classifier" classifier_name = local.classifier_name - json_path = local.json_path + json_path = local.json_path } module "glueCrawler" { source = "./modules/glue_crawler" - database = module.glueCatalogDatabase.database_name - houston_crawler_name = local.houston_crawler_name + database = module.glueCatalogDatabase.database_name + houston_crawler_name = local.houston_crawler_name panamera_crawler_name = local.panamera_crawler_name - houston = local.houston + houston = local.houston panamera = local.panamera #name = local.glue_Crawler_Name glue_iam_role = module.glueIamRole.glue_iam_arn - - classifiers = [module.glueClassifier.aws_glue_classifier_id] + + classifiers = [module.glueClassifier.aws_glue_classifier_id] s3_target_path_panamera = module.s3bucket.aws_s3_bucket_uri - s3_target_path_houston = module.s3bucket.aws_s3_bucket_uri + s3_target_path_houston = module.s3bucket.aws_s3_bucket_uri #s3_target_path = module.s3bucket.aws_s3_bucket_uri } module "glueJob" { source = "./modules/glue_job" - name = local.glue_job_name + name = local.glue_job_name iam_glue_arn = module.glueIamRole.glue_iam_arn glue_version = local.glue_version #worker_type = local.worker_type - script_location = module.s3bucket.aws_s3_bucket_glue_script_uri - timeout = local.time_out - class = local.class - enable-job-insights = local.enable-job-insights - enable-auto-scaling = local.enable-auto-scaling + script_location = module.s3bucket.aws_s3_bucket_glue_script_uri + timeout = local.time_out + class = local.class + enable-job-insights = local.enable-job-insights + enable-auto-scaling = local.enable-auto-scaling enable-glue-datacatalog = local.enable-glue-datacatalog - job-language = local.job-language - job-bookmark-option = local.job-bookmark-option - datalake-formats = local.datalake-formats - conf = local.conf + job-language = local.job-language + job-bookmark-option = local.job-bookmark-option + datalake-formats = local.datalake-formats + conf = local.conf } - + module "glueTrigger" { source = "./modules/glue_trigger" - name = local.glue_trigger_name - schedule_type = local.glue_trigger_schedule_type + name = local.glue_trigger_name + schedule_type = local.glue_trigger_schedule_type schedule_value = local.schedule_value - job_name = module.glueJob.aws_glue_job_name + job_name = module.glueJob.aws_glue_job_name } diff --git a/Infra/modules/request_layer/main.tf b/Infra/modules/request_layer/main.tf index 25ee497..c04ebdf 100644 --- a/Infra/modules/request_layer/main.tf +++ b/Infra/modules/request_layer/main.tf @@ -33,6 +33,17 @@ resource "aws_s3_object" "lambda_layer_zip" { #content_type = "application/x-directory" } +resource "aws_lambda_layer_version" "requests_layer" { + s3_bucket = aws_s3_bucket.lambda_layer_bucket.id + s3_key = aws_s3_object.lambda_layer_zip.key + layer_name = var.layer_name + #source_code_hash = filebase64sha256(var.path_to_request_layer_filename) + + compatible_runtimes = var.compatible_layer_runtimes + depends_on = [aws_s3_object.lambda_layer_zip] + #compatible_architectures = var.compatible_architectures + +} diff --git a/Infra/modules/request_layer/output.tf b/Infra/modules/request_layer/output.tf index d849a42..f597583 100644 --- a/Infra/modules/request_layer/output.tf +++ b/Infra/modules/request_layer/output.tf @@ -1,5 +1,3 @@ -/* output "lamnda_layer_arn" { - value = aws_lambda_layer_version.my-lambda-layer.arn + value = aws_lambda_layer_version.requests_layer.arn } -*/ \ No newline at end of file