From c00490cd719b481685e21fcd725ebc3082ed4516 Mon Sep 17 00:00:00 2001 From: Abilashkumar Date: Wed, 25 Dec 2024 11:26:26 +0530 Subject: [PATCH 1/4] s3-eventbridge-lambda-textract-node --- s3-eventbridge-lambda-textract-node/README.md | 66 +++++++++++++++ .../example-pattern.json | 59 +++++++++++++ .../src/index.mjs | 54 ++++++++++++ .../template.yaml | 84 +++++++++++++++++++ 4 files changed, 263 insertions(+) create mode 100644 s3-eventbridge-lambda-textract-node/README.md create mode 100644 s3-eventbridge-lambda-textract-node/example-pattern.json create mode 100644 s3-eventbridge-lambda-textract-node/src/index.mjs create mode 100644 s3-eventbridge-lambda-textract-node/template.yaml diff --git a/s3-eventbridge-lambda-textract-node/README.md b/s3-eventbridge-lambda-textract-node/README.md new file mode 100644 index 000000000..12754ca83 --- /dev/null +++ b/s3-eventbridge-lambda-textract-node/README.md @@ -0,0 +1,66 @@ +# Amazon S3 to Amazon Textract through AWS EventBridge + +This pattern demonstrates how to create an S3 bucket which when uploaded with an object invokes a Lambda function through EventBridge and detects the text in a document through Amazon Textract. The lambda function code uses NodeJs runtime. + +Learn more about this pattern at Serverless Land Patterns: https://serverlessland.com/patterns/s3-eventbridge-lambda-textract + +Important: this application uses various AWS services and there are costs associated with these services after the Free Tier usage - please see the [AWS Pricing page](https://aws.amazon.com/pricing/) for details. You are responsible for any AWS costs incurred. No warranty is implied in this example. + +## Requirements + +* [Create an AWS account](https://portal.aws.amazon.com/gp/aws/developer/registration/index.html) if you do not already have one and log in. The IAM user that you use must have sufficient permissions to make necessary AWS service calls and manage AWS resources. 
+* [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) installed and configured +* [Git Installed](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) +* [AWS Serverless Application Model](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/serverless-sam-cli-install.html) (AWS SAM) installed + +## Deployment Instructions + +1. Create a new directory, navigate to that directory in a terminal and clone the GitHub repository: + ``` + git clone https://github.com/aws-samples/serverless-patterns + ``` +1. Change directory to the pattern directory: + ``` + cd s3-eventbridge-lambda-textract + ``` +1. From the command line, use AWS SAM to deploy the AWS resources for the pattern as specified in the template.yml file: + ``` + sam deploy --guided + ``` +1. During the prompts: + * Enter a stack name + * Enter the desired AWS Region + * Allow SAM CLI to create IAM roles with the required permissions. + + Once you have run `sam deploy --guided` mode once and saved arguments to a configuration file (samconfig.toml), you can use `sam deploy` in future to use these defaults. + +1. Note the outputs from the SAM deployment process. These contain the resource names and/or ARNs which are used for testing. + +## How it works + +The CloudFormation template creates two S3 buckets (an input bucket and an output bucket), a Lambda function (Node.js) and an EventBridge rule. The rule matches `Object Created` events from the input bucket and invokes the Lambda function. The function calls the Amazon Textract DetectDocumentText API on the uploaded object and stores the response as a JSON file in the output bucket. + +## Testing + +Upload a file (document/image) to the input bucket (named `<stack-name>-input-bucket-<account-id>`; see the `InputBucketName` stack output) via the console, or use the PutObject API call: + +``` +aws s3api put-object --bucket your-bucket-name --key your-document.pdf --body /path/to/your/document.pdf +``` + +Replace the parameters in the above command appropriately. + +## Cleanup + +1. 
Delete the stack + ```bash + aws cloudformation delete-stack --stack-name STACK_NAME + ``` +1. Confirm the stack has been deleted + ```bash + aws cloudformation list-stacks --query "StackSummaries[?contains(StackName,'STACK_NAME')].StackStatus" + ``` +---- +Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + +SPDX-License-Identifier: MIT-0 diff --git a/s3-eventbridge-lambda-textract-node/example-pattern.json b/s3-eventbridge-lambda-textract-node/example-pattern.json new file mode 100644 index 000000000..91f1a1613 --- /dev/null +++ b/s3-eventbridge-lambda-textract-node/example-pattern.json @@ -0,0 +1,59 @@ +{ + "title": "S3 to Textract using EventBridge ", + "description": "SAM template for an S3 object upload to invoke a Lambda function through EventBridge that detects the text in a document through Amazon Textract", + "language": "nodejs", + "level": "200", + "framework": "SAM", + "introBox": { + "headline": "How it works", + "text": [ + "This pattern demonstrates how to creates two S3 buckets (source and destination) which when uploaded with an object invokes a Lambda function through EventBridge and detects the text in a document through Amazon Textract.", + "Once a file is uploaded to an S3 bucket, it is listened by the EventBridge which further invokes the lambda function", + "The lambda function writes the output of the textract text detection to another S3 bucket. 
" + ] + }, + "gitHub": { + "template": { + "repoURL": "https://github.com/aws-samples/serverless-patterns/tree/main/s3-eventbridge-lambda-textract", + "templateURL": "serverless-patterns/s3-eventbridge-lambda-textract", + "projectFolder": "s3-eventbridge-lambda-textract", + "templateFile": "template.yaml" + } + }, + "resources": { + "bullets": [ + { + "text": "Detecting text with an AWS lambda function", + "link": "https://docs.aws.amazon.com/textract/latest/dg/lambda.html" + }, + { + "text": "Amazon Textract", + "link": "https://docs.aws.amazon.com/textract/latest/dg/what-is.html" + } + ] + }, + "deploy": { + "text": [ + "sam deploy" + ] + }, + "testing": { + "text": [ + "See the GitHub repo for detailed testing instructions." + ] + }, + "cleanup": { + "text": [ + "Delete the stack: cdk delete." + ] + }, + "authors": [ + { + "name": "Abilashkumar P C", + "image": "https://drive.google.com/file/d/1bxOh_WBw8J_xEqvT-qRezH8WXqSBPI24/view?usp=sharing", + "bio": "Sr. Cloud Support Engineer @ AWS", + "linkedin": "abilashkumar-p-c" + } + ] + } + \ No newline at end of file diff --git a/s3-eventbridge-lambda-textract-node/src/index.mjs b/s3-eventbridge-lambda-textract-node/src/index.mjs new file mode 100644 index 000000000..6a9266a13 --- /dev/null +++ b/s3-eventbridge-lambda-textract-node/src/index.mjs @@ -0,0 +1,54 @@ +import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3"; +import { TextractClient, DetectDocumentTextCommand } from "@aws-sdk/client-textract"; + +const s3Client = new S3Client(); +const textractClient = new TextractClient(); + +export const handler = async (event, context) => { + // Extract bucket and key from the EventBridge event + console.log(event) + event.Records[0].s3.bucket.name; + + const bucket = event.Records[0].s3.bucket.name; + const key = event.Records[0].s3.object.key; + console.log(bucket); + console.log(key); + try { + // Call Textract to detect document text + const detectParams = { + Document: { + S3Object: { + Bucket: bucket, + 
Name: key + } + } + }; + const detectCommand = new DetectDocumentTextCommand(detectParams); + const response = await textractClient.send(detectCommand); + console.log(response); + // Prepare the output key + let outputKey = `textract-output-${key}`; + outputKey = outputKey.substring(0, outputKey.lastIndexOf('.')) + '.json'; + console.log(outputKey); + + // Write the Textract output to the output bucket + const putParams = { + Bucket: process.env.OUTPUT_BUCKET, + Key: outputKey, + Body: JSON.stringify(response) + }; + const putCommand = new PutObjectCommand(putParams); + await s3Client.send(putCommand); + + return { + statusCode: 200, + body: JSON.stringify('Document processed successfully') + }; + } catch (error) { + console.error('Error:', error); + return { + statusCode: 500, + body: JSON.stringify('Error processing document') + }; + } +}; diff --git a/s3-eventbridge-lambda-textract-node/template.yaml b/s3-eventbridge-lambda-textract-node/template.yaml new file mode 100644 index 000000000..bd6f10e4c --- /dev/null +++ b/s3-eventbridge-lambda-textract-node/template.yaml @@ -0,0 +1,84 @@ +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: 'SAM template for S3 trigger to Lambda for Textract document detection with EventBridge using NodeJs' + +Resources: + # Input S3 bucket + InputBucket: + Type: AWS::S3::Bucket + Properties: + BucketName: !Sub '${AWS::StackName}-input-bucket-${AWS::AccountId}' + NotificationConfiguration: + EventBridgeConfiguration: + EventBridgeEnabled: true + + # Output S3 bucket + OutputBucket: + Type: AWS::S3::Bucket + Properties: + BucketName: !Sub '${AWS::StackName}-output-bucket-${AWS::AccountId}' + + # Lambda function + TextractFunction: + Type: AWS::Serverless::Function + Properties: + FunctionName: !Sub '${AWS::StackName}-textract-function' + Handler: index.handler + Runtime: nodejs18.x + Timeout: 60 + Environment: + Variables: + OUTPUT_BUCKET: !Ref OutputBucket + Policies: + - S3ReadPolicy: + 
BucketName: !Ref InputBucket + - S3WritePolicy: + BucketName: !Ref OutputBucket + - Statement: + - Effect: Allow + Action: + - textract:DetectDocumentText + Resource: '*' + CodeUri: src/ + + # EventBridge Rule + S3ObjectCreatedRule: + Type: AWS::Events::Rule + Properties: + Description: "Rule to capture S3 object created events" + EventPattern: + source: + - aws.s3 + detail-type: + - Object Created + detail: + bucket: + name: + - !Ref InputBucket + State: "ENABLED" + Targets: + - Arn: !GetAtt TextractFunction.Arn + Id: "TextractFunctionTarget" + + # Permission for EventBridge to invoke Lambda + TextractFunctionPermission: + Type: AWS::Lambda::Permission + Properties: + FunctionName: !Ref TextractFunction + Action: "lambda:InvokeFunction" + Principal: "events.amazonaws.com" + SourceArn: !GetAtt S3ObjectCreatedRule.Arn + +Outputs: + InputBucketName: + Description: 'Name of the input S3 bucket' + Value: !Ref InputBucket + OutputBucketName: + Description: 'Name of the output S3 bucket' + Value: !Ref OutputBucket + TextractFunctionName: + Description: 'Name of the Textract Lambda function' + Value: !Ref TextractFunction + TextractFunctionArn: + Description: 'ARN of the Textract Lambda function' + Value: !GetAtt TextractFunction.Arn From ca829bda46c5870413a5729ce049072d89326ff7 Mon Sep 17 00:00:00 2001 From: Abilashkumar <111060919+abilashkumar@users.noreply.github.com> Date: Wed, 25 Dec 2024 14:24:58 +0530 Subject: [PATCH 2/4] Update README.md --- s3-eventbridge-lambda-textract-node/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3-eventbridge-lambda-textract-node/README.md b/s3-eventbridge-lambda-textract-node/README.md index 12754ca83..e363d90ef 100644 --- a/s3-eventbridge-lambda-textract-node/README.md +++ b/s3-eventbridge-lambda-textract-node/README.md @@ -2,7 +2,7 @@ This pattern demonstrates how to create an S3 bucket which when uploaded with an object invokes a Lambda function through EventBridge and detects the text in a document 
through Amazon Textract. The lambda function code uses NodeJs runtime. -Learn more about this pattern at Serverless Land Patterns: https://serverlessland.com/patterns/s3-eventbridge-lambda-textract +Learn more about this pattern at Serverless Land Patterns: https://serverlessland.com/patterns/s3-eventbridge-lambda-textract-node Important: this application uses various AWS services and there are costs associated with these services after the Free Tier usage - please see the [AWS Pricing page](https://aws.amazon.com/pricing/) for details. You are responsible for any AWS costs incurred. No warranty is implied in this example. From 7a69e1fc09c656a1dff80b8f722613fb6dbd0b82 Mon Sep 17 00:00:00 2001 From: Abilashkumar <111060919+abilashkumar@users.noreply.github.com> Date: Wed, 25 Dec 2024 14:27:48 +0530 Subject: [PATCH 3/4] Update README.md --- s3-eventbridge-lambda-textract-node/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3-eventbridge-lambda-textract-node/README.md b/s3-eventbridge-lambda-textract-node/README.md index e363d90ef..05cfdfd75 100644 --- a/s3-eventbridge-lambda-textract-node/README.md +++ b/s3-eventbridge-lambda-textract-node/README.md @@ -21,7 +21,7 @@ Important: this application uses various AWS services and there are costs associ ``` 1. Change directory to the pattern directory: ``` - cd s3-eventbridge-lambda-textract + cd s3-eventbridge-lambda-textract-node ``` 1. 
From the command line, use AWS SAM to deploy the AWS resources for the pattern as specified in the template.yml file: ``` From 1d2f4dd12d7bda2130c6fa72f58e28db6a329775 Mon Sep 17 00:00:00 2001 From: Abilashkumar <111060919+abilashkumar@users.noreply.github.com> Date: Wed, 25 Dec 2024 14:33:11 +0530 Subject: [PATCH 4/4] Update example-pattern.json --- s3-eventbridge-lambda-textract-node/example-pattern.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/s3-eventbridge-lambda-textract-node/example-pattern.json b/s3-eventbridge-lambda-textract-node/example-pattern.json index 91f1a1613..72f3c3f3b 100644 --- a/s3-eventbridge-lambda-textract-node/example-pattern.json +++ b/s3-eventbridge-lambda-textract-node/example-pattern.json @@ -14,9 +14,9 @@ }, "gitHub": { "template": { - "repoURL": "https://github.com/aws-samples/serverless-patterns/tree/main/s3-eventbridge-lambda-textract", - "templateURL": "serverless-patterns/s3-eventbridge-lambda-textract", - "projectFolder": "s3-eventbridge-lambda-textract", + "repoURL": "https://github.com/aws-samples/serverless-patterns/tree/main/s3-eventbridge-lambda-textract-node", + "templateURL": "serverless-patterns/s3-eventbridge-lambda-textract-node", + "projectFolder": "s3-eventbridge-lambda-textract-node", "templateFile": "template.yaml" } }, @@ -56,4 +56,4 @@ } ] } - \ No newline at end of file +