hack2skill · butterfling · Jul 30, 2023 · Jul 30, 2023 · Jul 30, 2023 · Aug 1, 2023
diff --git a/Account_Breach_plot.py b/Account_Breach_plot.py
@@ -0,0 +1,39 @@
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+# Read the CSV file
+df = pd.read_csv('new.csv')
+
+Country = 'UK'
+Gender = 'male'
+
+# Filter data for 'Country' Australia and 'Gender' female
+australia_female_data = df[(df['Country'] == 'UK') & (df['Gender'] == 'male')]
+
+# Create a double-bar graph using Seaborn
+sns.set(style='whitegrid')
+plt.figure(figsize=(12, 8))
+
+# Plot 'Name Breach' count
+sns.barplot(x='Age Group', y='Name Breach', data=australia_female_data, color='black', label='Name Breach')
+
+# Plot 'Total Records'
+sns.barplot(x='Age Group', y='Total Records', data=australia_female_data, color='red', alpha=0.8, label='Total Records')
+
+# Add data labels on the bars
+for index, value in enumerate(australia_female_data['Account-Number Breach']):
+    plt.text(index, value, str(value), ha='center', va='bottom', fontweight='bold', fontsize=10, color='black')
+
+for index, value in enumerate(australia_female_data['Total Records']):
+    plt.text(index, value, str(value), ha='center', va='bottom', fontweight='bold', fontsize=10, color='black')
+
+# Add labels and title
+plt.xlabel('Age Group')
+plt.ylabel('Count')
+plt.title(f'Account-Number Breach vs. Total Records in {Country} for {Gender} by Age Group')
+plt.legend()
+
+# Show the plot
+plt.tight_layout()
+plt.show()
diff --git a/Dynamodb.py b/Dynamodb.py
@@ -0,0 +1,46 @@
+
+aws_access_key_id = 
+aws_secret_key = 
+
+session = boto3.Session(
+    aws_access_key_id=aws_access_key_id,
+    aws_secret_access_key=aws_secret_key
+)
+
+dynamodb = session.resource('dynamodb')
+
+table_name = 'MyTable'
+
+
+table = dynamodb.create_table(
+    TableName="HexaDCP",
+    KeySchema=[
+        {
+            'AttributeName': 'CustomerID',
+            'KeyType': 'HASH'  # Partition key
+        },
+        {
+            'AttributeName': 'Rule_ID',
+            'KeyType': 'RANGE'  # Sort key
+        }
+    ],
+    AttributeDefinitions=[
+        {
+            'AttributeName': 'CustomerID',
+            'AttributeType': 'S' 
+        },
+        {
+            'AttributeName': 'Rule_ID',
+            'AttributeType': 'S'
+        }
+
+
+    ],
+    ProvisionedThroughput={
+        'ReadCapacityUnits': 5,   
+        'WriteCapacityUnits': 5    
+    }
+)
+
+
+table.meta.client.get_waiter('table_exists').wait(TableName=table_name)
diff --git a/Group.py b/Group.py
@@ -0,0 +1,27 @@
+import pandas as pd
+
+input_csv_file = "final-output.csv"
+df = pd.read_csv(input_csv_file)
+
+age_bins = [18, 31, 46, 61, 76]
+age_labels = ["18-30", "31-45", "46-60", "61-75"]
+
+df["Age Group"] = pd.cut(df["Age"], bins=age_bins, labels=age_labels, right=False)
+
+count_columns = ["Name", "Email", "Account-Number", "Financial-Information"]
+
+for column in count_columns:
+    df[column] = df[column].astype(str).apply(lambda x: x.count('1'))
+
+grouped_df = df.groupby(["Age Group", "Country", "Gender"])[[column for column in count_columns]].sum().reset_index()
+
+
+total_records = df.groupby(["Age Group", "Country", "Gender"]).size().reset_index(name="Total-Records")
+
+
+grouped_df = pd.merge(grouped_df, total_records, on=["Age Group", "Country", "Gender"])
+
+output_csv_file = "new.csv"
+grouped_df.to_csv(output_csv_file, index=False)
+
+
diff --git a/Hexa-prompts.csv b/Hexa-prompts.csv
diff --git a/Name_Breach_plot.py b/Name_Breach_plot.py
@@ -0,0 +1,77 @@
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from email.mime.application import MIMEApplication
+from matplotlib.backends.backend_pdf import PdfPages
+
+
+df = pd.read_csv('new.csv')
+
+
+def get_visualization(Country , Gender):
+
+    australia_female_data = df[(df['Country'] == Country) & (df['Gender'] == Gender)]
+
+
+    if australia_female_data.empty:
+        print(f"No data found for Country: {Country} and Gender: {Gender}")
+        return
+
+    sns.set(style='whitegrid')
+    plt.figure(figsize=(12, 8))
+
+    sns.barplot(x='Age Group', y='Name Breach', data=australia_female_data, color='black', label='Name Breach')
+    sns.barplot(x='Age Group', y='Total Records', data=australia_female_data, color='red', alpha=0.8, label='Total Records')
+
+    for index, value in enumerate(australia_female_data['Name Breach']):
+        plt.text(index, value, str(value), ha='center', va='bottom', fontweight='bold', fontsize=10, color='black')
+
+    for index, value in enumerate(australia_female_data['Total Records']):
+        plt.text(index, value, str(value), ha='center', va='bottom', fontweight='bold', fontsize=10, color='black')
+
+    plt.xlabel('Age Group')
+    plt.ylabel('Count')
+    plt.title(f'Name Breach vs. Total Records in {Country} for {Gender} by Age Group')
+    plt.legend()
+
+    plt.tight_layout()
+
+    return plt
+
+
+
+# smtp_server = 'smtp.gmail.com'
+# smtp_port = 587
+# sender_email = '[email protected]'
+# sender_password = '<REDACTED>'  # load from an environment variable; never commit credentials
+# receiver_email = '[email protected]'
+
+
+# msg = MIMEMultipart()
+# msg['From'] = sender_email
+# msg['To'] = receiver_email
+# msg['Subject'] = 'Data Breach Report'
+
+# # Attach the PDF file
+# with open('report.pdf', 'rb') as f:
+#     attach_pdf = MIMEApplication(f.read(), _subtype='pdf')
+#     attach_pdf.add_header('Content-Disposition', 'attachment', filename='report.pdf')
+#     msg.attach(attach_pdf)
+
+# # Send the email
+# try:
+#     server = smtplib.SMTP(smtp_server, smtp_port)
+#     server.starttls()
+#     server.login(sender_email, sender_password)
+#     server.sendmail(sender_email, receiver_email, msg.as_string())
+#     print('Email sent successfully!')
+#     server.quit()
+# except Exception as e:
+#     print('Error sending email:', str(e))
+
+
+
+
diff --git a/Prompts.csv b/Prompts.csv
diff --git a/README.md b/README.md
@@ -9,18 +9,32 @@
 
 ## README.md must consist of the following information:
 
-#### Team Name -
-#### Problem Statement - 
-#### Team Leader Email -
+#### Team Name - GenAI-Ninjas
+#### Problem Statement - Organizations face significant challenges in ensuring data compliance and protection, including the complexity of regulations, cross-border data transfers, and the risk of data breaches. There is a need for an advanced AI-driven solution that streamlines compliance processes, enhances data protection, and provides actionable insights to mitigate risks and ensure regulatory adherence across Industries.
+#### Team Leader Email - [email protected]
 
 ## A Brief of the Prototype:
-  This section must include UML Diagrams and prototype description
-
-## Tech Stack: 
-   List Down all technologies used to Build the prototype
+  User_Flow Diagrams , Tech Stack , Demo Video:
+  https://drive.google.com/drive/folders/1CHrxZy5DmxPt9kJs14eQW8hwUBqg_vJ8?usp=sharing
 
 ## Step-by-Step Code Execution Instructions:
-  This Section must contain a set of instructions required to clone and run the prototype so that it can be tested and deeply analyzed
+  1) Navigate to the ai.py file , and run it using 'python ai.py' command
+  2) The chatbot is fine-tuned using the curie engine of gpt-3.5 and is prompted to answer a specific set of questions
+  3) Write the prompt "Get me the data breaches for" + any country in the list [India, Australia, UK, France, Germany, Brazil, China, USA].
+     This gives a JSON output that indicates the number of breaches by 'Name', 'Email', 'Financial-Information', etc., related to the first, second and third rules of the GDPR rule set.
+  4) Next, use the prompt "Can you show me the Account number breach VS Total records for" + [Country_Name] + [Gender].
+     This generates a plot depicting the same.
+  5) HexaDCP can also send the report by Gmail using the prompt "Send me the report on my Email" + [Your_Email_Add],
+     which sends the report to the given address.
+  6) Furthermore, the bot uses FLOW.AI, which provides additional insights into the CSV. For example, running the prompt "How many Email Breaches of Age Group 18-30 for" + [Country-Name] answers that question.
+  7) The model is configured with XGBoost, which assigns an input vector, say 'v', a label indicating the types of labels broken by the user. This can be found in the file "final-output.csv".
+  8) Two databases, 'MongoDB' and 'AWS DynamoDB', are used: the JSON file 'customer-details.json' is stored in MongoDB (via MongoDB Compass), and 'total_records_by_country.csv' is stored as key-value pairs in DynamoDB.
 
 ## What I Learned:
-   Write about the biggest learning you had while developing the prototype
+  While developing the prototype of HEXA DCP BOT during the 24-hour Hackathon, one of the most significant learnings was the power of collaboration and efficient time management. Within a limited timeframe, our team had to tackle complex challenges and bring together various technologies (Python, TensorFlow, XGBoost, Matplotlib, MERN Stack, Flowise.ai) to create a comprehensive data compliance and protection solution.
+
+We learned to leverage each team member's strengths, divide tasks strategically, and communicate effectively to ensure a streamlined development process. This experience taught us the importance of setting clear goals, prioritizing critical features, and making agile decisions to optimize our limited time effectively.
+
+Moreover, the Hackathon pushed us to think creatively and innovatively to address real-world data compliance challenges. Integrating AI and ML technologies into the prototype not only enhanced its capabilities but also provided valuable insights into how AI can revolutionize data protection strategies.
+
+The fast-paced environment of the Hackathon taught us to adapt swiftly to unexpected hurdles and pivot our approach when needed. In the end, we were proud of what our team accomplished within the 24-hour timeframe. The experience not only strengthened our technical skills but also fostered collaboration, creativity, and problem-solving abilities. We learned that with the right mindset, teamwork, and determination, we could deliver a powerful and promising product that addresses critical data compliance challenges and protects the privacy of organizations and individuals alike.