-
Notifications
You must be signed in to change notification settings - Fork 0
/
csv_to_db.py
54 lines (47 loc) · 1.52 KB
/
csv_to_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import mysql.connector
# Function to read CSV and insert data into DB
def read_csv_and_insert_to_db(csv_file_path):
# Read CSV file into DataFrame
df = pd.read_csv(csv_file_path)
# Convert NaN values to None (SQL NULL)
df = df.where(pd.notnull(df), None)
# Call the function to insert data into the database
insert_posts_to_db(df)
# Function to insert or update data into the database
def insert_posts_to_db(df):
# SQL query for inserting or updating data
insert_query = """INSERT INTO posts (id, title, post_url, article, keywords)
VALUES (%s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
title = VALUES(title),
post_url = VALUES(post_url),
article = VALUES(article),
keywords = VALUES(keywords);"""
# Iterate over DataFrame rows and insert data into the database
for i, row in df.iterrows():
data_tuple = (row['ID'], row['Title'], row['Post URL'], row['article'], row['keywords'])
cursor.execute(insert_query, data_tuple)
conn.commit()
print("Data inserted successfully into posts table")
conn.close()
# Database Setup
conn = mysql.connector.connect(
host='localhost',
user='root',
password='dsci560',
database='lab4'
)
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS posts (
id VARCHAR(255) PRIMARY KEY,
title TEXT,
post_url TEXT,
article TEXT,
keywords TEXT
)
''')
# Example usage
csv_file_path = 'new_posts.csv'
read_csv_and_insert_to_db(csv_file_path)