This repository has been archived by the owner on Oct 24, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 20
/
db.py
148 lines (121 loc) · 3.97 KB
/
db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from __future__ import unicode_literals
import MySQLdb
import conf, getpass, json
from data.groups import groups
conn = None
debug = None
def make():
print "making db..."
conn.execute("drop database if exists %s" % conf.dbname)
conn.execute("create database %s" % conf.dbname)
conn.execute("use %s" % conf.dbname)
# region
conn.execute("""create table `region` (
name varchar(255) primary key,
parent varchar(255) references region(name),
type varchar(255)
) engine=MyISAM""")
conn.execute("""create table `group` (
name varchar(50) primary key,
label varchar(255),
color varchar(255),
keywords text,
description text,
icon varchar(255)
) engine=MyISAM""")
conn.execute("""create table `period` (
name varchar(255) primary key,
parent varchar(255) references period(name)
) engine=MyISAM""")
conn.execute("""create table `source` (
name varchar(255) primary key
) engine=MyISAM""")
conn.execute("""create table `dataset` (
name varchar(150) primary key,
source varchar(255) references source(name),
title varchar(255),
filename varchar(255),
raw_filename varchar(255),
html_filename varchar(255),
url text,
description text,
source_info text
) engine=MyISAM""")
conn.execute("""create table `dataset_group` (
dataset varchar(255) not null references dataset(name),
`group` varchar(50) not null references `group`(name),
constraint unique(dataset, `group`)
) engine=MyISAM""")
conn.execute("""create table `head` (
name varchar(150) primary key,
title varchar(255),
dataset varchar(255) references dataset(name)
) engine=MyISAM""")
conn.execute("""create table `data` (
id int(15) primary key auto_increment,
region varchar(255) references region(name),
period varchar(255) references period(name),
dataset varchar(255) references datatype(name),
unit varchar(255),
value decimal(25,2)
) engine=MyISAM""")
conn.execute("""create table `data_head` (
data varchar(150) not null references data(name),
`head` varchar(150) not null references `head`(name),
constraint unique (data, head)
) engine=MyISAM""")
def connect():
global conn
if not conf.rootdbpassword:
conf.rootdbpassword = getpass.getpass("MySQL root password: ")
connection = MySQLdb.connect(user="root", host="localhost", passwd=conf.rootdbpassword,
use_unicode=True, charset='utf8')
connection.converter[246]=float
conn = connection.cursor()
try:
conn.execute("use %s" % conf.dbname)
except Exception, e:
# not created?
pass
def insert(table, obj):
items = obj.items()
keys = [i[0] for i in items]
values = [i[1] for i in items]
values = [",".join(i) if type(i)==list else i for i in values]
if debug:
print """insert into `%s`(%s) values
(%s) on duplicate key update name=name""" % (table, ", ".join(["`%s`" % k for k in keys]),
", ".join(['%s'] * len(values))) % tuple(values)
conn.execute("""insert ignore into `%s`(%s) values
(%s)""" % (table, ", ".join(["`%s`" % k for k in keys]),
", ".join(['%s'] * len(values))), values)
def sql(query, values=None, as_dict=False):
if query[:6].lower() != "select":
query = "select " + query
conn.execute(query, values)
data = conn.fetchall()
if as_dict:
names = [i[0] for i in conn.description]
data = [dict(zip(names, d)) for d in data]
return data
def insert_dataset(d):
group_set = False
# find in name
for g in groups:
if has_keyword(d.get("title", d["name"]), g):
insert("dataset_group", {"dataset": d["name"], "group":g})
group_set = True
# find in description
if not group_set and d.get("description"):
if has_keyword(d["description"], g):
insert("dataset_group", {"dataset": d["name"], "group":g})
group_set = True
if not group_set:
insert("dataset_group", {"dataset": d["name"], "group": "Other"})
d["title"] = d["name"]
d["name"] = d["name"][:150]
insert("dataset", d)
def has_keyword(text, group):
if not text: return
t = text.lower().replace("_", " ").split()
return set(t).intersection(set(groups[group]["keywords"]))