Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

import logging 

import json 

import sqlite3 

 

import flask 

import pandas as pd 

 

from application import utils 

 

 

def get_db():
    """Return the SQLite connection kept on ``flask.g``.

    A connection to ``utils.DATABASE`` is opened and stored under
    ``flask.g.db`` the first time this is called; later calls reuse the
    stored connection. The log line is emitted on every call, whether or
    not a new connection is opened.
    """
    logging.info("Connecting to database...")
    if "db" in flask.g:
        # Already connected: hand back the stored connection.
        return flask.g.db
    connection = sqlite3.connect(str(utils.DATABASE))
    flask.g.db = connection
    return connection

 

 

def close_db(e=None):
    """Close the SQLite connection stored on ``flask.g``, if there is one.

    The ``db`` entry is removed from ``flask.g`` before closing, so a later
    ``get_db()`` call opens a fresh connection.

    Args:
        e: Unused. NOTE(review): presumably the error argument Flask passes
            to teardown callbacks -- confirm where this is registered.
    """
    logging.info("Closing connection to database...")
    connection = flask.g.pop("db", None)
    if connection is None:
        # Nothing was opened, so there is nothing to close.
        return
    connection.close()

 

 

def _insert_into_textfiles(db, data):
    """Insert one row per textfile into the ``textfiles`` table.

    Args:
        db: Connection-like object providing ``execute``.
        data: Iterable of items accepted by ``utils.load_textfile``, which
            is expected to return a ``(title, content)`` pair for each.

    The caller is responsible for committing.
    """
    statement = """
                INSERT INTO textfiles (title, content)
                VALUES(?, ?);
                """
    for entry in data:
        title, content = utils.load_textfile(entry)
        message = "Insert '{}' into database...".format(title)
        logging.info(message)
        db.execute(statement, [title, content])

 

def _insert_into_token_freqs(db, data):
    """Insert ``data`` as a single row into the ``token_freqs`` table.

    Args:
        db: Connection-like object providing ``execute``.
        data: Value stored in the ``content`` column.

    The caller is responsible for committing.
    """
    logging.info("Insert token frequencies into database...")
    statement = """
                INSERT INTO token_freqs (content)
                VALUES(?);
                """
    parameters = [data]
    db.execute(statement, parameters)

 

def insert_into(table, data):
    """Insert data into database.

    Hands ``data`` to the insert helper matching ``table``, commits the
    transaction and closes the connection obtained from ``get_db()``.

    Args:
        table: Target table name: ``"textfiles"``, ``"token_freqs"``,
            ``"stopwords"`` or ``"model"``. Any other value inserts nothing
            and a warning is logged.
        data: Payload passed unchanged to the table-specific helper.

    Any exception raised by the helper or by the commit propagates to the
    caller; the connection is closed in that case as well.
    """
    db = get_db()
    try:
        if table == "textfiles":
            _insert_into_textfiles(db, data)
        elif table == "token_freqs":
            _insert_into_token_freqs(db, data)
        elif table == "stopwords":
            _insert_into_stopwords(db, data)
        elif table == "model":
            _insert_into_model(db, data)
        else:
            # Previously a silent no-op; make the mistake visible without
            # changing the outcome for existing callers.
            logging.warning("Unknown table '%s', nothing inserted.", table)
        db.commit()
    finally:
        # Always release the connection, even when an insert or the commit
        # fails, so it is not left open on flask.g.
        close_db()

 

 

def _insert_into_model(db, data):
    """Insert the topic model output as one row into the ``model`` table.

    Args:
        db: Connection-like object providing ``execute``.
        data: Mapping with the keys ``document_topic``, ``topics``,
            ``document_similarities`` and ``topic_similarities``; a missing
            key raises ``KeyError``.

    The caller is responsible for committing.
    """
    logging.info("Insert topic model output into database...")
    statement = """
                INSERT INTO model (document_topic, topics, document_similarities, topic_similarities)
                VALUES(?, ?, ?, ?);
                """
    # Same order as the column list in the statement above.
    keys = ("document_topic", "topics",
            "document_similarities", "topic_similarities")
    values = [data[key] for key in keys]
    db.execute(statement, values)

 

 

def _insert_into_stopwords(db, data):
    """Insert ``data`` as a single row into the ``stopwords`` table.

    Args:
        db: Connection-like object providing ``execute``.
        data: Value stored in the ``content`` column.

    The caller is responsible for committing.
    """
    logging.info("Insert stopwords into database...")
    statement = """
                INSERT INTO stopwords (content)
                VALUES(?);
                """
    parameters = [data]
    db.execute(statement, parameters)

 

 

def select(value, **kwargs):
    """Select values from database.

    Opens a cursor on the connection from ``get_db()`` and delegates to the
    helper registered for ``value``.

    Args:
        value: Name of the thing to select, e.g. ``"textfiles"``,
            ``"topics"`` or ``"data_export"`` (see the table below).
        **kwargs: Forwarded only to the ``"textfile"`` helper, which takes
            a ``title``; ignored for every other ``value``.

    Returns:
        Whatever the selected helper returns, or ``None`` when ``value`` is
        not a known name.
    """
    connection = get_db()
    cur = connection.cursor()

    # "textfile" is the only selector that takes extra arguments.
    if value == "textfile":
        return _select_textfile(cur, **kwargs)

    selectors = {
        "textfiles": _select_textfiles,
        "textfile_titles": _select_textfile_titles,
        "token_freqs": _select_token_freqs,
        "document_topic_distributions": _select_document_topic_distributions,
        "topics": _select_topics,
        "document_similarities": _select_document_similarities,
        "topic_similarities": _select_topic_similarities,
        "stopwords": _select_stopwords,
        "data_export": _select_data_export,
    }
    selector = selectors.get(value)
    if selector is None:
        return None
    return selector(cur)

 

def _select_stopwords(cursor):
    """Select stopwords from database.

    Returns the ``content`` column of the first row of the ``stopwords``
    table. Indexing fails with ``TypeError`` if the query yields no row.
    """
    logging.info("Selecting stopwords from database...")
    query = """
            SELECT content
            FROM stopwords;
            """
    result = cursor.execute(query)
    row = result.fetchone()
    return row[0]

 

def _select_document_similarities(cursor):
    """Select document similarity matrix from database.

    Returns the ``document_similarities`` column of the first row of the
    ``model`` table. Indexing fails with ``TypeError`` if the query yields
    no row.
    """
    logging.info("Selecting document similarity matrix from database...")
    query = """
            SELECT document_similarities
            FROM model;
            """
    result = cursor.execute(query)
    row = result.fetchone()
    return row[0]

 

 

def _select_topic_similarities(cursor):
    """Select topic similarity matrix from database.

    Returns the ``topic_similarities`` column of the first row of the
    ``model`` table. Indexing fails with ``TypeError`` if the query yields
    no row.
    """
    logging.info("Selecting topic similarity matrix from database...")
    query = """
            SELECT topic_similarities
            FROM model;
            """
    result = cursor.execute(query)
    row = result.fetchone()
    return row[0]

 

 

def _select_token_freqs(cursor):
    """Select token frequencies from database.

    Returns the ``content`` column of the first row of the ``token_freqs``
    table. Indexing fails with ``TypeError`` if the query yields no row.
    """
    logging.info("Selecting token frequencies from database...")
    query = """
            SELECT content
            FROM token_freqs;
            """
    result = cursor.execute(query)
    row = result.fetchone()
    return row[0]

 

def _select_textfile_titles(cursor):
    """Select all textfile titles from database.

    Returns:
        A JSON-encoded list (``str``) holding the ``title`` of every row in
        the ``textfiles`` table.
    """
    logging.info("Selecting textfile titles from database...")
    query = """
            SELECT title
            FROM textfiles;
            """
    cursor.execute(query)
    # Each row has exactly one column, the title.
    titles = [title for (title,) in cursor.fetchall()]
    return json.dumps(titles)

 

 

def _select_textfiles(cursor):
    """Select all textfiles from database.

    Returns:
        The result of ``cursor.fetchall()``: one ``(title, content)`` row
        per entry in the ``textfiles`` table.
    """
    logging.info("Selecting textfiles from database...")
    query = """
            SELECT title, content
            FROM textfiles;
            """
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows

 

 

def _select_document_topic_distributions(cursor):
    """Select document-topic matrix from database.

    Returns the ``document_topic`` column of the first row of the ``model``
    table. Indexing fails with ``TypeError`` if the query yields no row.
    """
    logging.info("Selecting document-topic distributions from database...")
    query = """
            SELECT document_topic
            FROM model;
            """
    result = cursor.execute(query)
    row = result.fetchone()
    return row[0]

 

 

def _select_topics(cursor):
    """Select topics from database.

    Returns the ``topics`` column of the first row of the ``model`` table.
    Indexing fails with ``TypeError`` if the query yields no row.
    """
    logging.info("Selecting topics from database...")
    query = """
            SELECT topics
            FROM model;
            """
    result = cursor.execute(query)
    row = result.fetchone()
    return row[0]

 

 

def _select_textfile(cursor, title):
    """Select the content of one textfile from database.

    Args:
        cursor: Cursor used to run the query.
        title: Title of the textfile to look up.

    Returns:
        The ``content`` column of the first matching row. Indexing fails
        with ``TypeError`` if no row matches ``title``.
    """
    message = "Selecting '{}' from database...".format(title)
    logging.info(message)
    query = """
            SELECT content
            FROM textfiles
            WHERE title is ?;
            """
    result = cursor.execute(query, [title])
    row = result.fetchone()
    return row[0]

 

def _select_data_export(cursor):
    """Select stopwords and model output from database, prepared for export.

    Reads the first row of the ``stopwords`` table and the first row of the
    ``model`` table and decodes the stored JSON.

    Args:
        cursor: Cursor used to run both queries.

    Returns:
        A dict with the keys ``"document-topic-distribution"``,
        ``"topics"``, ``"topic-similarities"`` and
        ``"document-similarities"`` (each a ``pandas.DataFrame``) and
        ``"stopwords"`` (the result of ``json.loads``). The topics frame
        has its index relabelled ``"Topic <n>"`` and its columns
        ``"Word <n>"``.

    Fails with ``TypeError`` if either table yields no row.
    """
    # Function-scope import so this block does not rely on a module-level
    # ``import io``.
    import io

    def _read_frame(payload, **options):
        # Passing literal JSON text to ``pd.read_json`` is deprecated since
        # pandas 2.1; a file-like wrapper works on old and new versions.
        # Anything that is not ``str`` is passed through unchanged.
        source = io.StringIO(payload) if isinstance(payload, str) else payload
        return pd.read_json(source, **options)

    logging.info("Selecting stopwords from database...")
    stopwords = cursor.execute("""
                               SELECT content
                               FROM stopwords;
                               """).fetchone()[0]

    logging.info("Selecting model output from database...")
    model = cursor.execute("""
                           SELECT document_topic, topics, document_similarities, topic_similarities
                           FROM model;
                           """).fetchone()
    document_topic, topics, document_similarities, topic_similarities = model

    logging.info("Preparing document-topic distributions...")
    document_topic = _read_frame(document_topic, orient="index")

    logging.info("Preparing topics...")
    topics = _read_frame(topics)
    topics.index = ["Topic {}".format(n) for n in topics.index]
    topics.columns = ["Word {}".format(n) for n in topics.columns]

    logging.info("Preparing topic similarity matrix...")
    topic_similarities = _read_frame(topic_similarities)

    logging.info("Preparing document similarity matrix...")
    document_similarities = _read_frame(document_similarities)

    return {"document-topic-distribution": document_topic,
            "topics": topics,
            "topic-similarities": topic_similarities,
            "document-similarities": document_similarities,
            "stopwords": json.loads(stopwords)}