Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

import json 

import sqlite3 

 

import flask 

import pandas as pd 

 

import utils 

 

 

def get_db(): 

"""Create connection to SQLite database. 

""" 

if "db" not in flask.g: 

flask.g.db = sqlite3.connect(str(utils.DATABASE)) 

return flask.g.db 

 

 

def close_db(e=None): 

"""Close connection to SQLite database. 

""" 

db = flask.g.pop("db", None) 

if db is not None: 

db.close() 

 

 

def _insert_into_textfiles(db, data): 

"""Insert data into textfiles table. 

""" 

for textfile in data: 

title, content = utils.load_textfile(textfile) 

db.execute(""" 

INSERT INTO textfiles (title, content)  

VALUES(?, ?); 

""", [title, content]) 

 

 

def insert_into(table, data): 

"""Insert data into database. 

""" 

db = get_db() 

if table in {"textfiles"}: 

_insert_into_textfiles(db, data) 

elif table in {"stopwords"}: 

_insert_into_stopwords(db, data) 

elif table in {"model"}: 

_insert_into_model(db, data) 

db.commit() 

close_db() 

 

 

def _insert_into_model(db, data): 

"""Insert data into model table. 

""" 

db.execute(""" 

INSERT INTO model (document_topic, topics, document_similarities, topic_similarities) 

VALUES(?, ?, ?, ?); 

""", 

[data["document_topic"], data["topics"], 

data["document_similarities"], data["topic_similarities"]]) 

 

 

def _insert_into_stopwords(db, data): 

"""Insert data into stopwords table. 

""" 

db.execute(""" 

INSERT INTO stopwords (content) 

VALUES(?); 

""", 

[data]) 

 

 

def select(value, **kwargs): 

"""Select values from database. 

""" 

db = get_db() 

cursor = db.cursor() 

if value in {"textfiles"}: 

return _select_textfiles(cursor) 

elif value in {"titles"}: 

return _select_titles(cursor) 

elif value in {"document-topic"}: 

return _select_document_topic(cursor) 

elif value in {"topic-overview"}: 

return _select_topic_overview(cursor) 

elif value in {"document-overview"}: 

return _select_document_overview(cursor, **kwargs) 

elif value in {"model-output"}: 

return _select_model_output(cursor) 

 

 

def _select_titles(cursor): 

"""Select textfile titles from database. 

""" 

cursor.execute(""" 

SELECT title  

FROM textfiles; 

""") 

return [title[0] for title in cursor.fetchall()] 

 

 

def _select_model_output(cursor): 

"""Select model output from database. 

""" 

stopwords = cursor.execute(""" 

SELECT content  

FROM stopwords; 

""").fetchone() 

 

response = cursor.execute(""" 

SELECT document_topic, topics, document_similarities, topic_similarities  

FROM model; 

""").fetchone() 

document_topic, topics, document_similarities, topic_similarities = response 

document_topic = pd.read_json(document_topic) 

document_topic.columns = [col.replace(",", "") for col in document_topic.columns] 

document_topic.index = [ix.replace(",", "") for ix in document_topic.index] 

 

topics = pd.read_json(topics) 

topics.index = ["Topic {}".format(n) for n in topics.index] 

topics.columns = ["Word {}".format(n) for n in topics.columns] 

 

topic_similarities = pd.read_json(topic_similarities) 

topic_similarities.columns = [col.replace(",", "") for col in topic_similarities.columns] 

topic_similarities.index = [ix.replace(",", "") for ix in topic_similarities.index] 

 

document_similarities = pd.read_json(document_similarities) 

document_similarities.columns = [col.replace(",", "") for col in document_similarities.columns] 

document_similarities.index = [ix.replace(",", "") for ix in document_similarities.index] 

return {"document-topic-distribution": document_topic, 

"topics": topics, 

"topic-similarites": topic_similarities, 

"document-similarites": document_similarities, 

"stopwords": json.loads(stopwords[0])} 

 

 

def _select_textfiles(cursor): 

"""Select textfiles from database. 

""" 

cursor.execute(""" 

SELECT title, content  

FROM textfiles; 

""") 

return cursor.fetchall() 

 

 

def _select_document_topic(cursor): 

"""Select document-topic matrix form database. 

""" 

response = cursor.execute(""" 

SELECT document_topic  

FROM model; 

""").fetchone()[0] 

return pd.read_json(response) 

 

 

def _select_topic_overview(cursor): 

"""Select values for the topic overview page. 

""" 

response = cursor.execute(""" 

SELECT document_topic, topics, topic_similarities  

FROM model; 

""").fetchone() 

document_topic, topics, topic_similarities = response 

return pd.read_json(document_topic), json.loads(topics), pd.read_json(topic_similarities) 

 

 

def _select_document_overview(cursor, title): 

"""Select values for the document overview page. 

""" 

response = cursor.execute(""" 

SELECT document_topic, topics, document_similarities  

FROM model; 

""").fetchone() 

document_topic, topics, document_similarities = response 

text = cursor.execute(""" 

SELECT content  

FROM textfiles 

WHERE title is ?; 

""", [title]).fetchone()[0] 

return text, pd.read_json(document_topic).T, json.loads(topics), pd.read_json(document_similarities)