forked from mrchypark/dabrp_classnote
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodeForclass4.R
More file actions
190 lines (148 loc) · 5.86 KB
/
codeForclass4.R
File metadata and controls
190 lines (148 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
if (!require("data.table")) {install.packages("data.table") }
if (!require("DBI")){devtools::install_github("rstats-db/DBI")}
if (!require("RMySQL")){devtools::install_github("rstats-db/RMySQL")}
library(RMySQL)
# set data frame options
options(stringsAsFactors = F)
dir.create("./bankData")
# download bank data named test 105MB
test<-"http://rcoholic.duckdns.org/oc/index.php/s/3YQuCOH39HbI1nW/download"
download.file(test,destfile="./bankData/test_ver2.csv",mode='wb')
# download bank data named train 2.1GB
train<-"http://rcoholic.duckdns.org/oc/index.php/s/0ow1ZJId93uSgfc/download"
download.file(train,destfile="./bankData/train_ver2.csv",mode='wb')
# connect mysql
con <- dbConnect(RMySQL::MySQL(), dbname = "bank", user = "root",password="xxx")
# check number of connections
dbGetQuery(con, "show processlist")
# check encoding setting
dbGetQuery(con, "show variables like 'character_set_%'")
# if not like below, please try next line.
# this set is for windows 10
# mac might be not necessary.
# Variable_name Value
# 1 character_set_client utf8
# 2 character_set_connection utf8
# 3 character_set_database utf8
# 4 character_set_filesystem binary
# 5 character_set_results utf8
# 6 character_set_server latin1
# 7 character_set_system utf8
# 8 character_sets_dir C:\\Program Files\\MariaDB 10.1\\share\\charsets\\
dbSendQuery(con, 'set character set utf8')
# check again.
dbGetQuery(con, "show variables like 'character_set_%'")
# if mac user, try next line for encoding setting.
Sys.setlocale("LC_ALL","ko_KR.UTF-8")
# check db tables
dbListTables(con)
# write table to db
dbWriteTable(con, "mtcars", mtcars, overwrite=T)
dbListTables(con)
# get table data
dbReadTable(con, "mtcars")
# remove
dbRemoveTable(con,"mtcars")
dbListTables(con)
# you can write table from file directly.
system.time(dbWriteTable(con, "train", "./bankData/train_ver2.csv",row.names=F))
# 사용자 시스템 elapsed
# 0.68 7.36 391.80
# check table row number approximately. It's fast.
dbGetQuery(con,"select TABLE_ROWS from information_schema.tables where table_name = 'train'")
# check table row number correctly. It's super slow.
dbGetQuery(con,"select count(*) from train")
# dbSendQuery function just send query.
query <- dbSendQuery(con, "select * from train limit 10")
query
dbColumnInfo(query)
dbGetStatement(query)
dbGetRowCount(query)
dbHasCompleted(query)
train <- dbFetch(query)
query
dbColumnInfo(query)
dbGetStatement(query)
dbGetRowCount(query)
dbHasCompleted(query)
dim(train)
head(train)
dbClearResult(query)
query
dbColumnInfo(query)
dbGetStatement(query)
dbGetRowCount(query)
dbHasCompleted(query)
dim(train)
head(train)
rm(train)
query <- dbGetQuery(con, "select * from train where nomprov='MALAGA'")
dim(query)
head(query)
str(query)
summary(query)
rm(query)
dbDisconnect(con)
library(readr)
library(data.table)
## preprocessing data
# recomt<-"http://rcoholic.duckdns.org/oc/index.php/s/fk6gISGj9wKLUbq/download"
# download.file(recomt,destfile="./recom/transection.txt",mode='wb')
#
# chennel<-read_csv("./recom/chennel.txt")
# competitor<-read_csv("./recom/competitor.txt")
# customer<-read_csv("./recom/customer.txt")
# item<-read_csv("./recom/item.txt")
# membership<-read_csv("./recom/membership.txt")
# tran<-fread("./recom/transection.txt")
#
# names(chennel)<-c("cusID","partner","useCnt")
# names(competitor)<-c("cusID","partner","competitor","useDate")
# names(customer)<-c("cusID","sex","age","area")
# names(item)<-c("partner","cate_1","cate_2",'cate_3',
# "cate_2_name","cate_3_name")
# names(membership)<-c("cusID","memberShip","regDate")
# names(tran)<-c("partner","receiptNum","cate_1","cate_2","cate_3",
# "cusID","storeCode","date","time","amount")
#
# write_csv(chennel,"./recomen/chennel.csv")
# write_csv(competitor,"./recomen/competitor.csv")
# write_csv(customer,"./recomen/customer.csv")
# write_csv(item,"./recomen/item.csv")
# write_csv(membership,"./recomen/membership.csv")
# fwrite(tran,"./recomen/tran.csv")
# download processed data named tran 1.4GB
recoment<-"http://rcoholic.duckdns.org/oc/index.php/s/jISrPutj4ocLci2/download"
download.file(recoment,destfile="./recomen/tran.csv",mode='wb')
con <- dbConnect(RMySQL::MySQL(), dbname = "bank", user = "root")
dbListTables(con)
dbWriteTable(con, "chennel", "./recomen/chennel.csv",row.names=FALSE,overwrite=TRUE)
dbWriteTable(con, "competitor", "./recomen/competitor.csv",row.names=FALSE,overwrite=TRUE)
dbWriteTable(con, "customer", "./recomen/customer.csv",row.names=FALSE,overwrite=TRUE)
dbWriteTable(con, "item","./recomen/item.csv",row.names=FALSE,overwrite=TRUE)
dbWriteTable(con, "membership","./recomen/membership.csv",row.names=FALSE,overwrite=TRUE)
dbWriteTable(con, "tran","./recomen/tran.csv",row.names=FALSE,overwrite=TRUE)
dbListTables(con)
dbGetQuery(con, "select * from chennel limit 10")
dbGetQuery(con, "select * from competitor limit 10")
dbGetQuery(con, "select * from customer limit 10")
dbGetQuery(con, "select * from item limit 10")
dbGetQuery(con, "select * from membership limit 10")
dbGetQuery(con, "select * from tran limit 10")
# inner join
query <- dbGetQuery(con, "select * from membership as a
inner join customer as b on
a.cusID = b.cusID")
join1 <-query
dim(join1)
str(join1)
summary(join1)
# left join (please compare between join1 and join2)
query <- dbSendQuery(con, "select * from customer as a
left join membership as b on
a.cusID = b.cusID")
join2 <- dbFetch(query)
dim(join2)
str(join2)
summary(join2)
dbDisconnect(con)