# Clean a product/company CSV:
#   1. normalise misspelled company names,
#   2. split the product code from the product number,
#   3. derive a readable product category,
#   4. combine the address parts into one column,
#   5. add 1/0 indicator columns for each company and product category.
# Each stage is kept in its own object (refine1 ... refine6).

# Packages providing as_tibble(), %>%, separate() and unite().
library(dplyr)
library(tidyr)

# Load the raw data from a CSV file picked interactively by the user.
refine_original <- as_tibble(read.csv(file.choose(), header = TRUE))

# ---- 1. Standardise company names ----
# Lower-case and trim first so the patterns below only deal with one casing.
co <- trimws(tolower(refine_original$company))
# The patterns are anchored at the start ("^") so the WHOLE misspelled name
# is replaced. An unanchored '.lip.$' only rewrites the last five characters
# (e.g. "phillips" -> "phiphilips"), which never equals "philips" later on.
co <- sub("^.*lip.$", "philips", co)
co <- gsub("phillps", "philips", co, fixed = TRUE)
co <- sub("^ak.*", "akzo", co)
co <- sub("^van.*", "van houten", co)
co <- sub("^unil.*", "unilever", co)

# Put the cleaned names in front as `company`, replacing the raw column.
refine1 <- cbind(
  company = co,
  refine_original[names(refine_original) != "company"]
)

# ---- 2. Split "<code>-<number>" into two columns ----
refine2 <- refine1 %>%
  separate(
    Product.code...number,
    c("product_code", "product_number"),
    sep = "-"
  )

# ---- 3. Map product codes to categories ----
# p = Smartphone, v = TV, x = Laptop; any other code is treated as Tablet.
pcode <- refine2$product_code
pcat <- ifelse(
  pcode == "p", "Smartphone",
  ifelse(
    pcode == "v", "TV",
    ifelse(pcode == "x", "Laptop", "Tablet")
  )
)
refine3 <- cbind(refine2, product_cat = pcat)

# ---- 4. Merge address, city and country into one comma-separated column ----
refine4 <- refine3 %>%
  unite("full_address", address, city, country, sep = ", ")

# ---- 5. Indicator (1/0) columns for company ----
company_philips <- ifelse(co == "philips", 1, 0)
company_akzo <- ifelse(co == "akzo", 1, 0)
company_van_houten <- ifelse(co == "van houten", 1, 0)
company_unilever <- ifelse(co == "unilever", 1, 0)
refine5 <- cbind(
  refine4,
  company_philips,
  company_akzo,
  company_van_houten,
  company_unilever
)

# Sanity check: total number of rows flagged with one of the four companies.
# Each row can set at most one indicator, so comparing this with
# nrow(refine5) shows whether any company name was left unmatched.
sum(company_akzo, company_philips, company_unilever, company_van_houten)

# ---- 6. Indicator (1/0) columns for product category ----
product_smartphone <- ifelse(pcat == "Smartphone", 1, 0)
product_tv <- ifelse(pcat == "TV", 1, 0)
product_laptop <- ifelse(pcat == "Laptop", 1, 0)
product_tablet <- ifelse(pcat == "Tablet", 1, 0)
refine6 <- cbind(
  refine5,
  product_smartphone,
  product_tv,
  product_laptop,
  product_tablet
)