# Segment survey respondents with hierarchical clustering on Gower
# dissimilarities (5 clusters), export the cluster profiles and memberships,
# then test whether cluster membership is associated with purchasing habits
# and socioeconomic variables (contingency tables + chi-squared tests).

library(cluster)
library(psych)

# ---- Helpers ----------------------------------------------------------------

# Cross-tabulate cluster membership against one categorical variable.
#
# Prints the contingency table, its row and column totals, the within-cluster
# (row) proportions and a chi-squared test of independence.
#
# cluster   vector of cluster labels, one per respondent
# variable  vector of the same length holding the categorical variable
# label     name shown for `variable` in the table's dimnames; defaults to
#           the expression passed as `variable`
#
# Returns the contingency table invisibly.
describe_crosstab <- function(cluster, variable,
                              label = deparse(substitute(variable))) {
  tab <- table(cluster, variable, dnn = c("cluster", label))
  print(tab)
  print(margin.table(tab, 1))  # cluster totals (summed over the variable)
  print(margin.table(tab, 2))  # variable totals (summed over clusters)
  print(prop.table(tab, 1))    # row proportions (each cluster sums to 1)
  print(chisq.test(tab))
  invisible(tab)
}

# ---- 1. Load and prepare the survey -----------------------------------------

# TODO: set this to the location of the raw survey export (placeholder name).
survey_file <- "survey.csv"
stopifnot(file.exists(survey_file))
survey <- read.csv(survey_file)
# survey <- survey[, 2:11]

items <- c(
  "id", "informed", "info_trust", "certification_trust", "region",
  "fair_price", "small_diverse", "agbio_conserv", "price", "reliability",
  "taste"
)
# Fail early with a clear message instead of a cryptic colnames<- error.
stopifnot(ncol(survey) == length(items))
colnames(survey) <- items

# Keep the respondent ids aside and drop them from the data so that they do
# not take part in the clustering.
id <- survey$id
survey$id <- NULL

# ---- 2. Gower distance, Ward linkage, 5 clusters ----------------------------

# The original script called data(survey) here. That either warns that no such
# data set exists or, if a package shipping a `survey` data set is attached,
# overwrites the data prepared above, so the call has been removed.
d.sur <- daisy(survey, metric = "gower", stand = FALSE)
d.sur

# method = "ward" was renamed "ward.D" in R 3.1.0; "ward.D" gives the same
# result the old name did.
# NOTE(review): "ward.D2" is the variant that implements Ward's criterion on
# unsquared dissimilarities; switching would change the clusters -- confirm
# which one is intended.
surveyhclust <- hclust(d.sur, method = "ward.D")
plot(surveyhclust, main = "Default from hclust")
rect.hclust(surveyhclust, 5)

groups.5 <- cutree(surveyhclust, 5)

# Respondent ids per cluster. split() always returns a list, whereas sapply()
# returns a matrix when all clusters happen to have the same size.
split(id, groups.5)

# Cluster profile: size of each cluster and the mean of every item within it.
a5 <- aggregate(survey, list(groups.5), FUN = mean)
x <- data.frame(
  Cluster = a5[, 1],
  Freq = as.vector(table(groups.5)),
  a5[, -1]
)
x
write.table(x, file = "gower ward five clus.csv", sep = ",")

# Export cluster membership together with the ids so that it can be matched
# up with the full survey responses.
survey_2 <- data.frame(survey, groups.5, id)
write.table(survey_2, file = "survey with clusters.csv", sep = ";")

clusplot(
  survey, groups.5,
  color = TRUE, shade = TRUE, labels = 2, lines = 0,
  main = "Customer groups"
)

# ---- 3. Clusters versus purchasing habits and socioeconomic data ------------

# TODO: set this to the merged file holding cluster membership plus the
# purchasing and socioeconomic answers (placeholder name).
data_file <- "survey with clusters and socioeconomic data.csv"
stopifnot(file.exists(data_file))
data <- read.csv(data_file)

# Columns are addressed by position, so the file must keep this exact layout.
cluster <- data[, 2]
informed <- data[, 3]
info_trust <- data[, 4]
cert <- data[, 5]
region <- data[, 6]
fair <- data[, 7]
smalldiverse <- data[, 8]
conserv <- data[, 9]
price <- data[, 10]
convenience <- data[, 11]
taste <- data[, 12]
freq_eggs <- data[, 13]
where_eggs <- data[, 14]
freq_meat <- data[, 15]
where_meat <- data[, 16]
gender <- data[, 17]
age <- data[, 18]
household <- data[, 19]

mytable_freqegg <- describe_crosstab(cluster, freq_eggs)
mytable_whereegg <- describe_crosstab(cluster, where_eggs)
mytable_freqmeat <- describe_crosstab(cluster, freq_meat)
mytable_wheremeat <- describe_crosstab(cluster, where_meat)
mytable_gender <- describe_crosstab(cluster, gender)
mytable_age <- describe_crosstab(cluster, age)
mytable_household <- describe_crosstab(cluster, household)

# The original script cross-tabulated `edu` and `income` without ever
# extracting them, which stops with "object 'edu' not found".
# NOTE(review): assumed to be the next two columns after `household`
# (20 and 21) -- confirm against the data file.
edu <- data[, 20]
income <- data[, 21]

mytable_edu <- describe_crosstab(cluster, edu)
mytable_income <- describe_crosstab(cluster, income)