본문 바로가기

Data Analytics

R script 20140129


# K-means clustering of the iris measurements ----
# Clusters the numeric iris columns into three groups and compares the
# assignment with the known species labels.

help(kmeans)

iris
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) View(iris)

# kmeans() needs purely numeric input: drop the Species factor column.
iris.features <- iris
iris.features$Species <- NULL
iris.features
if (interactive()) View(iris.features)

plot(iris)

# Ask for three clusters; see ?kmeans for how starting centres are chosen.
results <- kmeans(iris.features, 3)
results

results$size
results$iter
results$totss
results$cluster

head(iris)

# Cross-tabulate the true species against the assigned cluster.
table(iris$Species, results$cluster)

# Petal and sepal scatter plots coloured by cluster.
plot(iris[c(3, 4)], col = results$cluster)
plot(iris[c(1, 2)], col = results$cluster)




# Correlation, simple regression and scatter plots ----
# NOTE(review): this section reads `xvar`, `zvar` and a data frame `df` with
# numeric columns x, y and z. `xvar`/`zvar` are only assigned further down in
# the script and `df` is not created anywhere in it -- define them first.
library(ggplot2)

xvar * zvar / 5
-2 * xvar + xvar * zvar / 5

# aes() builds a ggplot2 mapping and cannot be drawn by base plot();
# plot the vector itself instead.
plot(xvar)

df

# Pairwise correlations, then the whole correlation matrix.
cor(df$x, df$y)
cor(df$x, df$z)
cor(df$y, df$z)
cor(df)
round(cor(df), 2)
round(cor(df), 3)

# Regress y on x. Using `data =` keeps plain column names in the model terms.
fit <- lm(y ~ x, data = df)
fit
summary(fit)

# Scatter plots (open circles) with a fitted lm smoother, one per pair.
ggplot(df, aes(x = x, y = y))
ggplot(df, aes(x = x, y = y)) +
  geom_point(shape = 1)
ggplot(df, aes(x = x, y = y)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
ggplot(df, aes(x = x, y = z)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
ggplot(df, aes(x = y, y = z)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)

# Simulate five noisy series of length 20 ----
# Fixing the seed makes the random draws repeatable.
set.seed(955)

vvar <- seq_len(20) + rnorm(20, sd = 3)      # rising trend, small noise
wvar <- seq_len(20) + rnorm(20, sd = 5)      # rising trend, larger noise
xvar <- rev(seq_len(20)) + rnorm(20, sd = 3) # falling trend
yvar <- seq_len(20) / 2 + rnorm(20, sd = 10) # shallow trend, heavy noise
zvar <- rnorm(20, sd = 6)                    # pure noise

# A data frame with multiple variables
data <- data.frame(
  vvar = vvar,
  wvar = wvar,
  xvar = xvar,
  yvar = yvar,
  zvar = zvar
)
#         vvar       wvar      xvar       yvar         zvar
# -4.252354091  5.1219288 16.021933 -15.156368  -4.08690435
#  1.702317971 -1.3234340 15.838169 -24.063902   3.46842292
#  4.323053753 -2.1570874 19.855167   2.306770  -3.04493067
# ...
# 17.220012698 19.5663648  4.887111  30.906004  -6.83781743
# 17.793359218 20.1976463 -2.880213  15.777335  -1.50437241
# 19.319909163 19.4131664 -5.236230  24.047036  -2.67059907

# Correlation-ellipse plots (package "ellipse") ----
# Expects a numeric data frame named `data` to exist already.

# Install only when the package is missing, not on every run.
if (!requireNamespace("ellipse", quietly = TRUE)) {
  install.packages("ellipse")
}
library(ellipse)

# Make the correlation table
ctab <- cor(data)
# round(ctab, 2)
#       vvar  wvar  xvar  yvar  zvar
# vvar  1.00  0.61 -0.85  0.75 -0.21
# wvar  0.61  1.00 -0.81  0.54 -0.31
# xvar -0.85 -0.81  1.00 -0.63  0.24
# yvar  0.75  0.54 -0.63  1.00 -0.30
# zvar -0.21 -0.31  0.24 -0.30  1.00
ctab

# Make the graph
plotcorr(ctab)

# Do the same, but with colors corresponding to value.
# (ctab + 1) / 2 maps correlations from [-1, 1] onto [0, 1], the input range
# of the colour ramp (red -> white -> blue).

# Variant 1: colorRamp() with its default interpolation space.
colorfun <- colorRamp(c("#CC0000", "white", "#3366CC"))
plotcorr(ctab, col = rgb(colorfun((ctab + 1) / 2), maxColorValue = 255))

# Variant 2: interpolation in Lab space (left as the final `colorfun`).
colorfun <- colorRamp(c("#CC0000", "white", "#3366CC"), space = "Lab")
plotcorr(ctab, col = rgb(colorfun((ctab + 1) / 2), maxColorValue = 255))

# Histograms, history export, file import and help lookup ----
# NOTE(review): `r_master` and `df` are not created in this part of the
# script, and an `age` column is assumed -- confirm before running.
ggplot(r_master, aes(age)) +
  geom_histogram(binwidth = 10) +
  scale_x_continuous(limits = c(1, 70))
ggplot(df, aes(age)) +
  geom_histogram(binwidth = 10) +
  scale_x_continuous(limits = c(1, 70))
ggplot(df, aes(xvar)) +
  geom_histogram(binwidth = 10) +
  scale_x_continuous(limits = c(1, 70))

savehistory("C:/Users/Administrator/Desktop/r20131222.Rhistory")

# Backslashes in a Windows path must be doubled inside an R string; a single
# backslash starts an escape sequence and "\c" is not a valid one.
mydata <- read.csv(
  "D:\\contest_bigdata\\1_movie_data\\read_f.txt",
  sep = "\t",
  header = TRUE
)
mydata

# Help lookup: the question mark goes in front of the function name.
?pnorm

# Renaming factor levels ----

# Install only when the package is missing, not on every run.
if (!requireNamespace("RODBC", quietly = TRUE)) {
  install.packages("RODBC")
}

# NOTE(review): `mydata` is the data frame read from file earlier on.
names(mydata)

library(plyr)

sizes <- factor(c("small", "large", "large", "small", "medium"))
sizes
levels(sizes)

# revalue() (plyr) maps old level names to new ones by name; `sizes` itself
# is left unchanged.
sizes1 <- revalue(sizes, c(small = "S", medium = "M", large = "L"))
sizes1

# Base-R alternatives, applied to `sizes` in turn:
levels(sizes)[levels(sizes) == "large"] <- "L" # by current value
levels(sizes)[2] <- "M"                        # by position
levels(sizes) <- c("L", "M", "S")              # all levels at once
sizes

# Recoding variables in PlantGrowth ----
data(PlantGrowth)
head(PlantGrowth)

# Work on a five-row subset.
pg <- PlantGrowth[c(1, 2, 11, 21, 22), ]
pg

# Recode a categorical variable: match() gives the position of each group in
# `oldvals`, which then indexes the replacement values.
oldvals <- c("ctrl", "trt1", "trt2")
newvals <- factor(c("No", "Yes", "Yes"))
pg$treatment <- newvals[match(pg$group, oldvals)]
pg

# Build a category from two columns, one condition at a time.
pg$newcol[pg$group == "ctrl" & pg$weight < 5] <- "No_Small"
pg
pg$newcol[pg$group == "ctrl" & pg$weight >= 5] <- "No_Large"
pg$newcol[pg$group == "trt1" | pg$group == "trt2"] <- "Yes"
pg

# Combine two factors into a single one.
pg$grtrt <- interaction(pg$group, pg$treatment)
pg

# Start again from the raw subset and bin the continuous weight.
pg <- PlantGrowth[c(1, 2, 11, 21, 22), ]
pg

pg$class <- cut(pg$weight, breaks = c(0, 5, 6, Inf))
pg

pg$class <- cut(
  pg$weight,
  breaks = c(0, 5, 6, Inf),
  labels = c("small", "medium", "large")
)
pg

# right = FALSE closes each interval on the left and opens it on the right.
pg$class <- cut(pg$weight, breaks = c(0, 5, 6, Inf), right = FALSE)
pg

# Derived columns and group-wise summaries ----
# mutate() and ddply() come from plyr, which is attached earlier in the script.
library(gcookbook)
data(heightweight)

hw <- heightweight
head(hw)

# Three ways to add computed columns.
hw$heightCm <- hw$heightIn * 2.54
hw <- transform(hw, heightCm = heightIn * 2.54, weightKg = weightLb / 2.204)
hw <- transform(hw, bmi = weightKg / (heightCm / 100)^2)
hw <- mutate(
  hw,
  heightCm = heightIn * 2.54,
  weightKg = weightLb / 2.204,
  bmi = weightKg / (heightCm / 100)^2
)

library(MASS)
head(cabbages)

# Deviation of each head weight from the mean of its cultivar.
cb <- ddply(cabbages, "Cult", transform, DevWt = HeadWt - mean(HeadWt))
cb

# Same idea within each cultivar/date cell, for weight and vitamin C.
cb2 <- ddply(
  cabbages, c("Cult", "Date"), transform,
  DevWt = HeadWt - mean(HeadWt),
  DevVitC = VitC - mean(VitC)
)

# One row of means per cultivar/date cell.
# NOTE(review): "VicC_mean" looks like a typo for "VitC_mean"; the name is
# kept because other code may refer to it.
cb3 <- ddply(
  cabbages, c("Cult", "Date"), summarise,
  Weight_mean = mean(HeadWt),
  VicC_mean = mean(VitC)
)

# Copy with a few weights set to NA to show missing-value handling.
c1 <- cabbages
head(c1)
c1$HeadWt[c(1, 20, 45)] <- NA

# No NA handling: mean() and sd() are called without na.rm, and length()
# counts every row of the cell.
ddply(
  c1, c("Cult", "Date"), summarise,
  wt_mean = mean(HeadWt),
  sd = sd(HeadWt),
  n = length(HeadWt)
)

# Drop NAs in the statistics and count only the non-missing weights.
ddply(
  c1, c("Cult", "Date"), summarise,
  wt_mean = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt))
)

# Add the standard error, computed from the `sd` and `n` columns just defined.
ddply(
  c1, c("Cult", "Date"), summarise,
  wt_mean = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt)),
  se = sd / sqrt(n)
)

# * MASS, reshape, plyr 3개 패키지를 활용하여 시각화를 위한 데이터 준비를 완료합니다.

# [출처] R 데이터 핸들링_2|작성자 자유인 후

# Basic values and matrix construction ----
ls.str()

print(pi)
print(sqrt(2))
sqrt(2)
pi * sqrt(2)
pi

# matrix(data, nrow, ncol): a single value is recycled to fill every cell.
matrix(1, 2, 3)

# Several values must be combined with c() before being passed as `data`.
print(matrix(c(1, 2, 3, 4), 2, 2))
matrix(c(1, 2, 3, 4), 2, 2)

# Shorter data is recycled to fill the matrix; these cell counts are all
# multiples of the data length.
matrix(c(1, 2, 3, 4), 2, 4)
matrix(c(1, 2, 3, 4), 2, 12)
matrix(1:12, 2, 12)
matrix(1:24, 2, 12)
matrix(c("a", "b"), 2, 12)
matrix(c("a", "b"), 10, 12)

# `:` only builds numeric sequences; the built-in `letters` holds "a".."z".
# rep_len() recycles it explicitly to the 120 cells required.
matrix(rep_len(letters, 10 * 12), 10, 12)

# Mixing strings and numbers in c() turns every element into a string.
matrix(c("a", "b", "c", 3, "z"), 10, 12)

# Lists: construction and element access ----
list("a", "b", "c")

list_01 <- list("a", "b", "c")
list_01

# Elements may have different types. Use c() to store a vector; cat() only
# prints its arguments and returns NULL, so it cannot supply an element.
list_01 <- list("a", "b", "c", 34)
list_01
list_01 <- list("a", "b", "c", c(1:3))
list_01

# `[` returns a sub-list, `[[` extracts the element itself; further indexing
# then applies to that element.
list_01[1]
list_01[4]
list_01[[4]]
list_01[[4]][1]
list_01[[4]][2]

list_02 <- list("a", "b", "c", c(1:3))
list_02 <- list("a", "b", "c", c(1:3), "d")
list_02 <- list("a", "b", "c", c(1:3), c("x", "y"))
list_02 <- list("a", "b", "c", c(1:3), ("xy"))
list_02
list_02 <- list("a", "b", "c", c(1:3), ("xy"), matrix(99, 3, 4))
list_02

# Assignment is `<-`; `y < x * 4` would be a comparison.
x <- 3
y <- x * 4
z <- sqrt(x^2 + y^2)
z
x^2
y^2

list_03 <- list(
  "a", "b", "c",
  c(1:3),
  c("jacob", "fly", "to sky"),
  ("xy"),
  matrix(99, 3, 4)
)
list_03

list_02[1]
list_03[7]

# The seventh element is a matrix: extract it with `[[`, then index by
# row and column.
list_03[[7]]
list_03[[7]][1, 1]
list_03[[7]][3, 4]

# Logs, correlation and data-frame subsetting ----
ls.str()

x <- c(0, 1, 1, 2, 3, 34, 5, 546, 4, 735, 67, 458, 3, 56, 45, 64, 56)
x

# Add one before taking logs so the zero in `x` does not become -Inf.
y <- log(x + 1)
getwd()
y

# log() is vectorised; log(0) is -Inf and log(1) is 0.
log(c(0, 1, 2, 3, 4, 5, 6, 100, 10, 1000))

cor(x, y)
cov(x, y)

version
c(1:25)

mtcars

# `[[` extracts exactly one column as a vector.
mtcars[["mpg"]]

# Use `[` with a character vector to keep several columns.
mtcars[c("mpg", "hp")]

# A derived column is computed from the columns, not from their names.
transform(mtcars[c("mpg", "hp")], mpg_hp = mpg * hp)

# Keep only the rows that satisfy a condition.
mtcars[mtcars$mpg > 21, ]

# Read an HTML table from a web page with the XML package ----

# Install only when the package is missing, not on every run.
if (!requireNamespace("XML", quietly = TRUE)) {
  install.packages("XML")
}
library(XML)

url <- "http://en.wikipedia.org/wiki/World_population"
url

# `which = 3` asks for the third table found on the page.
# NOTE(review): readHTMLTable() fetches the URL itself; confirm it can still
# reach this page (the site may redirect to HTTPS) and that table 3 is still
# the intended one.
tbl <- readHTMLTable(url, which = 3)
head(tbl)
tbl

# NOTE(review): no aesthetics or geom layer are given here, and ggplot2 is
# not attached in this part of the script.
ggplot(tbl)

# Round-tripping iris through CSV files ----
data(iris)
iris
head(iris)
nrow(iris)
ncol(iris)

# Files are written to and read from the current working directory. Start R
# in the directory that should receive them rather than calling setwd() with
# a machine-specific path.
getwd()
list.files()

write.csv(iris, file = "newiris.csv", row.names = FALSE)
list.files()

newiris <- read.csv("newiris.csv")
newiris

# read.table() reads the same file once separator and header are spelled out.
newiris2 <- read.table("newiris.csv", sep = ",", header = TRUE)
newiris2

# row.names = TRUE writes the row numbers as an extra leading column
# (comparable to ROWNUM in Oracle).
write.csv(newiris2, file = "newiris2.csv", row.names = TRUE)

summary(newiris2)

# `[[j]]` extracts column j; `[[i, j]]` extracts the single cell in row i,
# column j. Leaving one index empty is not allowed with `[[`.
newiris2[[2]]
newiris2[[2, 1]]

# Per-column summaries.
invisible(lapply(newiris2, function(column) print(summary(column))))

# Base-graphics scatter plots ----
cars
head(cars)
plot(cars)

# NOTE(review): `x` and `y` are the numeric vectors assigned earlier in the
# script. Argument names are case-sensitive: the label is `ylab`, not `yLab`.
plot(x, main = "The Jacob", xlab = "X-axis Label", ylab = "Y-axis Label")
plot(
  x,
  main = "The plotting x * y by Jacob",
  xlab = "X-axis Label",
  ylab = "Y-axis Label"
)
x

plot(
  cars,
  main = "The plotting x * y by Jacob",
  xlab = "X-axis Label",
  ylab = "Y-axis Label"
)
plot(
  cars,
  main = "cars: Speed vs. Stopping Distance (1920) by Jacob",
  xlab = "Speed (MPH)",
  ylab = "Stopping Distance (ft)"
)
plot(
  cars,
  main = "cars: Speed vs. Stopping Distance (1920)",
  xlab = "Speed (MPH)",
  ylab = "Stopping Distance (ft)"
)

# type = "n" sets up the axes without drawing points, so the grid can be
# drawn first and the points added on top of it.
plot(x, y, type = "n")
grid()
points(x, y)

plot(
  cars,
  main = "cars: Speed vs. Stopping Distance (1920)",
  xlab = "Speed (MPH)",
  ylab = "Stopping Distance (ft)",
  type = "n"
)
grid()
points(cars)

# Other data sets, titled with their own names instead of the cars labels.
plot(USArrests, main = "USArrests")
plot(iris, main = "iris")

# Iris petal scatter plot with one symbol per species ----
head(iris)

# Plain scatter plot first.
with(iris, plot(Petal.Length, Petal.Width))

# `pch` must be an argument of plot() -- inside its parentheses -- so that it
# reaches plot() and `Species` is looked up in iris. It must also be named:
# the third positional argument of plot() is `type`.
with(iris, plot(Petal.Length, Petal.Width, pch = as.integer(Species)))

# Name `pch` here as well: the fourth positional argument of legend() is
# `fill`, which draws colour boxes instead of plotting symbols.
legend(1.5, 2.4, c("setosa", "versicolor", "virginica"), pch = 1:3)

# Same plot driven by a factor, so symbols and legend follow its levels.
f <- factor(iris$Species)
f
with(iris, plot(Petal.Length, Petal.Width, pch = as.integer(f)))
legend(1.3, 2.5, levels(f), pch = seq_along(levels(f)))

# Line-type legend and a line series on a pre-sized frame ----

# All line types go into one `lty` vector, one entry per label. legend()
# adds to the plot that is currently open, so the coordinates must lie
# inside that plot's axis ranges.
legend(
  1.5, 2.4,
  c("Estimate", "Lower conf lim", "Upper conf lim"),
  lty = c("solid", "dashed", "dotted")
)

# NOTE(review): `year` and `defense` are not created in this part of the
# script; supply two numeric vectors of equal length before running.

# Empty frame with fixed axis ranges; the series is added afterwards.
plot(
  c(1968, 2010), c(0, 10),
  type = "n",
  xlab = "Year",
  ylab = "Expenditures/GDP(%)"
)
lines(year, defense, col = "red", lwd = 2.5)

year
defense

# Simple linear regression with the fitted line drawn on the scatter plot ----
# NOTE(review): `x` and `y` are the numeric vectors assigned earlier in the
# script.
m <- lm(y ~ x)
plot(y ~ x)
abline(m)

# Calling plot() on the fitted model draws its diagnostic plots.
plot(m)

# The package name must be a quoted string for install.packages(); install
# only when it is missing, then attach it.
if (!requireNamespace("faraway", quietly = TRUE)) {
  install.packages("faraway")
}
library(faraway)

data(strongx)
str(strongx)
strongx

# Same workflow on a packaged data set, using `data =` in both calls.
m <- lm(crossx ~ energy, data = strongx)
m
plot(crossx ~ energy, data = strongx)
abline(m)


'Data Analytics' 카테고리의 다른 글

프로세싱, 날개를 달다. [Learning Processing]  (0) 2014.02.02
why analyze data ?  (0) 2014.01.30
kmeans in R  (0) 2014.01.30
plot( cars, main ="", xlab="", ylab="", type="n")  (0) 2014.01.30
http://www.r-tutor.com/  (0) 2014.01.29