# K-means clustering of the iris measurements.

# Open the kmeans help page for reference.
help(kmeans)

# Inspect the raw data; View() needs an interactive session.
iris
if (interactive()) View(iris)

# Work on a copy that holds only the four numeric measurements: the Species
# factor must be removed before the data is handed to kmeans().
iris.features <- iris
iris.features
iris.features$Species <- NULL
if (interactive()) View(iris.features)

# Scatterplot matrix of every pair of columns.
plot(iris)

# kmeans() chooses random starting centres; fix the seed so the cluster
# numbering and sizes are repeatable between runs.
set.seed(1)
results <- kmeans(iris.features, 3)
results
results$size
results$iter
results$totss
results$cluster

head(iris)

# Cross-tabulate the true species against the assigned cluster.
table(iris$Species, results$cluster)

# Petal and sepal scatterplots coloured by assigned cluster.
plot(iris[c(3, 4)], col = results$cluster)
plot(iris[c(1, 2)], col = results$cluster)
# Exploratory arithmetic, correlations and a simple regression.
# NOTE(review): xvar, zvar and df are not created in this part of the
# session and ggplot2 is not attached here -- confirm they are available
# before running this section.
xvar * zvar / 5
-2 * xvar + xvar * zvar / 5

# aes() only builds a ggplot2 aesthetic mapping; it is not data that base
# plot() can draw, so the vector itself is plotted instead.
plot(xvar)
plot(xvar, 1:20)

# Pairwise correlations between the columns of df.
df
cor(df$x, df$y)
cor(df$x, df$z)
cor(df$y, df$z)
cor(df)
round(cor(df), 2)
round(cor(df), 3)

# Linear regression of y on x. The data= form keeps clean coefficient names
# (x rather than df$x).
fit <- lm(y ~ x, data = df)
fit
summary(fit)

# Scatterplots with hollow points and a fitted regression line.
df
ggplot(df, aes(x = x, y = y))
ggplot(df, aes(x = x, y = y)) +
  geom_point(shape = 1)
ggplot(df, aes(x = x, y = y)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
ggplot(df, aes(x = x, y = z)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
ggplot(df, aes(x = y, y = z)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
# Simulate five noisy series of length 20 and collect them in a data frame.
# The seed is fixed so the random draws are repeatable.
set.seed(955)
vvar <- seq_len(20) + rnorm(n = 20, sd = 3)
wvar <- seq_len(20) + rnorm(n = 20, sd = 5)
xvar <- rev(seq_len(20)) + rnorm(n = 20, sd = 3)
yvar <- seq_len(20) / 2 + rnorm(n = 20, sd = 10)
zvar <- rnorm(n = 20, sd = 6)

# One column per series; the column names match the variable names.
data <- data.frame(
  vvar = vvar,
  wvar = wvar,
  xvar = xvar,
  yvar = yvar,
  zvar = zvar
)
# Sample of the resulting rows:
#          vvar       wvar      xvar       yvar        zvar
#  -4.252354091  5.1219288 16.021933 -15.156368 -4.08690435
#   1.702317971 -1.3234340 15.838169 -24.063902  3.46842292
#   4.323053753 -2.1570874 19.855167   2.306770 -3.04493067
#  ...
#  17.220012698 19.5663648  4.887111  30.906004 -6.83781743
#  17.793359218 20.1976463 -2.880213  15.777335 -1.50437241
#  19.319909163 19.4131664 -5.236230  24.047036 -2.67059907
# Correlation-matrix plots with the 'ellipse' package.

# Install the package only when it is missing, rather than on every run.
if (!requireNamespace("ellipse", quietly = TRUE)) {
  install.packages("ellipse")
}
library(ellipse)

# Make the correlation table
ctab <- cor(data)
# round(ctab, 2)
#       vvar  wvar  xvar  yvar  zvar
# vvar  1.00  0.61 -0.85  0.75 -0.21
# wvar  0.61  1.00 -0.81  0.54 -0.31
# xvar -0.85 -0.81  1.00 -0.63  0.24
# yvar  0.75  0.54 -0.63  1.00 -0.30
# zvar -0.21 -0.31  0.24 -0.30  1.00
ctab

# Make the graph
plotcorr(ctab)

# Same ramp interpolated in the default RGB colour space, for comparison.
# (ctab + 1) / 2 rescales the correlations from [-1, 1] to the [0, 1] range
# that the colorRamp() function expects.
colorfun_rgb <- colorRamp(c("#CC0000", "white", "#3366CC"))
plotcorr(ctab, col = rgb(colorfun_rgb((ctab + 1) / 2), maxColorValue = 255))

# Do the same, but with colors interpolated in Lab space.
colorfun <- colorRamp(c("#CC0000", "white", "#3366CC"), space = "Lab")
plotcorr(ctab, col = rgb(colorfun((ctab + 1) / 2), maxColorValue = 255))
# Histograms with a bin width of 10 and the x axis limited to 1..70.
# NOTE(review): r_master, an `age` column in df, and an attached ggplot2 are
# not established in this part of the file -- confirm before running.
ggplot(r_master,aes(age))+geom_histogram(binwidth=10)+scale_x_continuous(limits=c(1,70))
ggplot(df,aes(age))+geom_histogram(binwidth=10)+scale_x_continuous(limits=c(1,70))
ggplot(df,aes(xvar))+geom_histogram(binwidth=10)+scale_x_continuous(limits=c(1,70))
# Write the session's command history to the given file.
savehistory("C:/Users/Administrator/Desktop/r20131222.Rhistory")
# Load the tab-separated movie data set.
# Backslashes in a Windows path must be doubled inside an R string literal:
# a single "\c" is an unrecognised escape sequence and does not parse.
mydata <- read.csv(
  "D:\\contest_bigdata\\1_movie_data\\read_f.txt",
  sep = "\t",
  header = TRUE
)
mydata

# Open the documentation for pnorm; the `?` operator goes before the name.
?pnorm

# Install RODBC only if it is not already available.
if (!requireNamespace("RODBC", quietly = TRUE)) {
  install.packages("RODBC")
}

names(mydata)
# Renaming factor levels: once with plyr's revalue(), then with base R.
library(plyr)

sizes <- factor(c("small", "large", "large", "small", "medium"))
print(sizes)
print(levels(sizes))

# revalue() maps old level names to new ones and returns a new factor.
sizes1 <- revalue(sizes, replace = c(small = "S", medium = "M", large = "L"))
print(sizes1)

# Base R: rename one level by name, one by position, then all at once.
levels(sizes)[match("large", levels(sizes))] <- "L"
levels(sizes)[2] <- "M"
levels(sizes) <- c("L", "M", "S")
print(sizes)
# Recoding categorical and continuous variables on a PlantGrowth subset.
data(PlantGrowth)
head(PlantGrowth)

# Five rows covering all three treatment groups.
pg <- PlantGrowth[c(1, 2, 11, 21, 22), ]
print(pg)

# Categorical -> categorical: look each group up in oldvals and take the
# value at the same position in newvals.
oldvals <- c("ctrl", "trt1", "trt2")
newvals <- factor(c("No", "Yes", "Yes"))
pg$treatment <- newvals[match(pg$group, oldvals)]
print(pg)

# Combine a categorical and a continuous condition into a new column;
# rows not yet matched stay NA.
pg$newcol[with(pg, group == "ctrl" & weight < 5)] <- "No_Small"
print(pg)
pg$newcol[with(pg, group == "ctrl" & weight >= 5)] <- "No_Large"
pg$newcol[pg$group %in% c("trt1", "trt2")] <- "Yes"
print(pg)

# Cross two factors into a single combined factor.
pg$grtrt <- interaction(pg$group, pg$treatment)
print(pg)

# Start again from the raw subset for the continuous -> categorical examples.
pg <- PlantGrowth[c(1, 2, 11, 21, 22), ]
print(pg)

# Default intervals are closed on the right.
pg$class <- cut(pg$weight, breaks = c(0, 5, 6, Inf))
print(pg)

# Same breaks with readable labels.
pg$class <- cut(
  pg$weight,
  breaks = c(0, 5, 6, Inf),
  labels = c("small", "medium", "large")
)
print(pg)

# Intervals closed on the left instead.
pg$class <- cut(pg$weight, breaks = c(0, 5, 6, Inf), right = FALSE)
print(pg)
# Derive metric height, metric weight and BMI columns for heightweight.
library(gcookbook)
data(heightweight)

hw <- heightweight
head(hw)

# Height in centimetres, added by direct column assignment.
hw[["heightCm"]] <- hw[["heightIn"]] * 2.54

# The same conversion plus weight in kilograms, via transform().
hw <- transform(
  hw,
  heightCm = heightIn * 2.54,
  weightKg = weightLb / 2.204
)

# BMI is computed from the two columns created in the previous step.
hw <- transform(hw, bmi = weightKg / (heightCm / 100)^2)

# mutate() builds all three columns in a single call; here bmi refers to
# heightCm and weightKg defined earlier in the same call.
hw <- mutate(
  hw,
  heightCm = heightIn * 2.54,
  weightKg = weightLb / 2.204,
  bmi = weightKg / (heightCm / 100)^2
)
# Group-wise transformations and summaries of the cabbages data with ddply().
# NOTE(review): ddply() comes from plyr, which is attached earlier in the
# file -- confirm it is loaded when running this section on its own.
library(MASS)
head(cabbages)

# Deviation of each head weight from the mean of its cultivar.
cb <- ddply(cabbages, "Cult", transform, DevWt = HeadWt - mean(HeadWt))
cb

# Deviations within each cultivar/date combination.
cb2 <- ddply(cabbages, c("Cult", "Date"), transform,
  DevWt = HeadWt - mean(HeadWt),
  DevVitC = VitC - mean(VitC)
)

# Group means. The vitamin C column was misspelled "VicC_mean".
cb3 <- ddply(cabbages, c("Cult", "Date"), summarise,
  Weight_mean = mean(HeadWt),
  VitC_mean = mean(VitC)
)

# Copy with a few missing weights to demonstrate NA handling.
c1 <- cabbages
head(c1)
c1$HeadWt[c(1, 20, 45)] <- NA

# Naive summary: NA values are not removed and length() counts every row.
ddply(c1, c("Cult", "Date"), summarise,
  wt_mean = mean(HeadWt),
  sd = sd(HeadWt),
  n = length(HeadWt)
)

# NA-aware summary: drop NAs and count only the observed values.
ddply(c1, c("Cult", "Date"), summarise,
  wt_mean = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt))
)

# As above, plus the standard error computed from the sd and n columns
# defined just before it.
ddply(c1, c("Cult", "Date"), summarise,
  wt_mean = mean(HeadWt, na.rm = TRUE),
  sd = sd(HeadWt, na.rm = TRUE),
  n = sum(!is.na(HeadWt)),
  se = sd / sqrt(n)
)
# Data preparation for visualization is now complete, using the three packages MASS, reshape and plyr.
# [Source] R Data Handling_2 | Author: 자유인 후
# Basic values and matrix construction.

# List the objects currently defined in the workspace, with their structure.
ls.str()

# Constants and simple arithmetic.
print(pi)
print(sqrt(2))
sqrt(2)
pi * sqrt(2)
pi

# matrix(data, nrow, ncol): the cell values must be supplied as ONE vector,
# so several values are combined with c(). "(1,2,3,4)" and "[1,2,3,4]" are
# not valid R syntax, and every opening parenthesis needs exactly one closer.
matrix(1, 2, 3)
matrix(c(1, 2, 3, 4), 2, 2)
print(matrix(c(1, 2, 3, 4), 2, 2))

# A short data vector is recycled to fill a larger matrix.
matrix(c(1, 2, 3, 4), 2, 4)
matrix(c(1, 2, 3, 4), 2, 12)
matrix(1:12, 2, 12)

# Exactly as many values as cells.
matrix(1:24, 2, 12)

# Data lengths that do not fit the matrix still work, but R emits a warning.
matrix(1:25, 2, 12)
matrix(1:23, 2, 12)

# Character matrices.
matrix(c("a", "b"), 2, 12)
matrix(c("a", "b"), 10, 12)

# ":" only generates numeric sequences; the built-in `letters` vector
# provides "a" to "z".
matrix(letters, 2, 13)

# Mixing numbers and strings in c() coerces everything to character.
matrix(c("a", "b", "c", 3, "z"), 10, 12)
# Building lists and extracting their elements.

list("a", "b", "c")
list_01 <- list("a", "b", "c")
list_01

# cat() prints its arguments and returns NULL, so it cannot be used to put
# values into a list; pass the values themselves.
list_01 <- list("a", "b", "c", 34)
list_01
list_01 <- list("a", "b", "c", c(1:3))
list_01

# [ ] returns a sub-list; [[ ]] returns the element itself. Chaining [ ]
# (e.g. list_01[4][2]) only re-subsets the one-element sub-list, and ".1"
# or "$1" are not valid ways to index.
list_01[1]
list_01[4]
list_01[[4]]
list_01[[4]][1]
list_01[[4]][2]

# Several strings inside a list element need c(); a bare ("x", "y") does
# not parse.
list_02 <- list("a", "b", "c", c(1:3))
list_02 <- list("a", "b", "c", c(1:3), "d")
list_02 <- list("a", "b", "c", c(1:3), c("x", "y"))
list_02 <- list("a", "b", "c", c(1:3), "xy")
list_02
list_02 <- list("a", "b", "c", c(1:3), "xy", matrix(99, 3, 4))
list_02

# Assignment is "<-"; "y < x*4" would be a comparison against an
# undefined y.
x <- 3
y <- x * 4
z <- sqrt(x^2 + y^2)
z
x^2
y^2

list_03 <- list(
  "a", "b", "c",
  c(1:3),
  c("jacob", "fly", "to sky"),
  "xy",
  matrix(99, 3, 4)
)
list_03
list_02[1]

# The matrix is the seventh element of list_03 (list_02 has only six).
# Extract it with [[ ]] before using matrix-style [row, column] indexing.
list_03[7]
list_03[[7]]
list_03[[7]][1, 1]
list_03[[7]][3, 4]
# Log-transform a numeric vector and compare it with the original.
ls.str()

x <- c(0, 1, 1, 2, 3, 34, 5, 546, 4, 735, 67, 458, 3, 56, 45, 64, 56)
print(x)

# Shift by one before taking logs so the zero entry stays finite.
y <- log(1 + x)
getwd()
print(y)

# Natural logs of a few reference values (log(0) is -Inf).
invisible(lapply(c(0:6, 100, 10), function(v) print(log(v))))

# Linear association between the raw and log-transformed values.
cor(x, y)
cov(x, y)

log(1000)
version
1:25
# Selecting columns and rows from the mtcars data frame.
mtcars

# [[ ]] extracts exactly one column as a vector.
mtcars[["mpg"]]

# Several columns at once need single brackets with a character vector;
# [[ ]] with two names (mtcars[["mpg", "hp"]] or mtcars[[c("mpg", "hp")]])
# is an error.
mtcars[c("mpg", "hp")]

# Column names are strings and cannot be multiplied; compute the product
# from the column values and bind it on as an extra column.
cbind(mtcars[c("mpg", "hp")], mpg_x_hp = mtcars$mpg * mtcars$hp)

# Rows where mpg exceeds 21, all columns.
mtcars[mtcars$mpg > 21, ]
# Read an HTML table from a Wikipedia page with the XML package.

# Install the package only when it is missing, rather than on every run.
if (!requireNamespace("XML", quietly = TRUE)) {
  install.packages("XML")
}
library(XML)

url <- "http://en.wikipedia.org/wiki/World_population"
url

# which = 3 selects the third table found on the page.
# NOTE(review): the page layout may have changed since this was written --
# confirm that table 3 is still the intended one.
tbl <- readHTMLTable(url, which = 3)
head(tbl)
tbl

# NOTE(review): ggplot() is called with data only (no aes() mapping or geom
# layer) and ggplot2 is not attached in this section -- confirm the intent.
ggplot(tbl)
# Round-trip the iris data set through CSV files.
data(iris)
iris
head(iris)
nrow(iris)
ncol(iris)

# Use an explicit output directory instead of setwd(), which changes global
# state for everything that runs afterwards. Fall back to the current
# working directory when the preferred location does not exist.
work_dir <- "D:\\jacob\\R_working_dir"
if (!dir.exists(work_dir)) {
  work_dir <- getwd()
}
iris_csv <- file.path(work_dir, "newiris.csv")
iris2_csv <- file.path(work_dir, "newiris2.csv")
list.files(work_dir)

# The argument is row.names; "row.name" relies on partial argument matching.
write.csv(iris, file = iris_csv, row.names = FALSE)
list.files(work_dir)

# Read the file back (newiris only exists after this line), once with
# read.csv() and once with the equivalent read.table() call.
newiris <- read.csv(iris_csv)
newiris
newiris2 <- read.table(iris_csv, sep = ",", header = TRUE)
newiris2

# With row.names = TRUE the row numbers are written as an extra first
# column (similar to ROWNUM in Oracle).
write.csv(newiris2, file = iris2_csv, row.names = TRUE)

summary(newiris2)

# [[j]] extracts column j as a vector and [[i, j]] a single cell. A row
# index with an empty column, such as newiris2[[1, ]], is an error.
newiris2[[2, 1]]
newiris2[[1]]
newiris2[[2]]
newiris2[[3]]
newiris2[[4]]
newiris2[[5]]

# Per-column summaries.
summary(newiris2[[1]])
summary(newiris2[[2]])
summary(newiris2[[3]])
summary(newiris2[[5]])
# Base-graphics scatterplots: titles, axis labels and grid lines.
cars
head(cars)
plot(cars)

# The axis-label arguments are xlab and ylab; "yLab" is not a plot()
# argument.
# NOTE(review): x and y are created earlier in the file -- confirm they
# exist when running this section on its own.
plot(x, main = "The Jacob", xlab = "X-axis Label", ylab = "Y-axis Label")
plot(x,
  main = "The plotting x * y by Jacob",
  xlab = "X-axis Label",
  ylab = "Y-axis Label"
)
x

plot(cars,
  main = "The plotting x * y by Jacob",
  xlab = "X-axis Label",
  ylab = "Y-axis Label"
)
plot(cars,
  main = "cars: Speed vs. Stopping Distance (1920) by Jacob",
  xlab = "Speed (MPH)",
  ylab = "Stopping Distance (ft)"
)
plot(cars,
  main = "cars: Speed vs. Stopping Distance (1920)",
  xlab = "Speed (MPH)",
  ylab = "Stopping Distance (ft)"
)

# type = "n" sets up the axes without drawing the data, so the grid can be
# drawn first and the points added on top of it.
plot(x, y, type = "n")
grid()
points(x, y)

# Every argument must be separated by a comma, and points() needs the data
# to draw.
plot(cars,
  main = "cars: Speed vs. Stopping Distance (1920)",
  xlab = "Speed (MPH)",
  ylab = "Stopping Distance (ft)",
  type = "n"
)
grid()
points(cars)

# Data frames with more than two columns are drawn as a scatterplot matrix;
# the cars title and axis labels do not apply to them.
plot(USArrests, main = "USArrests: pairwise scatterplots", type = "n")
plot(iris, main = "iris: pairwise scatterplots", type = "n")
plot(iris, main = "iris: pairwise scatterplots")
# Scatterplot of petal size with one plotting symbol per species.
head(iris)
with(iris, plot(Petal.Length, Petal.Width))

# pch must be an argument of plot(), inside the call: handed to with() it is
# silently ignored. A bare third positional argument to plot() is taken as
# the plot type, so the species must be passed by name as pch.
with(iris, plot(Petal.Length, Petal.Width, pch = as.integer(Species)))

# Name the pch argument: the fourth positional argument of legend() is
# `fill`, which draws filled boxes instead of plotting symbols.
legend(1.5, 2.4, c("setosa", "versicolor", "virginica"), pch = 1:3)

# Same plot, with the symbols and the legend both derived from the factor
# so that they cannot drift apart.
f <- factor(iris$Species)
f
with(iris, plot(Petal.Length, Petal.Width, pch = as.integer(f)))
legend(1.3, 2.5, as.character(levels(f)), pch = seq_along(levels(f)))
# Legends keyed by line type, and a line chart on an empty canvas.

# All three line types belong inside one c(): closing the parenthesis after
# "solid" turns the other two strings into unrelated positional arguments.
# NOTE(review): legend() draws on the currently open plot -- confirm which
# plot these two calls are meant for.
legend(0.5, 95, c("Estimate", "Lower conf lim", "Upper conf lim"),
  lty = c("solid", "dashed", "dotted")
)
legend(1.5, 2.4, c("Estimate", "Lower conf lim", "Upper conf lim"),
  lty = c("solid", "dashed", "dotted")
)

# Set up empty axes for 1968-2010 and 0-10 %, then add the series as a line.
# NOTE(review): year and defense are not created in this part of the file --
# confirm they exist before running.
plot(c(1968, 2010), c(0, 10),
  type = "n",
  xlab = "Year", ylab = "Expenditures/GDP(%)"
)
lines(year, defense, col = "red", lwd = 2.5)
year
defense
# Simple linear regressions with fitted lines and diagnostic plots.

# NOTE(review): x and y are created earlier in the file -- confirm they
# exist when running this section on its own.
m <- lm(y ~ x)
plot(x ~ y)
plot(y ~ x)
abline(m)

# plot() on a fitted lm object draws the regression diagnostic plots.
plot(m)

# install.packages() takes the package name as a string, and the package has
# to be installed before library() can attach it.
if (!requireNamespace("faraway", quietly = TRUE)) {
  install.packages("faraway")
}
library(faraway)

# Load the data set before inspecting or modelling it.
data(strongx)
strongx
str(strongx)

m <- lm(crossx ~ energy, data = strongx)
m
plot(crossx ~ energy, data = strongx)
abline(m)
# Other posts in the 'Data Analytics' category ('Data Analytics' 카테고리의 다른 글):
#
# | Post | Date |
# |---|---|
# | 프로세싱, 날개를 달다. [Learning Processing] (0) | 2014.02.02 |
# | why analyze data ? (0) | 2014.01.30 |
# | kmeans in R (0) | 2014.01.30 |
# | plot( cars, main ="", xlab="", ylab="", type="n") (0) | 2014.01.30 |
# | http://www.r-tutor.com/ (0) | 2014.01.29 |