folha de dicas do ggplot
1.0.0
cheatsheet para ggplot2, compilado principalmente do livro ggplot2: Elegant Graphics for Data Analysis
A documentação do ggplot está disponível aqui.
Índice
geom_point
geom_bar
geom_line
geom_area
geom_path
geom_text
geom_tile
geom_polygon
geom_histogram e geom_freqpoly
geom_boxplot
geom_jitter
geom_density
# Chunk defaults: suppress warnings and messages, and set fig.width = 8 and
# fig.height = 4. opts_chunk is referenced through the knitr namespace so the
# call does not depend on knitr being attached.
knitr::opts_chunk$set(
  warning = FALSE, message = FALSE, fig.width = 8, fig.height = 4
)
library(ggplot2)
Carregar os dados de amostra
# Fix the RNG seed so the 100-row sample drawn below is reproducible.
set.seed(1410)
head(diamonds)
## carat cut color clarity depth table price x y z
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Random 100-row subset of diamonds.
dsmall <- diamonds[sample(nrow(diamonds), 100), ]

# Price against carat for the full data set.
qplot(carat, price, data = diamonds)
# Same axes on the subset; color and shape follow the `color` and `cut`
# columns, and alpha is passed as I(1 / 2).
qplot(
  carat, price,
  data = dsmall, color = color, shape = cut, alpha = I(1 / 2)
)
# Geom names are given without surrounding whitespace so they can be matched.
qplot(carat, price, data = dsmall, geom = c("point", "smooth"))
Existem muitos suavizadores diferentes que podem ser usados com o argumento `method`.
# Points plus a smoother requested with method = "lm".
qplot(carat, price, data = dsmall, geom = c("point", "smooth"), method = "lm")
# Price per carat for each diamond color: jittered points, then boxplots.
qplot(color, price / carat, data = diamonds, geom = "jitter")
qplot(color, price / carat, data = diamonds, geom = "boxplot")
# Carat distribution with the `color` column mapped to fill (histogram) and
# to line color (density).
qplot(carat, data = diamonds, geom = "histogram", fill = color)
qplot(carat, data = diamonds, geom = "density", color = color)
Altere a quantidade de suavização com o argumento `binwidth`.
# The same carat histogram at three bin widths: 1, 0.1 and 0.01.
qplot(carat, data = diamonds, geom = "histogram", binwidth = 1)
qplot(carat, data = diamonds, geom = "histogram", binwidth = 0.1)
qplot(carat, data = diamonds, geom = "histogram", binwidth = 0.01)
# Bar chart over diamond color.
qplot(color, data = diamonds, geom = "bar")
# bar plot of diamond color weighted by carat
qplot(color, data = diamonds, geom = "bar", weight = carat) +
  scale_y_continuous("carat")
head(economics)
## date pce pop psavert uempmed unemploy
## 1 1967-06-30 507.8 198712 9.8 4.5 2944
## 2 1967-07-31 510.9 198911 9.8 4.7 2945
## 3 1967-08-31 516.7 199113 9.0 4.6 2958
## 4 1967-09-30 513.3 199311 9.8 4.9 3143
## 5 1967-10-31 518.5 199498 9.7 4.7 3066
## 6 1967-11-30 526.2 199657 9.4 4.8 3018
# Ratio of unemploy to pop plotted against date as a line.
qplot(date, unemploy / pop, data = economics, geom = "line")
# Carat histogram faceted with the formula color ~ . and x limited to 0-3.
qplot(
  carat,
  data = diamonds, facets = color ~ .,
  geom = "histogram", binwidth = 0.1, xlim = c(0, 3)
)
xlim e ylim: definem limites para os eixos x e y (por exemplo, xlim=c(0,20))
main: título principal do gráfico
xlab e ylab
: rótulos para os eixos x e y
# Axis titles follow the mapping: carat (weight) is on x and price is on y.
qplot(
  carat, price,
  data = dsmall,
  xlab = "Weight (carats)",
  ylab = "Price ($)",
  main = "Price-weight relationship"
)
Gráficos multicamadas mais complicados podem ser gerados usando `ggplot()`.
# Three-row toy data set with x, y and a text label per row.
df <- data.frame(x = c(3, 1, 5), y = c(2, 4, 6), label = c("a", "b", "c"))
# Base plot with both axis titles removed; geom layers are added to `p` later.
p <- ggplot(df, aes(x, y, label = label)) + xlab(NULL) + ylab(NULL)
geom_point
# Point layer on the base plot, titled with the geom's name.
p + geom_point() + ggtitle("geom_point")
geom_bar
# Bars drawn with stat = "identity". The title is single-quoted because it
# contains double quotes.
p + geom_bar(stat = "identity") + ggtitle('geom_bar(stat="identity")')
geom_line
# Line layer on the base plot, titled with the geom's name.
p + geom_line() + ggtitle("geom_line")
geom_area
# Area layer on the base plot, titled with the geom's name.
p + geom_area() + ggtitle("geom_area")
geom_path
# Path layer on the base plot, titled with the geom's name.
p + geom_path() + ggtitle("geom_path")
geom_text
# Text layer; the text comes from the `label` aesthetic set on the base plot.
p + geom_text() + ggtitle("geom_text")
geom_tile
# Tile layer on the base plot, titled with the geom's name.
p + geom_tile() + ggtitle("geom_tile")
geom_polygon
# Polygon layer on the base plot, titled with the geom's name.
p + geom_polygon() + ggtitle("geom_polygon")
Para dados 1d, o geom é o histograma.
geom_histogram e geom_freqpoly
# Base plot of diamond depth with the x axis limited to 58-68, followed by a
# histogram drawn with default settings.
depth_dist <- ggplot(data = diamonds, mapping = aes(x = depth)) +
  xlim(58, 68)
depth_dist + geom_histogram()
Para comparar a distribuição entre grupos, há algumas opções:
# Histograms with y mapped to ..density.., faceted with cut ~ .
depth_dist +
  geom_histogram(aes(y = ..density..), binwidth = 0.1) +
  facet_grid(cut ~ .)
# Histogram with fill mapped to cut and position = "fill".
depth_dist +
  geom_histogram(aes(fill = cut), binwidth = 0.1, position = "fill")
# Frequency polygons with y mapped to ..density.. and color mapped to cut.
depth_dist +
  geom_freqpoly(aes(y = ..density.., color = cut), binwidth = 0.1)
geom_boxplot
# Depth by cut.
qplot(cut, depth, data = diamonds, geom = "boxplot")
library(plyr)
# Depth by carat, with boxes grouped by round_any(carat, 0.1, floor) and the
# x axis limited to 0-3.
qplot(
  carat, depth,
  data = diamonds, geom = "boxplot",
  group = round_any(carat, 0.1, floor), xlim = c(0, 3)
)
geom_jitter
# Jittered points of cty, then drv, against class in the mpg data.
qplot(class, cty, data = mpg, geom = "jitter")
qplot(class, drv, data = mpg, geom = "jitter")
geom_density
# Density of depth with x limited to 54-70.
qplot(depth, data = diamonds, geom = "density", xlim = c(54, 70))
# Same density with fill mapped to cut and alpha passed as I(0.2).
qplot(
  depth,
  data = diamonds, geom = "density", xlim = c(54, 70),
  fill = cut, alpha = I(0.2)
)
# 2000 rows with x and y each drawn from rnorm().
df <- data.frame(x = rnorm(2000), y = rnorm(2000))
norm <- ggplot(df, aes(x, y))
# The same scatterplot with three different point shapes.
norm + geom_point()
norm + geom_point(shape = 1)
norm + geom_point(shape = ".") # pixel-sized
library(scales)
# Black points with alpha set to 1/3, 1/5 and 1/10 in turn.
norm + geom_point(color = alpha("black", 1 / 3))
norm + geom_point(color = alpha("black", 1 / 5))
norm + geom_point(color = alpha("black", 1 / 10))
# Depth against table, with both axes limited to 50-70.
td <- ggplot(diamonds, aes(table, depth)) + xlim(50, 70) + ylim(50, 70)
td + geom_point()
td + geom_jitter()
# Jitter position created with width = 0.5 and reused by the layers below.
jit <- position_jitter(width = 0.5)
td + geom_jitter(position = jit)
# Same jitter with black points at alpha 1/10, 1/50 and 1/200.
td + geom_jitter(position = jit, color = alpha("black", 1 / 10))
td + geom_jitter(position = jit, color = alpha("black", 1 / 50))
td + geom_jitter(position = jit, color = alpha("black", 1 / 200))
library(maps)
data(us.cities)
# Keep only the cities whose pop exceeds 500000.
big_cities <- subset(us.cities, pop > 500000)
# City coordinates with the "state" borders layer added.
qplot(long, lat, data = big_cities) + borders("state", size = 0.5)
states <- map_data("state")
# Copy USArrests with lower-case column names, and add a lower-case `region`
# column built from its row names to serve as the merge key.
arrests <- USArrests
names(arrests) <- tolower(names(arrests))
arrests$region <- tolower(rownames(USArrests))
choro <- merge(states, arrests, by = "region")
# reorder the rows because order matters when drawing polygons and merge
# destroys the original ordering
choro <- choro[order(choro$order), ]
# Polygons filled by assault, then by the ratio assault / murder.
qplot(long, lat, data = choro, group = group, fill = assault, geom = "polygon")
qplot(
  long, lat,
  data = choro, group = group, fill = assault / murder, geom = "polygon"
)
Apenas dados extras
# Line plot of unemploy against date, with an empty x-axis title.
unemp <- qplot(
  date, unemploy,
  data = economics, geom = "line",
  xlab = "", ylab = "No. unemployed (1000s)"
)
# Drop the first three rows of presidential.
presidential <- presidential[-(1:3), ]
# Ranges of the plotted variables, kept for positioning annotations.
yrng <- range(economics$unemploy)
xrng <- range(economics$date)
# Vertical line at each `start` value in presidential.
unemp + geom_vline(aes(xintercept = as.numeric(start)), data = presidential)
# Rectangles from start to end spanning yrng, filled by party; the manual fill
# values are blue and red passed through alpha(..., 0.2).
unemp +
  geom_rect(
    aes(NULL, NULL, xmin = start, xmax = end, fill = party),
    ymin = yrng[1], ymax = yrng[2], data = presidential
  ) +
  scale_fill_manual(values = alpha(c("blue", "red"), 0.2))
# Add each `name` as text at (start, yrng[1]) on top of the previous plot.
last_plot() +
  geom_text(
    aes(x = start, y = yrng[1], label = name),
    data = presidential, size = 3, hjust = 0, vjust = 0
  )
# Wrap the annotation text with strwrap(width = 40) and join the resulting
# pieces with a newline character, so the caption renders on several lines.
caption <- paste(
  strwrap("Unemployment rates in the US have varied alot over the years", 40),
  collapse = "\n"
)
# Place the caption at (xrng[2], yrng[2]) with hjust = 1 and vjust = 1.
unemp +
  geom_text(
    aes(x, y, label = caption),
    data = data.frame(x = xrng[2], y = yrng[2]),
    hjust = 1, vjust = 1, size = 4
  )
# Row(s) of economics where unemploy equals its maximum, drawn as a red point
# with alpha 0.3.
highest <- subset(economics, unemploy == max(unemploy))
unemp + geom_point(data = highest, size = 3, color = alpha("red", 0.3))
# facet_grid() with three formulas: . ~ cyl, cyl ~ . and drv ~ cyl.
qplot(cty, hwy, data = mpg) + facet_grid(. ~ cyl)
qplot(cty, data = mpg, geom = "histogram", binwidth = 2) + facet_grid(cyl ~ .)
qplot(cty, hwy, data = mpg) + facet_grid(drv ~ cyl)
# Scatterplot with an "lm" smoother and se = FALSE. Logical constants are
# spelled out because T and F are ordinary variables that can be reassigned.
p <- qplot(displ, hwy, data = mpg) + geom_smooth(method = "lm", se = FALSE)
p + facet_grid(cyl ~ drv)
p + facet_grid(cyl ~ drv, margins = TRUE)
library(plyr)
# NOTE(review): `movies` is not created anywhere in this section; it is
# presumably the data set that newer ggplot2 releases ship in the separate
# ggplot2movies package - confirm it is available before running.
# Add a decade column: year rounded down to a multiple of 10.
movies$decade <- round_any(movies$year, 10, floor)
# Histograms of rating with y mapped to ..density.., for decades after 1890,
# wrapped into 6 columns by decade.
qplot(
  rating, ..density..,
  data = subset(movies, decade > 1890),
  geom = "histogram", binwidth = 0.5
) +
  facet_wrap(~decade, ncol = 6)