ggplot2 - geom_ribbon bug?
This code throws an error and I can't figure out why...
library( plyr )
library( ggplot2 )
library( grid )
library( proto )
# The master data frame: 50 observations (row names 28653..28702) with
#   Agg52WkPrceRange          - integer column
#   OfResidualPntReturn52CWk  - numeric column (the y values plotted below)
#   IndependentVariableBinned - factor with levels "1".."10" (the x bins)
# The factor is stored as integer level codes plus a .Label vector, so every
# code must be a valid integer literal between 1L and 10L.
myDF = structure(list(Agg52WkPrceRange = c(2L, 2L, 2L, 2L, 2L, 2L, 3L,
5L, 3L, 5L, 3L, 5L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 4L), OfResidualPntReturn52CWk = c(0.201477324,
0.22350293, 0.248388728, 0.173871456, 0.201090654, 0.170666183,
0.18681883, 0.178840521, 0.159744891, 0.129811042, 0.13209741,
0.114989407, 0.128347625, 0.100945992, 0.057017002, 0.081123718,
0.018900252, 0.021784814, 0.081931816, 0.059067844, 0.095879746,
0.038977508, 0.078895248, 0.051344317, 0.077515295, 0.011776214,
0.099216033, 0.054714439, 0.022879951, -0.079558277, -0.050889584,
-0.006934821, -0.003407085, 0.032545474, -0.003387139, 0.030418511,
0.053942523, 0.051398537, 0.073482355, 0.087963039, 0.079555591,
-0.040490418, -0.130754663, -0.125826649, -0.141766316, -0.150708718,
-0.171906882, -0.174623614, -0.212945405, -0.174480554), IndependentVariableBinned = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 3L, 10L, 3L, 10L, 4L, 10L, 4L, 2L, 4L, 4L,
4L, 5L, 2L, 2L, 2L, 3L, 3L, 5L, 5L, 5L, 5L, 6L, 3L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 8L, 9L, 9L, 9L, 9L,
10L, 10L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10"), class = "factor")), .Names = c("Agg52WkPrceRange",
"OfResidualPntReturn52CWk", "IndependentVariableBinned"), row.names = 28653:28702, class = "data.frame")
# Secondary data frame: one row per level of IndependentVariableBinned holding
# the mean of OfResidualPntReturn52CWk for that bin. The anonymous function
# returns an unnamed value; the layers below refer to that result column as V1.
meansByIndependentVariableBin = ddply( myDF , .( IndependentVariableBinned ) , function( df ) mean( df[[ "OfResidualPntReturn52CWk" ]] ) )
# Construct the plot. The base call maps BOTH x and y from myDF as plot-wide
# defaults; note that y names a column that exists only in myDF.
thePlot = ggplot( myDF , aes_string( x = "IndependentVariableBinned" , y = "OfResidualPntReturn52CWk" ) )
# Points and line are drawn from the summary frame and override y with V1.
thePlot = thePlot + geom_point( data = meansByIndependentVariableBin , aes( x = IndependentVariableBinned , y = V1 ) )
thePlot = thePlot + geom_line( data = meansByIndependentVariableBin , aes( x = IndependentVariableBinned , y = V1 , group = 1 ) )
# The ribbon also uses the summary frame but sets only ymin/ymax, not y.
# NOTE(review): per the answer further down, this layer therefore still picks
# up the plot-wide y mapping, which the summary frame cannot satisfy.
thePlot = thePlot + geom_ribbon( data = meansByIndependentVariableBin , aes( group = 1 , x = IndependentVariableBinned , ymin = V1 - 1 , ymax = V1 + 1 ) )
# Rendering is where the reported error is raised; it does not occur when the
# ribbon layer is left out.
print( thePlot )
I've tried with/without group=1. The error is:
Error in eval(expr, envir, enclos) :
object 'OfRelStrength52CWk' not found
but not sure how that is relevant?? I must be missing something obvious. Take away the last geom (ribbon) and it plots just fine!
There is no bug in `geom_ribbon`. The error occurs because you map `y = OfResidualPntReturn52CWk` in your `ggplot()` call, so `geom_ribbon` inherits that mapping and looks for the column. Since you are passing a different data frame to `geom_ribbon`, one that does not contain that column, the lookup fails and you get the error. Note also that although you set `y = OfResidualPntReturn52CWk` in your `ggplot()` call, no layer actually uses it, so it is immaterial to the plot.
Here is how to do it correctly (if I am understanding what you intend to do in this plot):
# Short alias for the per-bin means so the layer calls stay readable.
MIVB <- meansByIndependentVariableBin

# Map only x plot-wide; every layer states its own y (or ymin/ymax), so no
# layer inherits a y column that is absent from its own data frame.
thePlot <- ggplot(data = myDF, mapping = aes(x = IndependentVariableBinned))

# Individual observations from myDF.
thePlot <- thePlot + geom_point(mapping = aes(y = OfResidualPntReturn52CWk))

# Per-bin means (column V1 of MIVB) as red points joined by a red line.
thePlot <- thePlot + geom_point(data = MIVB, mapping = aes(y = V1), colour = "red")
thePlot <- thePlot + geom_line(data = MIVB, mapping = aes(y = V1, group = 1), colour = "red")

# Translucent band spanning V1 - 1 to V1 + 1.
thePlot <- thePlot + geom_ribbon(
  data = MIVB,
  mapping = aes(group = 1, ymin = V1 - 1, ymax = V1 + 1),
  alpha = 0.2
)
Here is the output it produces
Here is another way to do it, without computing the means in advance. I have also used the mean ± one standard error for the ribbon, as I find the choice of ±1 to be arbitrary.
# Replace the factor column with its numeric level codes so that x is a
# number rather than a factor. This overwrites the column in myDF.
myDF[["IndependentVariableBinned"]] <- as.numeric(myDF[["IndependentVariableBinned"]])

# x and y are both mapped plot-wide; the summary layers compute their
# statistics from myDF itself, so no separate means data frame is needed.
thePlot <- ggplot(
  data = myDF,
  mapping = aes(x = IndependentVariableBinned, y = OfResidualPntReturn52CWk)
) +
  # individual observations
  geom_point() +
  # mean of y at each x, drawn as red points joined by a red line
  geom_point(stat = "summary", fun.y = "mean", colour = "red") +
  geom_line(stat = "summary", fun.y = "mean", colour = "red") +
  # translucent band whose ymin/ymax come from mean_se at each x
  geom_ribbon(stat = "summary", fun.data = "mean_se", alpha = 0.2)
This produces
@Ramnath is spot on. Your initial call to `ggplot` is not needed, as all of the layers you are plotting come from the summarized `data.frame` made by `ddply()`. You can also simplify your call to `ddply()` by using the `summarize` function:
# Per-bin mean of OfResidualPntReturn52CWk, stored in an explicitly named
# column `means`, with one row per level of IndependentVariableBinned.
meansByIndependentVariableBin2 <- ddply(
  .data = myDF,
  .variables = .(IndependentVariableBinned),
  .fun = summarize,
  means = mean(OfResidualPntReturn52CWk)
)
I would then plot your graph as such:
# Plot built entirely from the summary frame: the bin factor is converted to
# its numeric codes for x, and the per-bin mean is y. The ribbon is added
# first so the points and line are drawn on top of it.
ggplot(
  data = meansByIndependentVariableBin2,
  mapping = aes(x = as.numeric(IndependentVariableBinned), y = means)
) +
  # translucent band from means - 1 to means + 1
  geom_ribbon(mapping = aes(ymin = means - 1, ymax = means + 1), alpha = 0.4) +
  geom_point() +
  geom_line()
Is that what you had in mind? I added an alpha to the ribbon layer so we can see the lines and points clearly.
精彩评论