Reshaping data into panel format in R

2023-02-01 17:56 问答作者：

I have a quite long and (for me) complex question. I have voting data from the Council of the European Union, where the voting behaviour for each country has been coded according to a nominal scale:

0: yes
1: no
2: Abstention
3: no with a negative statement
4: Abstention with a negative statement
5: yes with a negative statement

The data is in the following format (see the end of the post for a dump of 20 observations from the data set):

Country1 Country2 Country3 ... Date
1        0        0        ... 2004-12-12
1        2        0        ... 2003-02-14
2        0        1        ... 2004-05-22
...      ...      ...      ... ...

First of all, I would like to aggregate the data into monthly intervals, so that for each month we have a count of how many 0s, 1s, 2s, etc. there were for each country. Ideally the data should look like this:

Month    Country   sum of 0s    sum of 1s   Sum of 2s
January  Country1  2            0           1  
January  Country2  4            0           0
...      ...       ...          ...         ...

Once this has been done I would like to put the data into panel format like this:

Country   Month    sum of 0s   sum of 1s   sum of 2s
Country1  January  2           0           1 
Country1  February 0           1           3
...       ...      ...         ...         ...
Country2  January  4           0           0
Country2  February 2           2           0
...       ...      ...         ...         ...

I am sorry if this is a very time-consuming question, but I have been playing around with aggregate(), by() and various apply functions forever without being able to get the desired result. Any help will be greatly appreciated!

20 observations from the data set (output from the dput() function):

    structure(list(Recitals = c(29L, 13L, 2L, 20L, 10L, 18L, 29L, 
6L, 4L, 16L, 7L, 6L, 12L, 23L, 6L, 10L, 2L, 6L, 9L, 8L, 7L), 
    Voting_Rule = structure(c(4L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 
    4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 4L, 4L, 4L), .Label = c("0", 
    "Qualified Majority", "Simple Majority", "Unanimity", "Qualified majority", 
    "Simple majority"), class = "factor"), Belgium = c(0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), Denmark = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
    Czech.Republic = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Germany = c(0L, 
    0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L), Estonia = c(0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), Greece = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Spain = c(0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), France = c(0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Ireland = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L), Italy = c(0L, 0L, 0L, 0L, 0L, 0L, 5L, 
    0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
    Cyprus = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Latvia = c(0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L), Lithuania = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Luxembourg = c(0L, 
    0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L), Hungary = c(0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), Malta = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Netherlands = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L), Austria = c(0L, 0L, 0L, 1L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), Poland = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Portugal = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L), Slovenia = c(0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), Slovakia = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Finland = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L), Sweden = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
    UK = c(0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 5L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Dates = structure(c(12716, 
    12716, 12716, 12674, 12674, 12698, 12705, 12724, 12738, 12738, 
    12716, 12741, 12744, 12754, 12754, 12758, 12758, 12758, 12759, 
    12759, 12759), class = "Date")), .Names = c("Recitals", "Voting_Rule", 
"Belgium", "Denmark", "Czech.Republic", "Germany", "Estonia", 
"Greece", "Spain", "France", "Ireland", "Italy", "Cyprus", "Latvia", 
"Lithuania", "Luxembourg", "Hungary", "Malta", "Netherlands", 
"Austria", "Poland", "Portugal", "Slovenia", "Slovakia", "Finland", 
"Sweden", "UK", "Dates"), row.names = c(752L, 753L, 762L, 774L, 
775L, 776L, 777L, 780L, 789L, 790L, 793L, 794L, 797L, 816L, 817L, 
818L, 819L, 820L, 824L, 825L, 826L), class = "data.frame")

Hadley Wickham's reshape2 and plyr packages would greatly help with this kind of work (only reshape2 is needed for the code below):

library(reshape2)

# Load your data into the variable d1, e.g. by pasting the dput() output:
# d1 <- structure( ... your data ... )

# Step 1: melt the wide data into long format, giving one row per
# (date, country) vote. Molten data is a convenient starting point for
# further transformation.
# Call the exported generic melt(); it dispatches to the data.frame method,
# so there is no need to reach into the package namespace with `:::`.
d2 <- melt(
  d1[, -(1:2)], # drop the first two columns (Recitals, Voting_Rule)
  id.vars = "Dates",
  variable.name = "Country",
  value.name = "value"
)

# Step 2: create a variable "Month" from the date column. The column is named
# "Dates"; spell it out in full instead of relying on `$` partial matching.
# NOTE: months() returns only the month name (in the current locale), so the
# same calendar month from different years is pooled into one group.
d2$Month <- months(d2$Dates)

# Step 3: cast the data to the desired format, one row per Country/Month and
# one column per vote code. length() counts the votes that fall in each cell;
# value.var is given explicitly so dcast() does not have to guess it.
d3 <- dcast(
  d2,
  Country + Month ~ value,
  fun.aggregate = length,
  value.var = "value"
)

# Step 4: finally, give the newly created count columns descriptive names.
names(d3)[-(1:2)] <- sprintf("sum of %ss", names(d3)[-(1:2)])

继续阅读：aggregate dataset panel r reshape

Reshaping data into panel format in R

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

河神2九牛入海钓河妖是第几集河妖什么来历可活吞牛？

性激素六项检查的最佳时间是多久？多少钱？？

Easiest way to get words of one line from istream into a vector?

《梦在燃烧 (《三国演义》动画片主题曲)》MP3歌词-汤子星？

抽烟只抽炫赫门？