2016-07-01 14 views
2

Ich möchte die verschiedenen Zustände einer Sequenz abrufen und dabei die Wiederholungen loswerden, die Reihenfolge aber beibehalten. Lassen Sie mich das an einem Beispiel erklären.

(Titel: R – eindeutiges Sequenzmuster abrufen)

Meine 2 Sequenzen sehen wie folgt aus:

library(reshape2) 
library(dplyr)  

# Reshape the wide table to long format (one row per id and time point),
# then sort the rows by id.
dt %>%
  melt(id.vars = "id") %>%
  arrange(id)

    id variable   value 
1 1  t1  Domestic 
2 1  t2  Domestic 
3 1  t3  Domestic 
4 1  t4  Domestic 
5 1  t5  Domestic 
6 1  t6   Eat 
7 1  t7   Eat 
8 1  t8   Eat 
9 1  t9   Eat 
10 1  t10   Eat 
11 1  t11  Domestic 

12 2  t1 SocialContacts 
13 2  t2   Travel 
14 2  t3   Travel 
15 2  t4  Domestic 
16 2  t5   Travel 
17 2  t6   Travel 
18 2  t7  Domestic 
19 2  t8  Domestic 
20 2  t9  Domestic 
21 2  t10   Travel 
22 2  t11   Travel 

Was ich gerne hätte, ist dies:

 id    value 
    (int)   (chr) 
     1    Domestic 
     1    Eat 
     1    Domestic 

     2    SocialContacts 
     2    Travel 
     2    Domestic 
     2    Travel 
     2    Domestic 
     2    Travel 

Bisher habe ich nur Folgendes erreicht (nicht die gewünschte Ausgabe):

# Attempt from the question: group by (id, value) and de-duplicate.
# The output printed below keeps only one row per (id, value) pair, so a
# state that recurs later within the same id is lost.
dt %>%
  melt(id.vars = "id") %>%
  group_by(id, value) %>%
  arrange(id) %>%
  distinct()

    id variable   value 
    (int) (fctr)   (chr) 
1  1  t1  Domestic 
2  1  t6   Eat 
3  2  t4  Domestic 
4  2  t1 SocialContacts 
5  2  t2   Travel 

Irgendeine Idee?

# Sample data in wide format: one row per id, one column per time point
# (t1..t11) holding the state at that time, plus the id column.
dt <- data.frame(
  t1 = c("Domestic", "SocialContacts"),
  t2 = c("Domestic", "Travel"),
  t3 = c("Domestic", "Travel"),
  t4 = c("Domestic", "Domestic"),
  t5 = c("Domestic", "Travel"),
  t6 = c("Eat", "Travel"),
  t7 = c("Eat", "Domestic"),
  t8 = c("Eat", "Domestic"),
  t9 = c("Eat", "Domestic"),
  t10 = c("Eat", "Travel"),
  t11 = c("Domestic", "Travel"),
  id = 1:2,
  stringsAsFactors = FALSE
)
+0

Entspricht eine der Antworten Ihrem Bedarf? Bitte markieren Sie diejenige, die Sie akzeptieren. Vielen Dank. – r2evans

Antwort

3

Eine dplyr-Alternative zur Antwort von @Psidom:

# Sample data in long format: 2 ids x 11 time points (t1..t11), with the
# state observed at each time point in `value`.
input <- data.frame(
  id = rep(1:2, each = 11L),
  variable = rep(paste0("t", 1:11), times = 2L),
  value = c(
    # id 1
    "Domestic", "Domestic", "Domestic", "Domestic", "Domestic",
    "Eat", "Eat", "Eat", "Eat", "Eat", "Domestic",
    # id 2
    "SocialContacts", "Travel", "Travel", "Domestic", "Travel", "Travel",
    "Domestic", "Domestic", "Domestic", "Travel", "Travel"
  ),
  # Keep character row names "1".."22".
  row.names = as.character(1:22),
  stringsAsFactors = FALSE
)

Der Code:

library(dplyr)

# Keep the first row of every run of consecutive identical states.
# A new run starts whenever `value` OR `id` differs from the previous row, so
# a run can never span two ids: if one id ends with the same state the next
# id starts with, both rows are kept.
input %>%
    mutate(
        grp = cumsum(
            # `default = <first element>` makes the first row compare equal
            # to itself, so the run counter starts at 0.
            value != lag(value, default = value[1]) |
                id != lag(id, default = id[1])
        )
    ) %>%
    group_by(grp) %>%
    slice(1)
# Source: local data frame [9 x 4] 
# Groups: grp [9] 
#  id variable   value grp 
# <int> <chr>   <chr> <int> 
# 1  1  t1  Domestic  0 
# 2  1  t6   Eat  1 
# 3  1  t11  Domestic  2 
# 4  2  t1 SocialContacts  3 
# 5  2  t2   Travel  4 
# 6  2  t4  Domestic  5 
# 7  2  t5   Travel  6 
# 8  2  t7  Domestic  7 
# 9  2  t10   Travel  8 
2

Mit rleid aus data.table:

library(data.table)
library(dplyr)

# Reshape to long format, number the runs of consecutive identical values
# with rleid(), and reduce every (id, run) group to its single distinct
# value. The helper run column is dropped at the end.
dt %>%
  melt(id.vars = "id") %>%
  arrange(id) %>%
  group_by(id, run_id = rleid(value)) %>%
  summarise(value = unique(value)) %>%
  select(-run_id)

# id   value 
# 1 1  Domestic 
# 6 1   Eat 
# 11 1  Domestic 
# 12 2 SocialContacts 
# 13 2   Travel 
# 15 2  Domestic 
# 16 2   Travel 
# 18 2  Domestic 
# 21 2   Travel 

Ein ähnlicher Ansatz mit data.table:

library(data.table)

# setDT() converts `dt` to a data.table by reference. The table is melted to
# long format and ordered by id; every row is then tagged with its id and the
# run id of its value, and unique() collapses each run to a single row.
# The helper column is named explicitly (`rleid = ...`) so that dropping it
# does not rely on data.table's automatic naming of `by` expressions, and
# `with = FALSE` is spelled out because `F` is an ordinary, reassignable
# variable.
unique(
    melt(setDT(dt), id.vars = "id")[order(id)][
        , .(value), by = .(id, rleid = rleid(value))
    ]
)[, -"rleid", with = FALSE]

# id   value 
# 1: 1  Domestic 
# 2: 1   Eat 
# 3: 1  Domestic 
# 4: 2 SocialContacts 
# 5: 2   Travel 
# 6: 2  Domestic 
# 7: 2   Travel 
# 8: 2  Domestic 
# 9: 2   Travel 
2

Hier ist eine data.table Lösung:

library(data.table)
setDT(dt)
# Keep the first row of every run of consecutive identical states.
# rleid(id, value) starts a new run when either the id or the value changes,
# so a run never spans two ids. `.I[1L]` is the row number of the first row
# of each run; those row numbers (column V1) are used to subset `dt`.
# No helper column is added to `dt`, so nothing has to be removed afterwards.
dt[dt[, .I[1L], by = .(rleid(id, value))]$V1]

Das ergibt:

id variable   value 
1: 1  t1  Domestic 
2: 1  t6   Eat 
3: 1  t11  Domestic 
4: 2  t1 SocialContacts 
5: 2  t2   Travel 
6: 2  t4  Domestic 
7: 2  t5   Travel 
8: 2  t7  Domestic 
9: 2  t10   Travel 

Daten:

# Sample data in long format, read from inline text. The header names three
# columns while every data line has four fields, so read.table() uses the
# first field of each line as the row name.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
dt <- read.table(header = TRUE, text = " id variable   value 
1 1  t1  Domestic 
2 1  t2  Domestic 
3 1  t3  Domestic 
4 1  t4  Domestic 
5 1  t5  Domestic 
6 1  t6   Eat 
7 1  t7   Eat 
8 1  t8   Eat 
9 1  t9   Eat 
10 1  t10   Eat 
11 1  t11  Domestic 
12 2  t1 SocialContacts 
13 2  t2   Travel 
14 2  t3   Travel 
15 2  t4  Domestic 
16 2  t5   Travel 
17 2  t6   Travel 
18 2  t7  Domestic 
19 2  t8  Domestic 
20 2  t9  Domestic 
21 2  t10   Travel 
22 2  t11   Travel")