2012-04-03 6 views
0

könnte jemand bitte, was ich falsch mache in Schritt 4 meiner Codes bitte (strsplit)?Anwenden von Strsplit-Funktion für mehrere Dateien

Ich habe 3 Dateien im Verzeichnis mit Namen ‚101_E45_N66.csv‘, ‚102_E46_N76.csv‘, ‚103_E47_N86.csv‘

ich sie in einer Liste lese und zu versuchen, die Namen zu teilen und füge 3 neue coloumns in jeder Datei zum Beispiel ‚id‘ = 101, ‚Easting‘ = E904690, ‚Northing‘ = N936410

ich von meiner vorherigen Frage wissen, wie ich für einzelne Datei ausführen könnte, die wie folgt:

filename <- '101_E45_N66.csv' 
    ## spliting name "101_E45_N66.csv" to create 3 new coloumn within dataframe 
      s = strsplit(filename,"_",,fixed=TRUE)[[1]] 
    df1= cbind(df[,c("type","date")],ID=s[1],name1=s[2],name2=s[3],df[,3:ncol(df)]) 

Aber wenn ich versuche, eine lapply Funktion zu erstellen, um auf einer lis anzuwenden t Ich bin mir nicht sicher, was ich falsch mache, da es anscheinend nur 1 Dateiname verwendet und in allen 3 Dateien denselben Namen hinzufügt, wenn ich die Dateien zurück in das Verzeichnis schreibe (siehe Schritt 4 meines Codes) , die Codes, die ich verwende, sind:

mycsv = dir(pattern=".csv") 

n <- length(mycsv) 

## step 1 - read file 
mylist <- vector("list", n) 

for(i in 1:n) mylist[[i]] <- read.csv(mycsv[i],header = TRUE,skip =5, nrow = length(count.fields(mycsv[i])) - 12) 

## step 2 - Change coloumn name 
mylist <- lapply(mylist, function(x) {names(x) <- c("type","date","v1","v2","v3","v4","v5","v6","v7") ; return(x)}) 

## step 3 - changing type coloumn for weekday/weekend 

mylist <- lapply(mylist, function(x) { 
    f = c("wd", "we", "we", "wd", "wd", "wd", "wd") 
    x$type = rep(f,52, length.out = 30) 
    return(x) 
}) 

## Step 4 - spliting filename to create 3 new coloumn within dataframe 

    mylist <- lapply(mylist, function(x) { 
      s = strsplit(mycsv[i],"_",,fixed=TRUE)[[1]] 
    d = cbind(x[,c("type","date")],ID=s[1],Easting=s[2],Northing=s[3],x[,3:ncol(x)]) 
    return(d) 
}) 
    ## Step 5- writre the data back in the directory 
     for(i in 1:n)  
     write.csv(file = paste("file", i, ".csv", sep = ""), mylist[i], row.names = F) 

Könnten Sie bitte vorschlagen, was ich in Schritt 4 falsch mache?

Vielen Dank,
Ayan

meine Beispieldaten:

> dput (mylist) 
list(structure(list(type = c("wd", "we", "we", "wd", "wd", "wd", 
"wd", "wd", "we", "we", "wd", "wd", "wd", "wd", "wd", "we", "we", 
"wd", "wd", "wd", "wd", "wd", "we", "we", "wd", "wd", "wd", "wd", 
"wd", "we"), date = structure(c(1L, 11L, 18L, 9L, 26L, 30L, 22L, 
5L, 14L, 15L, 6L, 23L, 27L, 19L, 2L, 10L, 16L, 7L, 24L, 28L, 
20L, 3L, 12L, 17L, 8L, 25L, 29L, 21L, 4L, 13L), .Label = c("Fri 1 Jan", 
"Fri 15 Jan", "Fri 22 Jan", "Fri 29 Jan", "Fri 8 Jan", "Mon 11 Jan", 
"Mon 18 Jan", "Mon 25 Jan", "Mon 4 Jan", "Sat 16 Jan", "Sat 2 Jan", 
"Sat 23 Jan", "Sat 30 Jan", "Sat 9 Jan", "Sun 10 Jan", "Sun 17 Jan", 
"Sun 24 Jan", "Sun 3 Jan", "Thu 14 Jan", "Thu 21 Jan", "Thu 28 Jan", 
"Thu 7 Jan", "Tue 12 Jan", "Tue 19 Jan", "Tue 26 Jan", "Tue 5 Jan", 
"Wed 13 Jan", "Wed 20 Jan", "Wed 27 Jan", "Wed 6 Jan"), class = "factor"), 
    v1 = c(322L, 89L, 242L, NA, 136L, 113L, 70L, 134L, 232L, 
    NA, 127L, 124L, 120L, 162L, 179L, 374L, 477L, NA, 147L, 136L, 
    152L, 196L, 384L, 491L, 136L, NA, 143L, 172L, 226L, 509L), 
    v2 = c(409L, 71L, 206L, NA, 104L, 57L, NA, 98L, 201L, NA, 
    74L, 94L, 69L, 98L, 117L, 277L, 445L, NA, 131L, 90L, 83L, 
    NA, 329L, 473L, 73L, NA, 104L, 113L, 136L, 427L), v3 = c(367L, 
    54L, 211L, NA, 107L, 69L, 51L, 63L, 157L, NA, 56L, 115L, 
    96L, 100L, 118L, 250L, 388L, NA, 124L, 85L, 96L, 118L, 313L, 
    431L, 79L, NA, 93L, 135L, 134L, 290L), v4 = c(343L, 60L, 
    183L, NA, 110L, 53L, 32L, 77L, 123L, NA, 37L, 100L, 64L, 
    68L, 99L, 199L, 333L, NA, 107L, 71L, 81L, 89L, 219L, 393L, 
    43L, NA, 72L, 96L, 127L, NA), v5 = c(231L, 42L, 79L, NA, 
    74L, 58L, 48L, 59L, 78L, NA, 62L, 74L, 63L, 59L, 74L, 110L, 
    134L, NA, 74L, 82L, 59L, 73L, 135L, 170L, 53L, NA, 61L, 72L, 
    67L, 186L), v6 = c(140L, 41L, 57L, NA, 104L, 92L, 89L, 94L, 
    68L, NA, 116L, 131L, NA, 110L, 125L, 89L, 89L, NA, 113L, 
    124L, 115L, 116L, 95L, 77L, 126L, NA, 110L, 122L, 119L, 122L 
    ), v7 = c(90L, 104L, 82L, NA, 368L, 258L, NA, 289L, 117L, 
    NA, 395L, 416L, 397L, 391L, 400L, 132L, 101L, NA, 397L, 426L, 
    418L, 411L, 151L, 66L, 396L, NA, 457L, 437L, 428L, 128L)), .Names = c("type", 
"date", "v1", "v2", "v3", "v4", "v5", "v6", "v7"), row.names = c(NA, 
-30L), class = "data.frame"), structure(list(type = c("wd", "we", 
"we", "wd", "wd", "wd", "wd", "wd", "we", "we", "wd", "wd", "wd", 
"wd", "wd", "we", "we", "wd", "wd", "wd", "wd", "wd", "we", "we", 
"wd", "wd", "wd", "wd", "wd", "we"), date = structure(c(1L, 11L, 
18L, 9L, 26L, 30L, 22L, 5L, 14L, 15L, 6L, 23L, 27L, 19L, 2L, 
10L, 16L, 7L, 24L, 28L, 20L, 3L, 12L, 17L, 8L, 25L, 29L, 21L, 
4L, 13L), .Label = c("Fri 1 Jan", "Fri 15 Jan", "Fri 22 Jan", 
"Fri 29 Jan", "Fri 8 Jan", "Mon 11 Jan", "Mon 18 Jan", "Mon 25 Jan", 
"Mon 4 Jan", "Sat 16 Jan", "Sat 2 Jan", "Sat 23 Jan", "Sat 30 Jan", 
"Sat 9 Jan", "Sun 10 Jan", "Sun 17 Jan", "Sun 24 Jan", "Sun 3 Jan", 
"Thu 14 Jan", "Thu 21 Jan", "Thu 28 Jan", "Thu 7 Jan", "Tue 12 Jan", 
"Tue 19 Jan", "Tue 26 Jan", "Tue 5 Jan", "Wed 13 Jan", "Wed 20 Jan", 
"Wed 27 Jan", "Wed 6 Jan"), class = "factor"), v1 = c(322L, 89L, 
242L, NA, 136L, 113L, 70L, 134L, 232L, NA, 127L, 124L, 120L, 
162L, 179L, 374L, 477L, NA, 147L, 136L, 152L, 196L, 384L, 491L, 
136L, NA, 143L, 172L, 226L, 509L), v2 = c(409L, 71L, 206L, NA, 
104L, 57L, NA, 98L, 201L, NA, 74L, 94L, 69L, 98L, 117L, 277L, 
445L, NA, 131L, 90L, 83L, NA, 329L, 473L, 73L, NA, 104L, 113L, 
136L, 427L), v3 = c(367L, 54L, 211L, NA, 107L, 69L, 51L, 63L, 
157L, NA, 56L, 115L, 96L, 100L, 118L, 250L, 388L, NA, 124L, 85L, 
96L, 118L, 313L, 431L, 79L, NA, 93L, 135L, 134L, 290L), v4 = c(343L, 
60L, 183L, NA, 110L, 53L, 32L, 77L, 123L, NA, 37L, 100L, 64L, 
68L, 99L, 199L, 333L, NA, 107L, 71L, 81L, 89L, 219L, 393L, 43L, 
NA, 72L, 96L, 127L, NA), v5 = c(231L, 42L, 79L, NA, 74L, 58L, 
48L, 59L, 78L, NA, 62L, 74L, 63L, 59L, 74L, 110L, 134L, NA, 74L, 
82L, 59L, 73L, 135L, 170L, 53L, NA, 61L, 72L, 67L, 186L), v6 = c(140L, 
41L, 57L, NA, 104L, 92L, 89L, 94L, 68L, NA, 116L, 131L, NA, 110L, 
125L, 89L, 89L, NA, 113L, 124L, 115L, 116L, 95L, 77L, 126L, NA, 
110L, 122L, 119L, 122L), v7 = c(90L, 104L, 82L, NA, 368L, 258L, 
NA, 289L, 117L, NA, 395L, 416L, 397L, 391L, 400L, 132L, 101L, 
NA, 397L, 426L, 418L, 411L, 151L, 66L, 396L, NA, 457L, 437L, 
428L, 128L)), .Names = c("type", "date", "v1", "v2", "v3", "v4", 
"v5", "v6", "v7"), row.names = c(NA, -30L), class = "data.frame"), 
    structure(list(type = c("wd", "we", "we", "wd", "wd", "wd", 
    "wd", "wd", "we", "we", "wd", "wd", "wd", "wd", "wd", "we", 
    "we", "wd", "wd", "wd", "wd", "wd", "we", "we", "wd", "wd", 
    "wd", "wd", "wd", "we"), date = structure(c(1L, 11L, 18L, 
    9L, 26L, 30L, 22L, 5L, 14L, 15L, 6L, 23L, 27L, 19L, 2L, 10L, 
    16L, 7L, 24L, 28L, 20L, 3L, 12L, 17L, 8L, 25L, 29L, 21L, 
    4L, 13L), .Label = c("Fri 1 Jan", "Fri 15 Jan", "Fri 22 Jan", 
    "Fri 29 Jan", "Fri 8 Jan", "Mon 11 Jan", "Mon 18 Jan", "Mon 25 Jan", 
    "Mon 4 Jan", "Sat 16 Jan", "Sat 2 Jan", "Sat 23 Jan", "Sat 30 Jan", 
    "Sat 9 Jan", "Sun 10 Jan", "Sun 17 Jan", "Sun 24 Jan", "Sun 3 Jan", 
    "Thu 14 Jan", "Thu 21 Jan", "Thu 28 Jan", "Thu 7 Jan", "Tue 12 Jan", 
    "Tue 19 Jan", "Tue 26 Jan", "Tue 5 Jan", "Wed 13 Jan", "Wed 20 Jan", 
    "Wed 27 Jan", "Wed 6 Jan"), class = "factor"), v1 = c(322L, 
    89L, 242L, NA, 136L, 113L, 70L, 134L, 232L, NA, 127L, 124L, 
    120L, 162L, 179L, 374L, 477L, NA, 147L, 136L, 152L, 196L, 
    384L, 491L, 136L, NA, 143L, 172L, 226L, 509L), v2 = c(409L, 
    71L, 206L, NA, 104L, 57L, NA, 98L, 201L, NA, 74L, 94L, 69L, 
    98L, 117L, 277L, 445L, NA, 131L, 90L, 83L, NA, 329L, 473L, 
    73L, NA, 104L, 113L, 136L, 427L), v3 = c(367L, 54L, 211L, 
    NA, 107L, 69L, 51L, 63L, 157L, NA, 56L, 115L, 96L, 100L, 
    118L, 250L, 388L, NA, 124L, 85L, 96L, 118L, 313L, 431L, 79L, 
    NA, 93L, 135L, 134L, 290L), v4 = c(343L, 60L, 183L, NA, 110L, 
    53L, 32L, 77L, 123L, NA, 37L, 100L, 64L, 68L, 99L, 199L, 
    333L, NA, 107L, 71L, 81L, 89L, 219L, 393L, 43L, NA, 72L, 
    96L, 127L, NA), v5 = c(231L, 42L, 79L, NA, 74L, 58L, 48L, 
    59L, 78L, NA, 62L, 74L, 63L, 59L, 74L, 110L, 134L, NA, 74L, 
    82L, 59L, 73L, 135L, 170L, 53L, NA, 61L, 72L, 67L, 186L), 
     v6 = c(140L, 41L, 57L, NA, 104L, 92L, 89L, 94L, 68L, 
     NA, 116L, 131L, NA, 110L, 125L, 89L, 89L, NA, 113L, 124L, 
     115L, 116L, 95L, 77L, 126L, NA, 110L, 122L, 119L, 122L 
     ), v7 = c(90L, 104L, 82L, NA, 368L, 258L, NA, 289L, 117L, 
     NA, 395L, 416L, 397L, 391L, 400L, 132L, 101L, NA, 397L, 
     426L, 418L, 411L, 151L, 66L, 396L, NA, 457L, 437L, 428L, 
     128L)), .Names = c("type", "date", "v1", "v2", "v3", 
    "v4", "v5", "v6", "v7"), row.names = c(NA, -30L), class = "data.frame")) 

Antwort

1

Das Problem ist, dass Sie indizieren Sie Dateinamen von i versuchen, die nicht im lapply Anruf nicht existiert.

Wie Sie einen gemeinsamen Index für mylist und mycsv benötigen, könnten Sie eine einfache Schleife verwenden oder wenn Sie lapply beharren würde hier dann wie etwas anprobieren Aufruf:

lapply(1:length(mylist), function(i) { 
    mylist.i <- mylist[[i]] 
    s = strsplit(mycsv[i], "_" , fixed = TRUE)[[1]] 
    d = cbind(mylist.i[, c("type", "date")], ID = s[1], Easting = s[2], Northing = s[3], mylist.i[, 3:ncol(mylist.i)]) 
    return(d) 
}) 

Bitte, dass ich ohne mycsv beachten Sie konnte teste dies nicht und habe nicht versucht, andere Teile deines Codes zu überprüfen.

+0

Vielen Dank! es funktioniert gut, wenn ich es lief :)))) – Achak

+0

Kein Problem, ich bin wirklich glücklich, dass @Ayan :) zu hören – daroczig