[R] Data Frame Operation: Replace values based on constraints

arun smartpink111 at yahoo.com
Mon Oct 7 15:16:20 CEST 2013


Hi,
Try:
# Split each "GROUP_YEAR" label (e.g. "SPAIN_1998") at its LAST underscore.
# Plain regex substitution is used instead of parsing the labels as a text
# table, so NA labels, empty labels, labels containing extra "_", "#" or quote
# characters, and zero-row input all keep one output row per input row.
# datNew keeps the two-column layout (V1 = group, V2 = year).
datNew <- data.frame(V1 = as.character(mydata$NATIONALITY),
                     stringsAsFactors = FALSE)
# Year: whatever follows the last "_", as an integer (NA if it is not numeric).
datNew$V2 <- as.integer(sub("^.*_", "", datNew$V1))
# Group: the label with its trailing "_<year>" part removed.
datNew$V1 <- sub("_[^_]*$", "", datNew$V1)
# Overwrite the existing NATIONALITY and YEAR columns in a copy of mydata;
# both columns already exist, so the column order is unchanged.
mydata2 <- within(mydata, {
  NATIONALITY <- datNew[, 1]
  YEAR <- datNew[, 2]
})
head(mydata2)
#     PROVINCE  AGE5 ZONA91OK NATIONALITY FREQUENCY YEAR
#1           1 10-14      101       SPAIN       600 1998
#501         4 30-34     4079       SPAIN      1000 1998
#1001        7 50-54      712       SPAIN       500 1998
#1501        8 40-44     8205       SPAIN      2000 1998
#2001       11 35-39    11022       SPAIN      1000 1998
#2501       14 40-44    14021       SPAIN     10000 1998


A.K.




I have a dataset in which I want to split one variable into two variables, one telling me the nationality and the other the year. 

YEAR: four digit number with the year 
NATIONALITY: code for the nationality (e.g. SPAIN, EU15DC, etc.) 


  row PROVINCE	AGE5	ZONA91OK	NATIONALITY	FREQUENCY  	YEAR 
  1	1	10-14	101	SPAIN_1998	614.000000	Nationality 
  501	4	30-34	4079	SPAIN_1998	1943.000000	Nationality 
  1001	7	50-54	712	SPAIN_1998	596.053095	Nationality 
  1501	8	40-44	8205	SPAIN_1998	2012.674138	Nationality 

So I would like to replace, for instance: 
NATIONALITY: SPAIN_1998 

with 

NATIONALITY: SPAIN 

and for the corresponding rows 

YEAR: 1998 

Any help with this issue would be much appreciated! 

Here is a reproducible sample: 
# Reproducible sample of the poster's data (looks like dput() output -- confirm).
# `mydata` is a data.frame; row.names lists 200 entries: 1, 501, ..., 99501.
# Columns:
#   PROVINCE    integer codes
#   AGE5        factor with nine levels, "10-14" through "50-54"
#   ZONA91OK    integer codes
#   NATIONALITY factor whose levels are "<GROUP>_<YEAR>" labels such as
#               "SPAIN_1998"; the full level set is listed although the rows
#               here only use codes 1-13
#   FREQUENCY   numeric values
#   YEAR        character, "Nationality" in every row; the question asks to
#               replace it with the year taken from NATIONALITY
mydata<-  structure(list(PROVINCE = c(1L, 4L, 7L, 8L, 11L, 14L, 17L, 20L, 
24L, 28L, 30L, 33L, 36L, 41L, 44L, 46L, 48L, 3L, 6L, 8L, 10L, 
13L, 15L, 18L, 23L, 26L, 29L, 31L, 35L, 38L, 41L, 46L, 47L, 2L, 
4L, 8L, 8L, 11L, 15L, 17L, 21L, 24L, 28L, 30L, 33L, 37L, 41L, 
45L, 46L, 49L, 3L, 6L, 8L, 10L, 13L, 15L, 19L, 23L, 27L, 29L, 
32L, 36L, 39L, 43L, 46L, 48L, 2L, 5L, 8L, 8L, 12L, 15L, 18L, 
21L, 24L, 28L, 30L, 33L, 37L, 41L, 45L, 46L, 50L, 3L, 7L, 8L, 
10L, 14L, 16L, 20L, 23L, 27L, 29L, 32L, 36L, 39L, 43L, 46L, 48L, 
3L, 6L, 8L, 8L, 12L, 15L, 18L, 21L, 25L, 28L, 31L, 34L, 38L, 
41L, 45L, 46L, 50L, 3L, 7L, 8L, 11L, 14L, 17L, 20L, 23L, 27L, 
29L, 33L, 36L, 40L, 43L, 46L, 48L, 3L, 6L, 8L, 9L, 12L, 15L, 
18L, 22L, 25L, 28L, 31L, 35L, 38L, 41L, 45L, 46L, 50L, 4L, 7L, 
8L, 11L, 14L, 17L, 20L, 24L, 28L, 30L, 33L, 36L, 41L, 43L, 46L, 
48L, 3L, 6L, 8L, 10L, 13L, 15L, 18L, 22L, 26L, 28L, 31L, 35L, 
38L, 41L, 46L, 47L, 1L, 4L, 8L, 8L, 11L, 14L, 17L, 20L, 24L, 
28L, 30L, 33L, 36L, 41L, 44L, 46L, 49L, 3L, 6L), AGE5 = structure(c(1L, 
5L, 9L, 7L, 6L, 7L, 5L, 8L, 3L, 3L, 3L, 5L, 8L, 2L, 3L, 6L, 9L, 
5L, 7L, 4L, 3L, 5L, 8L, 8L, 2L, 8L, 2L, 9L, 7L, 9L, 9L, 2L, 7L, 
2L, 9L, 1L, 8L, 8L, 1L, 8L, 1L, 6L, 4L, 6L, 7L, 2L, 3L, 1L, 7L, 
5L, 6L, 9L, 5L, 6L, 8L, 9L, 3L, 4L, 3L, 4L, 4L, 1L, 3L, 1L, 2L, 
2L, 6L, 6L, 2L, 9L, 2L, 2L, 1L, 5L, 9L, 5L, 8L, 9L, 7L, 4L, 3L, 
7L, 2L, 8L, 2L, 6L, 9L, 1L, 5L, 1L, 6L, 6L, 6L, 7L, 3L, 6L, 3L, 
3L, 4L, 1L, 1L, 2L, 9L, 6L, 4L, 3L, 8L, 3L, 7L, 1L, 5L, 2L, 6L, 
6L, 8L, 5L, 9L, 5L, 6L, 2L, 3L, 1L, 4L, 8L, 9L, 8L, 1L, 5L, 1L, 
6L, 4L, 6L, 2L, 3L, 3L, 5L, 9L, 5L, 5L, 4L, 7L, 8L, 4L, 2L, 5L, 
7L, 8L, 9L, 8L, 3L, 7L, 7L, 5L, 6L, 3L, 6L, 1L, 2L, 2L, 3L, 7L, 
1L, 9L, 5L, 8L, 4L, 5L, 4L, 1L, 3L, 7L, 7L, 9L, 3L, 9L, 7L, 5L, 
7L, 8L, 1L, 4L, 4L, 6L, 1L, 8L, 7L, 8L, 6L, 8L, 4L, 3L, 4L, 5L, 
9L, 2L, 6L, 6L, 1L, 5L, 7L), .Label = c("10-14", "15-19", "20-24", 
"25-29", "30-34", "35-39", "40-44", "45-49", "50-54"), class = "factor"), 
    ZONA91OK = c(101L, 4079L, 712L, 8205L, 11022L, 14021L, 1714L, 
    20067L, 2414L, 2810L, 300799L, 3305L, 36026L, 41024L, 4405L, 
    4607L, 48015L, 308L, 610L, 8121L, 1006L, 1307L, 1511L, 1813L, 
    2308L, 2605L, 2910L, 310799L, 35026L, 3811L, 411199L, 4601L, 
    4708L, 202L, 405L, 8015L, 837L, 11033L, 1502L, 1702L, 2112L, 
    2408L, 28047L, 30015L, 3305L, 3709L, 410199L, 4511L, 1202L, 
    490699L, 3063L, 610L, 827L, 1006L, 1301L, 15036L, 1901L, 
    2310L, 2709L, 29025L, 3201L, 36008L, 390899L, 4301L, 46184L, 
    4805L, 206L, 504L, 817L, 813L, 12135L, 1519L, 1810L, 2104L, 
    2402L, 28130L, 30030L, 3305L, 3707L, 411399L, 45165L, 46181L, 
    5008L, 305L, 7026L, 803L, 1006L, 1413L, 16078L, 200999L, 
    2312L, 2712L, 29069L, 3210L, 3616L, 391199L, 4313L, 46105L, 
    4805L, 310L, 6153L, 8252L, 8205L, 1205L, 1505L, 1808L, 2110L, 
    2508L, 2810L, 311399L, 3405L, 3807L, 41024L, 4507L, 46102L, 
    500599L, 3014L, 706L, 8121L, 11028L, 14042L, 1712L, 20045L, 
    2314L, 27031L, 29901L, 33024L, 3614L, 400199L, 4307L, 46021L, 
    4805L, 3066L, 6153L, 8015L, 901L, 12040L, 1522L, 1806L, 2203L, 
    2508L, 28047L, 311099L, 35004L, 3801L, 410199L, 4515L, 46017L, 
    501199L, 407L, 7027L, 827L, 1102L, 1404L, 17155L, 200599L, 
    24089L, 2812L, 30019L, 33024L, 3612L, 41038L, 4301L, 4628L, 
    4805L, 307L, 6153L, 817L, 1004L, 1309L, 1508L, 1804L, 2206L, 
    2606L, 28130L, 310799L, 35011L, 38022L, 411399L, 4622L, 4701L, 
    1036L, 4079L, 807L, 803L, 1108L, 1410L, 1708L, 201399L, 2410L, 
    28058L, 30043L, 33024L, 3610L, 410399L, 4401L, 4621L, 490499L, 
    3059L, 6153L), NATIONALITY = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
    8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
    9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
    11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
    11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
    12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L), .Label = c("SPAIN_1998", 
    "EU15DC_1998", "ROE_1998", "MAGREB_1998", "SSA_1998", "LA_1998", 
    "ASIA_1998", "ROW_1998", "Total_1998", "SPAIN_1999", "EU15DC_1999", 
    "ROE_1999", "MAGREB_1999", "SSA_1999", "LA_1999", "ASIA_1999", 
    "ROW_1999", "Total_1999", "SPAIN_2000", "EU15DC_2000", "ROE_2000", 
    "MAGREB_2000", "SSA_2000", "LA_2000", "ASIA_2000", "ROW_2000", 
    "Total_2000", "SPAIN_2001", "EU15DC_2001", "ROE_2001", "MAGREB_2001", 
    "SSA_2001", "LA_2001", "ASIA_2001", "ROW_2001", "Total_2001", 
    "SPAIN_2002", "EU15DC_2002", "ROE_2002", "MAGREB_2002", "SSA_2002", 
    "LA_2002", "ASIA_2002", "ROW_2002", "Total_2002", "SPAIN_2003", 
    "EU15DC_2003", "ROE_2003", "MAGREB_2003", "SSA_2003", "LA_2003", 
    "ASIA_2003", "ROW_2003", "Total_2003", "SPAIN_2004", "EU15DC_2004", 
    "ROE_2004", "MAGREB_2004", "SSA_2004", "LA_2004", "ASIA_2004", 
    "ROW_2004", "Total_2004", "SPAIN_2005", "EU15DC_2005", "ROE_2005", 
    "MAGREB_2005", "SSA_2005", "LA_2005", "ASIA_2005", "ROW_2005", 
    "Total_2005", "SPAIN_2006", "EU15DC_2006", "ROE_2006", "MAGREB_2006", 
    "SSA_2006", "LA_2006", "ASIA_2006", "ROW_2006", "Total_2006", 
    "SPAIN_2007", "EU15DC_2007", "ROE_2007", "MAGREB_2007", "SSA_2007", 
    "LA_2007", "ASIA_2007", "ROW_2007", "Total_2007", "SPAIN_2008", 
    "EU15DC_2008", "ROE_2008", "MAGREB_2008", "SSA_2008", "LA_2008", 
    "ASIA_2008", "ROW_2008", "Total_2008", "SPAIN_2009", "EU15DC_2009", 
    "ROE_2009", "MAGREB_2009", "SSA_2009", "LA_2009", "ASIA_2009", 
    "ROW_2009", "Total_2009", "SPAIN_2010", "EU15DC_2010", "ROE_2010", 
    "MAGREB_2010", "SSA_2010", "LA_2010", "ASIA_2010", "ROW_2010", 
    "Total_2010", "SPAIN_2011", "EU15DC_2011", "ROE_2011", "MAGREB_2011", 
    "SSA_2011", "LA_2011", "ASIA_2011", "ROW_2011", "Total_2011", 
    "SPAIN_2012", "EU15DC_2012", "ROE_2012", "MAGREB_2012", "SSA_2012", 
    "LA_2012", "ASIA_2012", "ROW_2012", "Total_2012", "NOTSPAIN_1998", 
    "NOTSPAIN_1999", "NOTSPAIN_2000", "NOTSPAIN_2001", "NOTSPAIN_2002", 
    "NOTSPAIN_2003", "NOTSPAIN_2004", "NOTSPAIN_2005", "NOTSPAIN_2006", 
    "NOTSPAIN_2007", "NOTSPAIN_2008", "NOTSPAIN_2009", "NOTSPAIN_2010", 
    "NOTSPAIN_2011", "NOTSPAIN_2012", "AFRICA_1998", "AFRICA_1999", 
    "AFRICA_2000", "AFRICA_2001", "AFRICA_2002", "AFRICA_2003", 
    "AFRICA_2004", "AFRICA_2005", "AFRICA_2006", "AFRICA_2007", 
    "AFRICA_2008", "AFRICA_2009", "AFRICA_2010", "AFRICA_2011", 
    "AFRICA_2012", "DWC_1998", "DWC_1999", "DWC_2000", "DWC_2001", 
    "DWC_2002", "DWC_2003", "DWC_2004", "DWC_2005", "DWC_2006", 
    "DWC_2007", "DWC_2008", "DWC_2009", "DWC_2010", "DWC_2011", 
    "DWC_2012"), class = "factor"), FREQUENCY = c(600, 1000, 
    500, 2000, 1000, 10000, 900, 
    1000, 600, 1000, 1000, 600, 600, 900, 500, 
    1000, 1000, 10, 0, 10,0, 0, 
    0, 0, 0, 0, 20, 0, 10, 0, 1, 0, 10, 0, 0, 0, 10, 0, 
    0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 
    0, 0, 0, 0, 0, 10, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 2, 11, 0, 0, 0, 3, 2, 1, 5, 
    60, 1, 50, 1, 0, 0, 10, 0, 1, 1, 1, 0, 0, 0, 
    0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 213, 97, 989, 88, 
    42, 10, 17, 0, 60, 100, 60, 
    1761, 700, 600, 400, 1000, 400, 900, 700, 
    1000, 1000, 700, 900, 700, 3000, 1000, 1000, 10000, 500, 
    344, 67, 82, 87, 1, 0, 10, 0, 1, 3, 3, 0, 0, 1, 8, 3, 
    12, 0, 2, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 100, 0, 7, 0, 0, 0, 
    0, 0, 5, 2), YEAR = c("Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality", "Nationality", "Nationality", "Nationality", 
    "Nationality")), .Names = c("PROVINCE", "AGE5", "ZONA91OK", 
"NATIONALITY", "FREQUENCY", "YEAR"), row.names = c(1L, 501L, 
1001L, 1501L, 2001L, 2501L, 3001L, 3501L, 4001L, 4501L, 5001L, 
5501L, 6001L, 6501L, 7001L, 7501L, 8001L, 8501L, 9001L, 9501L, 
10001L, 10501L, 11001L, 11501L, 12001L, 12501L, 13001L, 13501L, 
14001L, 14501L, 15001L, 15501L, 16001L, 16501L, 17001L, 17501L, 
18001L, 18501L, 19001L, 19501L, 20001L, 20501L, 21001L, 21501L, 
22001L, 22501L, 23001L, 23501L, 24001L, 24501L, 25001L, 25501L, 
26001L, 26501L, 27001L, 27501L, 28001L, 28501L, 29001L, 29501L, 
30001L, 30501L, 31001L, 31501L, 32001L, 32501L, 33001L, 33501L, 
34001L, 34501L, 35001L, 35501L, 36001L, 36501L, 37001L, 37501L, 
38001L, 38501L, 39001L, 39501L, 40001L, 40501L, 41001L, 41501L, 
42001L, 42501L, 43001L, 43501L, 44001L, 44501L, 45001L, 45501L, 
46001L, 46501L, 47001L, 47501L, 48001L, 48501L, 49001L, 49501L, 
50001L, 50501L, 51001L, 51501L, 52001L, 52501L, 53001L, 53501L, 
54001L, 54501L, 55001L, 55501L, 56001L, 56501L, 57001L, 57501L, 
58001L, 58501L, 59001L, 59501L, 60001L, 60501L, 61001L, 61501L, 
62001L, 62501L, 63001L, 63501L, 64001L, 64501L, 65001L, 65501L, 
66001L, 66501L, 67001L, 67501L, 68001L, 68501L, 69001L, 69501L, 
70001L, 70501L, 71001L, 71501L, 72001L, 72501L, 73001L, 73501L, 
74001L, 74501L, 75001L, 75501L, 76001L, 76501L, 77001L, 77501L, 
78001L, 78501L, 79001L, 79501L, 80001L, 80501L, 81001L, 81501L, 
82001L, 82501L, 83001L, 83501L, 84001L, 84501L, 85001L, 85501L, 
86001L, 86501L, 87001L, 87501L, 88001L, 88501L, 89001L, 89501L, 
90001L, 90501L, 91001L, 91501L, 92001L, 92501L, 93001L, 93501L, 
94001L, 94501L, 95001L, 95501L, 96001L, 96501L, 97001L, 97501L, 
98001L, 98501L, 99001L, 99501L), class = "data.frame")



More information about the R-help mailing list