[R] Can I index a dataframe with a reference from/to a second dataframe?

jim holtman jholtman at gmail.com
Sat Feb 9 00:48:34 CET 2008


try this:

Bos$type <- tree$Type[match(Bos$spp, tree$spp)]



On Feb 8, 2008 3:17 PM, Thompson, David (MNR)
<David.John.Thompson at ontario.ca> wrote:
> Hello,
>
> I am unable to figure out how to code a new column in a data frame based
> on an existing column that matches a column in a reference data frame,
> in a relational-db fashion. I would like this to maintain a minimum set
> of reference tables that may be reused over several similar datasets.
>
> Specifically, I have two data frames as listed below, 'Bos' and 'tree.'
> For each case in 'Bos' I want to look up the matching 'spp' code in
> 'tree' and insert the associated 'type' code into a new 'type' column in
> 'Bos' as in:
>
>    # add type and keep factors from reference list
>    Bos$type <- tree[as.character(tree$spp)==as.character(Bos$spp),
> 'type']
>    Bos$keep <- tree[tree$spp==Bos$spp, 'keep']
>
> And I know I have seen this before but, can't remember where. I have
> filtered through many of the threads referencing
> 'as.numeric(levels(Bos$spp))[as.integer(Bos$spp)]' handling of factors,
> any kind of sql reference I could think of, anything 'ODBC'-ish but, I
> think this may be an indexing issue. I am trying to compare elements of
> two different sized (list) objects (different type objects even?) and
> not cycling through Bos$spp to find matches in tree$spp as expected, . .
> . , has this an apply solution?
>
> My data frames:
> > dput(head(Bos, 30))
> structure(list(oplt = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), rplt = c(3,
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> 0, 0, 0, 0, 0, 0, 0, 0), tree = c(32, 101, 102, 103, 104, 105,
> 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
> 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129), spp =
> structure(c(10L,
> 10L, 12L, 14L, 10L, 10L, 14L, 10L, 15L, 10L, 9L, 3L, 10L, 10L,
> 12L, 10L, 13L, 12L, 12L, 10L, 12L, 10L, 10L, 8L, 5L, 2L, 10L,
> 2L, 12L, 10L), .Label = c("AW", "BD", "BE", "BF", "BW", "BY",
> "CB", "HE", "IW", "MH", "MR", "OR", "PO", "SW", "SA"), class =
> "factor"),
>    dbh = c(12.1, 10.1, 63.3, 9, 7.1, 12.1, 13.9, 6.3, 6.1, 7.9,
>    5.1, 9.8, 7.1, 18.7, 44.2, 28.7, 19.8, 28, 46.6, 9, 61.6,
>    3.3, 9.1, 8.7, 5.8, 3.1, 11.1, 12.3, 28, 8.6), cc = structure(c(2L,
>    2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
>    2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), .Label =
> c("dom",
>    "sup"), class = "factor"), ba = c(114.990145103020,
> 80.1184666481737,
>    3147.0040469356, 63.6172512351933, 39.5919214168654,
> 114.990145103020,
>    151.746779150021, 31.1724531052447, 29.2246656600190,
> 49.0166993776348,
>    20.4282062299676, 75.429639612691, 39.5919214168654,
> 274.645883758454,
>    1534.38526793979, 646.924613208844, 307.907495978336,
> 615.7521601036,
>    1705.53923570736, 63.6172512351933, 2980.24045490142,
> 8.55298599939821,
>    65.0388219109427, 59.4467869875528, 26.4207942166902,
> 7.54767635024948,
>    96.7689077121996, 118.8228881404, 615.7521601036, 58.0880481648753
>    )), .Names = c("oplt", "rplt", "tree", "spp", "dbh", "cc",
> "ba"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8",
> "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
> "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30"
> ), class = "data.frame")
>
> > dput(tree)
> structure(list(spp = structure(1:33, .Label = c("AB", "AS", "AW",
> "BD", "BE", "BF", "BW", "BY", "CA", "CB", "CC", "CE", "DL", "DP",
> "EA", "HE", "IW", "LC", "MH", "MM", "MR", "OR", "PO", "PR", "PV",
> "PW", "RS", "SA", "SB", "SM", "SW", "VC", "VL"), class = "factor"),
>    spp.orig = structure(c(1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 12L,
>    10L, 13L, 11L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
>    23L, 24L, 25L, 26L, 27L, 28L, 31L, 29L, 30L, 32L, 33L, 34L
>    ), .Label = c("AB", "AMEHUM", "AMESPP", "AW", "BD", "BE",
>    "BF", "BW", "BY", "CB", "CE", "CORALT", "CORCOR", "DIELON",
>    "DIRPAL", "EA", "HE", "IW", "LONCAN", "MH", "MM", "MR", "OR",
>    "PO", "PR", "PRUVIR", "PW", "RIBSPP", "SB", "SM", "SORAME",
>    "SW", "VIBACE", "VIBALN"), class = "factor"), OPL = structure(c(15L,
>
>    7L, 14L, 29L, 13L, 2L, 9L, 8L, 10L, 23L, 1L, 28L, 11L, 12L,
>    31L, 30L, 17L, 16L, 5L, 6L, 4L, 25L, 22L, 20L, 24L, 21L,
>    26L, 27L, 19L, 3L, 18L, 32L, 33L), .Label = c("HCORCAN",
>    "WABIBAL", "WACEPEN", "WACERUB", "WACESAS", "WACESPI", "WAMESPP",
>    "WBETALL", "WBETPAP", "WCORALT", "WDIELON", "WDIRPAL", "WFAGGRA",
>    "WFRAAME", "WFRANIG", "WLONCAN", "WOSTVIR", "WPICGLA", "WPICMAR",
>    "WPINRES", "WPINSTR", "WPOPTRE", "WPRUSER", "WPRUVIV", "WQUERUB",
>    "WRIBAME", "WSORAME", "WTHUOCC", "WTILAME", "WTSUCAN", "WULMAME",
>    "WVIBACE", "WVIBLAO"), class = "factor"), form = c(1.1, 1.2,
>    1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1, 2, 1.1, 1.2, 1.2,
>    1.1, 1.1, 1.1, 1.2, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1,
>    1.2, 1.1, 1.1, 1.2, 1.1, 1.2, 1.2), Type = structure(c(2L,
>    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
>    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
>    2L, 2L), .Label = c("H", "W"), class = "factor"), keep =
> structure(c(1L,
>    1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
>    2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
>    1L, 1L), .Label = c("no", "yes"), class = "factor"), Sname =
> structure(c(15L,
>    6L, 14L, 29L, 13L, 1L, 8L, 7L, 9L, 23L, 10L, 28L, 11L, 12L,
>    31L, 30L, 17L, 16L, 4L, 5L, 3L, 25L, 22L, 20L, 24L, 21L,
>    26L, 27L, 19L, 2L, 18L, 32L, 33L), .Label = c("Abies balsamea",
>    "Acer pensylvanicum", "Acer rubrum", "Acer saccharum", "Acer
> spicatum",
>    "Amelanchier", "Betula alleghaniensis", "Betula papyrifera",
>    "Cornus alternifolia", "Cornus canadensis", "Diervilla lonicera",
>    "Dirca palustris", "Fagus grandifolia", "Fraxinus americana",
>    "Fraxinus nigra", "Lonicera canadensis", "Ostrya virginiana",
>    "Picea glauca", "Picea mariana", "Pinus resinosa", "Pinus strobus",
>    "Populus tremuloides", "Prunus serotina", "Prunus virginiana",
>    "Quercus rubra", "Ribes ", "Sorbus americana", "Thuja occidentalis",
>
>    "Tilia americana", "Tsuga canadensis", "Ulmus americana",
>    "Viburnum acerifolium", "Viburnum lantanoides"), class = "factor"),
>    Cname = structure(c(7L, 27L, 30L, 2L, 3L, 6L, 31L, 33L, 1L,
>    8L, 10L, 15L, 11L, 21L, 4L, 14L, 20L, 17L, 18L, 23L, 25L,
>    24L, 29L, 26L, 12L, 16L, 13L, 5L, 9L, 28L, 32L, 22L, 19L), .Label =
> c("Alternate-leaved Dogwood",
>    "American Basswood", "American Beech", "American Elm", "American
> Mountain-ash",
>    "Balsam Fir", "Black Ash", "Black Cherry", "Black Spruce",
>    "Bunchberry", "Bush Honeysuckle", "Choke Cherry", "Currant",
>    "Eastern Hemlock", "Eastern White Cedar", "Eastern White Pine",
>    "Fly Honeysuckle", "Hard Maple", "Hobblebush", "Ironwood",
>    "Leatherwood", "Maple-leaved Viburnum", "Mountain Maple",
>    "Northern Red Oak", "Red Maple", "Red Pine", "Serviceberry",
>    "Striped Maple", "Trembling Aspen", "White Ash", "White Birch",
>    "White Spruce", "Yellow Birch"), class = "factor")), .Names =
> c("spp",
> "spp.orig", "OPL", "form", "Type", "keep", "Sname", "Cname"), row.names
> = c("1",
> "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
> "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
> "26", "27", "28", "29", "30", "31", "32", "33", "34"), class =
> "data.frame")
>
> Thanks, DaveT.
> *************************************
> Silviculture Data Analyst
> Ontario Forest Research Institute
> Ontario Ministry of Natural Resources
> david.john.thompson at ontario.ca
> http://ofri.mnr.gov.on.ca
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>



-- 
Jim Holtman
Cincinnati, OH
+1 513 646 9390

What is the problem you are trying to solve?



More information about the R-help mailing list