[R] rake() error message
Michael Willmorth
MWillmorth at clearwater-research.com
Sun Jun 8 21:12:19 CEST 2014
Here is some code with a subset of 50 cases that produces a similar set
of errors to what I got with the full data set (31,690 cases):
##########
# Load "survey" package
#########
library(survey)
##########
# Create raking margins
##########
# Each pop.mNN data frame describes one population margin passed to rake()
# below: its first column holds the level codes of one raking variable
# (named exactly like the matching column of the sample data) and Freq holds
# the population count for that level.
# Margins built with an explicit c(...) of codes (rather than 1:k) have gaps
# in their code ranges, so only the listed codes have a population count.
pop.m01 <- data.frame(m01=1:14,
Freq=c(1013620, 1366250, 1246162, 1380230, 1148476, 668834, 486537,
984026, 1390836, 1293432, 1460100, 1266403, 812223, 789978))
pop.m02 <- data.frame(m02=1:6,
Freq=c(9134953, 2168974, 2561303, 1222942, 42514, 176421))
pop.m03 <- data.frame(m03=1:4,
Freq=c(2347607, 4225178, 4124366, 4609957))
pop.m04 <- data.frame(m04=1:3, Freq=c(6855946, 5094143, 3357017))
# Note: the m05 counts are fractional, unlike the other margins.
pop.m05 <- data.frame(m05=1:2, Freq=c(8311759.101, 6995347.899))
pop.m06 <- data.frame(m06c=c(1, 2, 3, 4, 7, 8, 9, 10),
Freq=c(4405337, 975699, 1248910, 680163, 4729616, 1193275, 1312393,
761714))
pop.m07 <- data.frame(m07c=c(1, 2, 3, 4, 7, 8, 9, 10, 13, 14, 15, 16),
Freq=c(2457662, 733975, 1030595, 532500, 3088896, 796220, 956879,
537929, 3588395, 638779, 573829, 371448))
pop.m08 <- data.frame(m08=1:2, Freq=c(12291607, 3015500))
pop.m09 <- data.frame(m09=1:8,
Freq=c(1215876, 1002016, 1138654, 237269, 1201567, 1760980,
6551511, 2199234))
pop.m10 <- data.frame(
m10c=c(1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
38, 39, 40, 41, 42, 43, 45, 47, 50, 53, 55),
Freq=c(346455, 177926, 231670, 210245, 128888, 120692, 291243,
150882, 192203, 171500, 105349, 90839, 184875, 178779,
166734, 210432, 185412, 112573, 99849, 99772, 45332, 40956,
26492, 24717, 161902, 178638, 181969, 230485, 209149,
129941, 109483, 475277, 297864, 360707, 293674, 179154,
154304, 2300506, 2269687, 1981318, 939737, 829522, 429975))
pop.m11 <- data.frame(m11=1:16,
Freq=c(586940, 628936, 483989, 518027, 559004, 579650, 115631,
121638, 591331, 610236, 852971, 908009, 3061203, 3490308,
1059040, 1140194))
pop.m12 <- data.frame(
m12c=c(1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16),
Freq=c(1032707, 183169, 837901, 164115, 1002380, 136274, 237269,
1057294, 144273, 1208839, 552141, 2355498, 4196013, 1572446,
626788))
pop.m13 <- data.frame(
m13=c(1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35,
36, 37, 38, 39, 40, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52,
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63),
Freq=c(246865, 38202, 159105, 61255, 62872, 105191, 69317, 39045,
66392, 50337, 39510, 38257, 234133, 726116, 31810, 41440,
43168, 47212, 39748, 4038, 50728, 90352, 20743, 52311,
57355, 584496, 38543, 1045866, 170388, 183670, 363158,
85188, 276129, 33867, 94775, 50888, 77361, 126755, 230003,
88963, 173956, 120663, 25884, 14790, 28074, 76630, 1153368,
60013, 39119, 86528, 146917, 52673, 50150, 71815, 736424,
33645, 19395, 6551511))
pop.m14 <- data.frame(
m14c=c(1, 3, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
35, 37, 39, 41, 43, 45, 49, 51, 53, 55, 57, 59, 63, 65, 67,
69, 71, 73, 75, 77, 79, 83, 87, 89, 91, 93, 95, 97, 99, 101,
103, 105, 107, 109, 111, 113, 115, 117, 119, 120, 121, 123,
125, 126),
Freq=c(246865, 38202, 159105, 61255, 62872, 105191, 69317, 39045,
66392, 50337, 39510, 38257, 234133, 726116, 31810, 41440,
43168, 47212, 39748, 4038, 50728, 90352, 20743, 52311,
57355, 584496, 38543, 1045866, 170388, 183670, 363158,
85188, 276129, 33867, 94775, 50888, 77361, 126755, 230003,
88963, 173956, 120663, 25884, 14790, 28074, 76630, 1153368,
60013, 39119, 86528, 146917, 52673, 50150, 71815, 442624,
293800, 33645, 19395, 2351378, 4200133))
pop.m15 <- data.frame(
m15c=c(1, 3, 5, 8, 22, 26, 29, 36, 43, 50, 57, 64, 71, 78, 85, 92,
99, 102, 104, 106, 113, 120, 127, 134, 141, 148, 155, 169,
176, 183, 190, 192, 194, 197, 204, 218, 225, 229, 232, 236,
239, 246, 250, 253, 260, 267, 274, 288, 292, 302, 309, 316,
319, 321, 323, 327, 330, 337, 344, 351, 356, 358, 362, 365,
372, 379, 386, 393, 400, 407, 414, 418, 421, 428, 435, 437,
440),
Freq=c(83110, 78832, 84923, 38202, 98781, 60324, 61255, 62872,
105191, 69317, 39045, 66392, 50337, 39510, 38257, 234133,
318650, 259522, 147944, 31810, 41440, 43168, 47212, 39748,
4038, 50728, 90352, 20743, 52311, 57355, 181148, 197737,
205611, 38543, 1045866, 170388, 113287, 70383, 234455,
128703, 85188, 187670, 88459, 33867, 94775, 50888, 77361,
82095, 44660, 230003, 88963, 74361, 66803, 32792, 76814,
43849, 25884, 14790, 28074, 60060, 16570, 749637, 403731,
60013, 39119, 86528, 146917, 52673, 50150, 71815, 469711,
266713, 33645, 19395, 2300506, 2269687, 1981318))
pop.m16 <- data.frame(
m16c=c(1, 2, 3, 7, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29,
30, 31, 33, 35, 37, 39, 41, 43, 45, 49, 51, 53, 55, 56, 57,
59, 63, 65, 66, 67, 68, 69, 71, 73, 75, 77, 79, 83, 87, 89,
91, 92, 93, 95, 97, 99, 101, 102, 103, 105, 107, 109, 111,
113, 115, 117, 119, 120, 121, 123, 125, 126),
Freq=c(117698, 129167, 38202, 77086, 82019, 61255, 62872, 105191,
69317, 39045, 66392, 50337, 39510, 38257, 234133, 345807,
380309, 31810, 41440, 43168, 47212, 39748, 4038, 50728,
90352, 20743, 52311, 57355, 277767, 306729, 38543, 1045866,
170388, 90719, 92951, 172428, 190730, 85188, 276129, 33867,
94775, 50888, 77361, 126755, 230003, 88963, 84847, 89109,
120663, 25884, 14790, 28074, 37793, 38837, 1153368, 60013,
39119, 86528, 146917, 52673, 50150, 71815, 348252, 388172,
33645, 19395, 3061203, 3490308))
##########
# Create test data
##########
# Literal definition of the 50-case test data frame (looks like dput()
# output). One row per case, with these columns:
#   UNIQUEID - case identifier (used as the sampling-unit id below)
#   wt2ctr   - numeric weight (used as the design weight below)
#   m01..m16c - integer-coded raking variables matching the pop.m* margins
#   ststr    - stratum code (used as the strata variable below)
# The row.names run from 1 to 31690, presumably the positions of these cases
# in the full data set -- TODO confirm.
data473t <- structure(list(UNIQUEID = c(1012014089180, 1022013238298,
1042013241039,
1052013004507, 1072013249467, 1082014008028, 1102013092794,
1122013259076,
1132013100359, 1152013102251, 1162013104498, 1182013273804,
1192013114724,
1212014031349, 1232013123973, 1242014037607, 1262013294983,
1272013033924,
1292013037388, 1312013148206, 1322013154681, 1342013161674,
1352013315374,
1372013177797, 1382013322571, 1402014055466, 1412013328023,
1432014101258,
1442013334151, 1462013338479, 1482014069742, 1492013206558,
1512013212611,
1522014076485, 1542013358421, 1562014082968, 1572013366565,
2012013001961,
2012014000821, 2022013048057, 2022013086770, 2032014011659,
2032013103103,
2042013120142, 2052014014716, 2052013025234, 2052014005041,
2062013097623,
2062013080902, 2082013084666), wt2ctr = c(325.795101348449,
70.583847491458,
53.5737152891645, 87.3420548804146, 97.7774085298657, 55.7519795014608,
54.9473333894856, 129.432485616598, 83.6325849328425, 236.156871839518,
93.8174699019665, 155.458343257822, 1120.17376408564, 101.74176747363,
160.940587929738, 121.569937640962, 134.943277024256, 596.666876414474,
741.828100861109, 130.455392502077, 96.6389182239747, 26.4247473045969,
105.630503418385, 43.9170425271054, 23.0636168826181, 95.6396766959478,
49.0569303152236, 414.236075685919, 112.887129394625, 154.760416704869,
569.501845222299, 46.3700705303052, 88.3535743322299, 279.097109402007,
232.787696027288, 180.928665939598, 103.020256291952, 106.979086343284,
1063.72009129969, 700.758625136715, 898.276244958075, 695.86666592582,
871.431921393644, 387.322293202373, 701.031310697548, 420.535216267998,
649.03695813162, 1393.79948753652, 1282.55016779916, 4924.88352884971
), m02 = c(1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 2L, 4L), m03 = c(4L, 3L, 3L, 1L, 2L, 3L, 3L, 4L,
3L, 2L, 2L, 4L, 4L, 2L, 4L, 4L, 2L, 4L, 1L, 2L, 3L, 2L, 2L, 4L,
4L, 2L, 3L, 4L, 3L, 4L, 3L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 4L,
3L, 2L, 3L, 4L, 2L, 3L, 2L, 2L, 4L, 4L), m04 = c(1L, 3L, 3L,
3L, 2L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L,
2L, 1L, 3L, 1L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L,
3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 1L, 3L, 1L),
m05 = c(2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 1L, 1L), m08 = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), m09 = c(1L, 3L, 5L, 5L, 3L, 6L, 5L, 3L, 5L, 5L, 5L, 5L,
7L, 6L, 6L, 6L, 6L, 6L, 8L, 3L, 4L, 4L, 3L, 2L, 2L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L,
3L, 4L, 5L, 5L, 5L, 6L, 6L, 8L), m01 = c(11L, 12L, 5L, 14L,
9L, 12L, 12L, 5L, 14L, 6L, 14L, 6L, 13L, 11L, 4L, 13L, 8L,
11L, 14L, 7L, 14L, 5L, 14L, 4L, 12L, 6L, 14L, 10L, 10L, 12L,
5L, 13L, 6L, 4L, 12L, 4L, 12L, 10L, 10L, 2L, 11L, 3L, 10L,
4L, 2L, 1L, 11L, 1L, 11L, 12L), m11 = c(2L, 6L, 9L, 10L,
6L, 12L, 10L, 5L, 10L, 9L, 10L, 9L, 14L, 12L, 11L, 12L, 12L,
12L, 16L, 5L, 8L, 7L, 6L, 3L, 4L, 5L, 6L, 6L, 4L, 4L, 3L,
4L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 9L,
10L, 11L, 12L, 16L), m13 = c(15L, 45L, 17L, 10L, 22L, 14L,
18L, 33L, 57L, 58L, 1L, 42L, 63L, 53L, 14L, 40L, 44L, 36L,
30L, 34L, 9L, 54L, 55L, 51L, 51L, 12L, 6L, 38L, 50L, 35L,
28L, 26L, 19L, 37L, 15L, 7L, 2L, 2L, 7L, 59L, 28L, 38L, 33L,
4L, 1L, 10L, 42L, 14L, 60L, 30L), m06c = c(7L, 7L, 1L, 7L,
7L, 10L, 7L, 1L, 7L, 1L, 7L, 1L, 9L, 7L, 1L, 7L, 7L, 7L,
7L, 1L, 7L, 1L, 7L, 4L, 7L, 1L, 7L, 7L, 7L, 7L, 1L, 7L, 1L,
1L, 7L, 1L, 7L, 7L, 7L, 1L, 7L, 1L, 7L, 1L, 1L, 2L, 7L, 1L,
8L, 10L), m07c = c(7L, 13L, 13L, 13L, 1L, 16L, 13L, 13L,
13L, 13L, 13L, 13L, 15L, 7L, 7L, 13L, 1L, 7L, 13L, 13L, 13L,
13L, 13L, 10L, 13L, 13L, 13L, 7L, 7L, 13L, 13L, 13L, 13L,
7L, 13L, 7L, 13L, 7L, 7L, 1L, 7L, 7L, 7L, 7L, 1L, 2L, 7L,
1L, 8L, 16L), m10c = c(4L, 19L, 33L, 35L, 16L, 40L, 33L,
19L, 35L, 34L, 35L, 34L, 47L, 39L, 39L, 41L, 36L, 39L, 55L,
21L, 28L, 26L, 21L, 11L, 12L, 20L, 21L, 17L, 10L, 12L, 12L,
13L, 6L, 4L, 5L, 4L, 5L, 3L, 3L, 8L, 11L, 17L, 17L, 25L,
30L, 29L, 32L, 36L, 39L, 53L), m12c = c(1L, 5L, 9L, 9L, 5L,
12L, 9L, 5L, 9L, 9L, 9L, 9L, 14L, 11L, 11L, 11L, 11L, 11L,
15L, 5L, 7L, 7L, 5L, 4L, 3L, 5L, 5L, 5L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 5L, 5L, 7L, 9L, 10L,
9L, 11L, 12L, 16L), m14c = c(29L, 89L, 33L, 19L, 43L, 27L,
35L, 65L, 113L, 115L, 1L, 83L, 126L, 105L, 27L, 79L, 87L,
71L, 59L, 67L, 17L, 107L, 109L, 101L, 101L, 23L, 11L, 75L,
99L, 69L, 55L, 51L, 37L, 73L, 29L, 13L, 3L, 3L, 13L, 117L,
55L, 75L, 65L, 7L, 1L, 19L, 83L, 27L, 120L, 59L), m15c = c(99L,
309L, 113L, 64L, 148L, 92L, 120L, 225L, 393L, 400L, 5L, 288L,
439L, 365L, 92L, 274L, 302L, 246L, 204L, 232L, 57L, 372L,
379L, 351L, 351L, 78L, 36L, 260L, 344L, 239L, 190L, 176L,
127L, 253L, 99L, 43L, 8L, 8L, 43L, 407L, 190L, 260L, 225L,
22L, 1L, 64L, 288L, 92L, 414L, 204L), m16c = c(30L, 89L,
33L, 19L, 43L, 27L, 35L, 65L, 113L, 115L, 2L, 83L, 126L,
105L, 27L, 79L, 87L, 71L, 59L, 67L, 17L, 107L, 109L, 101L,
102L, 23L, 11L, 75L, 99L, 69L, 55L, 51L, 37L, 73L, 30L, 13L,
3L, 3L, 13L, 117L, 56L, 75L, 66L, 7L, 1L, 19L, 83L, 27L,
120L, 59L), ststr = c(361011L, 361021L, 361041L, 361051L,
361071L, 361081L, 361101L, 361121L, 361131L, 361151L, 361161L,
361181L, 361191L, 361211L, 361231L, 361241L, 361261L, 361272L,
361291L, 361311L, 361321L, 361341L, 361351L, 361371L, 361381L,
361401L, 361411L, 361431L, 361442L, 361461L, 361481L, 361491L,
361511L, 361522L, 361541L, 361561L, 361571L, 362012L, 362019L,
362029L, 362029L, 362039L, 362039L, 362049L, 362059L, 362059L,
362059L, 362069L, 362069L, 362089L)), .Names = c("UNIQUEID",
"wt2ctr", "m02", "m03", "m04", "m05", "m08", "m09", "m01", "m11",
"m13", "m06c", "m07c", "m10c", "m12c", "m14c", "m15c", "m16c",
"ststr"), row.names = c(1L, 647L, 1294L, 1941L, 2587L, 3234L,
3881L, 4528L, 5174L, 5821L, 6468L, 7114L, 7761L, 8408L, 9055L,
9701L, 10348L, 10995L, 11641L, 12288L, 12935L, 13581L, 14228L,
14875L, 15522L, 16168L, 16815L, 17462L, 18109L, 18755L, 19402L,
20049L, 20695L, 21342L, 21989L, 22636L, 23282L, 23929L, 24576L,
25222L, 25869L, 26516L, 27162L, 27809L, 28456L, 29103L, 29749L,
30396L, 31043L, 31690L), class = "data.frame")
##########
# Check first few cases of test data
##########
# Print the first six rows as a quick sanity check of the test data.
# The data frame created above is named data473t; the previous call used
# the undefined name testd473t and stopped with "object not found".
head(data473t)
##########
# Create survey object
##########
# Build the design object from the test data: UNIQUEID identifies the
# sampling units, ststr defines the strata and wt2ctr supplies the weights
# that rake() adjusts afterwards.
# The first argument is spelled out as `ids` (its full name) rather than
# relying on partial matching of `id`.
sdes473t <- svydesign(ids = ~UNIQUEID, strata = ~ststr, weights = ~wt2ctr,
                      data = data473t)
##########
# Create new survey object with raked weights
##########
# Rake the design weights to the 16 population margins. The i-th formula in
# sample.margins must name the same variable as the first column of the i-th
# data frame in population.margins, so the two lists are kept in the same
# order. Both arguments are given by their full names instead of the
# partially matched `sample=` / `population=`.
#
# NOTE(review): this call is the one that fails with
#   "Error in array(dim = extent, dimnames = namelist) : vector is too large"
# plus integer-overflow warnings. Presumably rake() first cross-tabulates
# all margin variables jointly, and the number of cells across 16 variables
# overflows an integer -- confirm against the survey::rake source. If so,
# raking on fewer margins at once, or calibrate(..., calfun = "raking"),
# would avoid building that table.
d473.raked <- rake(
  sdes473t,
  sample.margins = list(~m01, ~m02, ~m03, ~m04, ~m05, ~m06c, ~m07c, ~m08,
                        ~m09, ~m10c, ~m11, ~m12c, ~m13, ~m14c, ~m15c,
                        ~m16c),
  population.margins = list(pop.m01, pop.m02, pop.m03, pop.m04, pop.m05,
                            pop.m06, pop.m07, pop.m08, pop.m09, pop.m10,
                            pop.m11, pop.m12, pop.m13, pop.m14, pop.m15,
                            pop.m16)
)
-----Original Message-----
From: Michael Willmorth
Sent: Saturday, June 07, 2014 9:23 AM
To: r-help at R-project.org
Subject: rake() error message
I'm teaching myself how to use rake() in the R "survey" package, using
Thomas Lumley's "Complex Surveys" book. Working with one of my data
sets, I received an error that has me stumped.
Here is the code I used:
d473.raked <- rake(sdes473,
sample=list(~m01, ~m02, ~m03, ~m04, ~m05, ~m06c, ~m07c, ~m08, ~m09,
~m10c, ~m11, ~m12c, ~m13, ~m14c, ~m15c, ~m16c),
population=list(pop.m01, pop.m02, pop.m03, pop.m04, pop.m05,
pop.m06, pop.m07, pop.m08, pop.m09, pop.m10,
pop.m11, pop.m12, pop.m13, pop.m14, pop.m15,
pop.m16))
Here is the error I received:
Error in array(dim = extent, dimnames = namelist) : vector is too large
In addition: Warning messages:
1: In ngroup * (as.integer(index) - one) :
NAs produced by integer overflow
2: In group + ngroup * (as.integer(index) - one) :
NAs produced by integer overflow
3: In ngroup * nlevels(index) : NAs produced by integer overflow
Any guidance on this would be much appreciated.
Thanks,
Michael Willmorth
More information about the R-help
mailing list