[R] parsing a complex file

Glenn Schultz glennmschultz at me.com
Sat Aug 27 22:56:59 CEST 2016


All,

I have a complex file that I would like to parse in R; a sample is shown below.

The header is 1 to 200 and the detail is 1 to 200.  So far I have written the following code to parse the file:

# Split one long fixed-width string (`data`) into records of `record_width`
# characters, write them one per line to the file named by `Result`, and read
# them back as the character vector `data2`.
#
# Inputs (defined by the caller): `data`   - a single character string
#                                 `Result` - path/connection for the output
# Outputs: `numchar`, `start`, `end`, `final`, `quartile`, `data2`
record_width <- 398L

stopifnot(is.character(data), length(data) == 1L, !is.na(data))

numchar <- nchar(x = data, type = "chars")

# Derive both index vectors from one record count so they always have the
# same length. Two independent seq() calls disagree whenever `numchar` is not
# a multiple of the record width, and seq(398, numchar, 398) fails outright
# when the input is shorter than one record.
n_records <- ceiling(numchar / record_width)
start <- (seq_len(n_records) - 1L) * record_width + 1L
# Clamp the last end position so a trailing partial record is kept intact.
end <- pmin(start + record_width - 1L, numchar)

# substring() is vectorised over start/end; no extra package is needed.
final <- substring(data, start, end)
quartile <- final

# One record per line, then read back so `data2` mirrors the file on disk.
writeLines(quartile, Result)
data2 <- readLines(Result)

The code above gets me to data2.  All is well so far.  However, I need to send the header, which begins with 1 at byte location 1, to one file and the detail, which begins with 2 at byte location 1, to another file.  When I look at data2 in RStudio I see the following.  The file is 185 MB; I have the lines, but I am stuck as to the next step.  Any ideas are appreciated.

Glenn


dput of the data

"1176552 CL20031031367RBV319920901                                                                                                                                                                      217655208875{08875{08875{08875{08875{08875{22D22D22D22D22D22D13C13C13C13C13C13C0000604000{0000604000{0000604000{0000604000{0000604000{0000604000{36{36{36{36{36{36{08500{08500{08500{08500{08500{08500{1254240 CL20031031371KLV120020201                                                                                                                                                                      225424007484{07250{07375{07500{07625{08625{33F06H33H33I34{34A02A01I02{02{02A03B0001121957C0000123500{0000920000{0001280000{0001741000{0003849000{35I30{36{36{36{36{07000{07000{07000{07000{07000{07000{1254253 CL20031031371KMA620020301                                                                                                                                                                      225425306715{06250{06500{06750{06875{07000{33C23G33C33I34{34A02{01I02{02{02A02C0000946646A0000350000{0000850000{0001030000{0001205000{0001300000{35H30{36{36{36{36{06000{06000{06000{06000{06000{06000{1259455 CL20031031371RE4420020501                                                                                                                                                                      225945507045{06750{06875{07000{07250{07375{34{28B34A34B34B34C01H01G01H01H01H02C0000934444E0000360000{0000765000{0000995000{0001384000{0002184000{35I30{36{36{36{36{06500{06500{06500{06500{06500{06500{1261060 CI20031031371S5V219940101                                                                                                                                                                      226106006637{06500{06500{06625{06750{06875{05B00C04H05I06B06B11H11G11G11H11H11I0001169090I0000650000{0000950000{0001250000{0001328000{0001900000{18{18{18{18{18{18{06000{06000{06000{06000{06000{06000{1335271 
CI20031031375HMU519960101                                                                                                                                                                      233527107500{07500{07500{07500{07500{07500{08B06B08E08F08F08F09D09D09D09D09E09E0000717375{0000464000{0000550000{0000770000{0001085500{0001085500{18{18{18{18{18{18{07000{07000{07000{07000{07000{07000{1440840 CL20031031380HV9519981101                                                                                                                                                                      244084006707{06500{06625{06750{06875{06875{27D03C28C29H30{30A06{05I06{06{06{06A0000615172I0000250000{0000621000{0000673000{0000750000{0000791000{36{36{36{36{36{36{06000{06000{06000{06000{06000{06000{1521993 CI20031031384E3A620000101                                                                                                                                                                      252199306937{06875{06875{06875{07000{07000{12H02H12H13{13D13E04E04E04E04E04F04F0001129428F0000700000{0000955000{0001000000{0002087000{0002087000{18{18{18{18{18{18{06500{06500{06500{06500{06500{06500{1538080 CL20031031384YXH420000501                                                                                                                                                                      253808008875{08875{08875{08875{08875{08875{31I31I31I31I31I31I04A04A04A04A04A04A0001419300{0001419300{0001419300{0001419300{0001419300{0001419300{36{36{36{36{36{36{07000{07000{07000{07000{07000{07000{1659123 CI20031031390XG8720020801                                                                                                                                                                      265912306909{06750{06750{06875{07000{07125{16E15I16C16E16F16F01E01D01D01E01E01G0000998541G0000162000{0000792000{0001156500{0001600000{0001990000{18{18{18{18{18{18{06000{06000{06000{06000{06000{06000{"


dput of data2
c("1176552 CL20031031367RBV319920901 217655208875{08875{08875{08875{08875{08875{22D22D22D22D22D22D13C13C13C13C13C13C0000604000{0000604000{0000604000{0000604000{0000604000{0000604000{36{36{36{36{36{36{08500{08500{08500{08500{08500{08500{", 
"1254240 CL20031031371KLV120020201 225424007484{07250{07375{07500{07625{08625{33F06H33H33I34{34A02A01I02{02{02A03B0001121957C0000123500{0000920000{0001280000{0001741000{0003849000{35I30{36{36{36{36{07000{07000{07000{07000{07000{07000{", 
"1254253 CL20031031371KMA620020301 225425306715{06250{06500{06750{06875{07000{33C23G33C33I34{34A02{01I02{02{02A02C0000946646A0000350000{0000850000{0001030000{0001205000{0001300000{35H30{36{36{36{36{06000{06000{06000{06000{06000{06000{", 
"1259455 CL20031031371RE4420020501 225945507045{06750{06875{07000{07250{07375{34{28B34A34B34B34C01H01G01H01H01H02C0000934444E0000360000{0000765000{0000995000{0001384000{0002184000{35I30{36{36{36{36{06500{06500{06500{06500{06500{06500{", 
"1261060 CI20031031371S5V219940101 226106006637{06500{06500{06625{06750{06875{05B00C04H05I06B06B11H11G11G11H11H11I0001169090I0000650000{0000950000{0001250000{0001328000{0001900000{18{18{18{18{18{18{06000{06000{06000{06000{06000{06000{", 
"1335271 CI20031031375HMU519960101 233527107500{07500{07500{07500{07500{07500{08B06B08E08F08F08F09D09D09D09D09E09E0000717375{0000464000{0000550000{0000770000{0001085500{0001085500{18{18{18{18{18{18{07000{07000{07000{07000{07000{07000{", 
"1440840 CL20031031380HV9519981101 244084006707{06500{06625{06750{06875{06875{27D03C28C29H30{30A06{05I06{06{06{06A0000615172I0000250000{0000621000{0000673000{0000750000{0000791000{36{36{36{36{36{36{06000{06000{06000{06000{06000{06000{", 
"1521993 CI20031031384E3A620000101 252199306937{06875{06875{06875{07000{07000{12H02H12H13{13D13E04E04E04E04E04F04F0001129428F0000700000{0000955000{0001000000{0002087000{0002087000{18{18{18{18{18{18{06500{06500{06500{06500{06500{06500{", 
"1538080 CL20031031384YXH420000501 253808008875{08875{08875{08875{08875{08875{31I31I31I31I31I31I04A04A04A04A04A04A0001419300{0001419300{0001419300{0001419300{0001419300{0001419300{36{36{36{36{36{36{07000{07000{07000{07000{07000{07000{", 
"1659123 CI20031031390XG8720020801 265912306909{06750{06750{06875{07000{07125{16E15I16C16E16F16F01E01D01D01E01E01G0000998541G0000162000{0000792000{0001156500{0001600000{0001990000{18{18{18{18{18{18{06000{06000{06000{06000{06000{06000{"
) 

Data 2

 [1] "1176552 CL20031031367RBV319920901 217655208875{08875{08875{08875{08875{08875{22D22D22D22D22D22D13C13C13C13C13C13C0000604000{0000604000{0000604000{0000604000{0000604000{0000604000{36{36{36{36{36{36{08500{08500{08500{08500{08500{08500{"
[2] "1254240 CL20031031371KLV120020201 225424007484{07250{07375{07500{07625{08625{33F06H33H33I34{34A02A01I02{02{02A03B0001121957C0000123500{0000920000{0001280000{0001741000{0003849000{35I30{36{36{36{36{07000{07000{07000{07000{07000{07000{" 


More information about the R-help mailing list