When purchasing historical data from a data vendor, you'll usually get it in CSV formats, JSON formats, or proprietary formats such as NX2. Since most data vendors change their data formats all the time, there are innumerable format variants around. Most formats can be directly converted to the Zorro data format with the dataParse or dataParseJSON functions and a supplied format string. Sometimes a two-step process is needed for modifying, splitting, or merging data fields that are very different to the target format. Some data has poor quality and requires detecting and removing bad records. If the data file contains multiple symbols, they must be parsed out in a loop and stored in multiple history files.
Below you'll find a collection of example scripts for non-trivial data conversion tasks. Example records of the source format are displayed in the initial comments. If you don't want to write a conversion script yourself, contact info@opgroup.de for hiring our data conversion service.
// BitMex crypto to t1 //id, exchange, market, tradeString, price, quantity, total, time_local, type //12311611 BMEX XBT/USD 14838153 957.57 940 900115.8 2016-12-31 00:01:29 BUY string Format = "+0||||f|||%Y-%m-%d %H:%M:%S|s"; string InName = "CSV\\BMEX_XBTUSD_20170101_20171231.txt"; string OutName = "XBTUSD_2017.t1"; void main() { dataNew(1,0,0); int Records = dataParse(1,Format,InName); printf("\n%d records read",Records); for(i=0; i<Records; i++) { T1* t1 = dataAppendRow(2,2); t1->time = dataVar(1,i,0); t1->fVal = dataVar(1,i,1); string buysell = dataStr(1,i,2); if(buysell[0] != 'B') t1->fVal = -t1->fVal; // negative bid quote // display progress bar and check [Stop] button if(!progress(100*i/Records,0)) break; } if(Records) dataSave(2,OutName); }
// Bittrex multiple cryptos to M1 t6 //id,exchange,symbol,unix date,price,amount,sell //1667839,BT,SLSBTC,1475314100533,0.00087836,0.85360253,false string Format = "+0,,,%t,f,f,s"; // merge ask + bid to a single T6 record with spread void addToT6(T6* Tick,DATE Time,float Price,float Vol) { static var Bid = 0, Ask = 0; if(Price < 0) { // bid? convert to ask & spread Bid = Price; Price = ifelse(Ask > 0,Ask,-Bid); } else Ask = Price; if(Tick->time == 0.) { // new record Tick->time = Time; Tick->fOpen = Tick->fHigh = Tick->fLow = Price; } Tick->fClose = Price; Tick->fLow = min(Tick->fLow,Price); Tick->fHigh = max(Tick->fHigh,Price); if(Bid < 0) Tick->fVal = max(0.,Price + Bid); Tick->fVol += Vol; } void main() { var N = assetList("AssetsBittrex"); string Name; while(Name = loop(Assets)) { string InName = strf("CSV\\%s.csv",Name); string OutName = strf("History\\%s.t6",Name); dataNew(1,0,0); int Records = dataParse(1,Format,InName); printf("\n%s: %d records read",Name,Records); dataNew(2,0,0); T6* Tick = dataAppendRow(2,7); int Minutes = 0; for(i=0; i<Records-1; i++) { var Time = dataVar(1,i,0), var Price = dataVar(1,i,1), var Vol = dataVar(1,i,2); string Sell = dataStr(1,i,3); if(Sell[0] == 't') Price = -Price; // bid quote if(Tick->time - 1./1440 > Time) { // 1 min distance Tick = dataAppendRow(2,7); Minutes++; } addToT6(Tick,Time,Price,Vol); } printf(", %d minutes",Minutes); if(Minutes) dataSave(2,OutName); } }
// Tickdata .csv to .t1, ask/bid //DateTime,Ask,Bid,Ask Volume,Bid Volume //04.05.2003 21:00:00.914,1.12354,1.12284,0,0 string Format = "+%d.%m.%Y %H:%M:%S,f1,f2"; string InName = "CSV\\tick_mxnusd.csv"; string OutName = "History\\MXNUSD_%.t1"; void main() { int i,Year; for(Year = 2010; Year <= 2022; Year++) { dataNew(1,0,0); // read only records from current year int Records = dataParse(1,Format,InName,strf(".%i",Year)); printf("\n%i - %d rows read",Year,Records); if(!Records) return; dataNew(2,0,0); for(i=0; i<Records; i++) { var Time = dataVar(1,i,0); var Price = dataVar(1,i,2); // bid price T2* Quote = dataAppendRow(2,2); Quote->time = Time; Quote->fVal = -Price; Price = dataVar(1,i,1); // ask price Quote = dataAppendRow(2,2); Quote->time = Time; Quote->fVal = Price; } dataSave(2,strf(OutName,Year)); // store year dataset printf("\n%s saved",strf(OutName,Year)); } }
// t2 order book data to t1 tick data string InName = "History\\BTCUSD_%i.t2"; string OutName = "History\\BTCUSD_%i.t1"; int StartYear = 2017, EndYear = 2022; void main() { int Year; for(Year = EndYear; Year >= StartYear; Year--) { int Records = dataLoad(1,strf(InName,j),3); printf("\n%i records",Records); if(!Records) return; T2* Quotes = dataStr(1,0,0); var LowAsk = 100000; var HighBid = -100000; var CurrentTime = 0; int i,N=0; for(i=0; i<Records; i++,Quotes++) { if(CurrentTime > Quotes->time) { T1* t1 = dataAppendRow(2,2); t1->time = CurrentTime; t1->fVal = HighBid; T1* t1 = dataAppendRow(2,2); t1->time = CurrentTime; t1->fVal = LowAsk; N+=2; LowAsk = 100000; HighBid = -100000; } CurrentTime = Quotes->time; if (Quotes->fVal > 0) // ask LowAsk= min(Quotes->fVal,LowAsk); else if (Quotes->fVal < 0) // bid HighBid = max(Quotes->fVal,HighBid); } dataSave(2,strf(OutName,j),0,N); printf("=> %i bars",N); } }
// Append OptionsDX data to t8 //[QUOTE_UNIXTIME], [QUOTE_READTIME], [QUOTE_DATE], [QUOTE_TIME_HOURS], [UNDERLYING_LAST], [EXPIRE_DATE], [EXPIRE_UNIX], [DTE], [C_DELTA], [C_GAMMA], [C_VEGA], [C_THETA], [C_RHO], [C_IV], [C_VOLUME], [C_LAST], [C_SIZE], [C_BID], [C_ASK], [STRIKE], [P_BID], [P_ASK], [P_SIZE], [P_LAST], [P_DELTA], [P_GAMMA], [P_VEGA], [P_THETA], [P_RHO], [P_IV], [P_VOLUME], [STRIKE_DISTANCE], [STRIKE_DISTANCE_PCT] //1638392400, 2021-12-01 16:00, 2021-12-01, 16.000000, 450.440000, 2021-12-01, 1638392400, 0.000000, 0.985740, 0.000470, 0.007560, -0.415280, 0.006070, 3.964130, 0.000000, 147.870000, 300 x 300, 139.610000, 142.100000, 310.000000, 0.000000, 0.010000, 0 x 4522, 0.030000, 0.000000, 0.000030, 0.000430, -0.004940, 0.000000, 2.521350, 0.000000, 140.400000, 0.312000 string Format = "+,%Y-%m-%d %H:%M,,,f5,i7,,,,,,,,,f4,,,f2,f1,f6,f8,f9,,,,,,,,,f10,,"; string InName = "CSV\\spy_eod_%d%02d.txt"; string OutName = "History\\SPY_2022.t8"; // file to append to void parse(int Year,int Month) { int Records = dataParse(2,Format,strf(InName,Year,Month)); if(!Records) return; printf("\n%d new records",Records); for(i=0; i<Records; i++) { // any record holds a call and a put contract, so split it in two CONTRACT* C = dataAppendRow(1,9); C->Type = CALL; C->time = dataVar(2,i,0); C->fAsk = dataVar(2,i,1); C->fBid = dataVar(2,i,2); C->fVol = dataVar(2,i,4); C->fUnl = dataVar(2,i,5); C->fStrike = dataVar(2,i,6); C->Expiry = dataInt(2,i,7); C = dataAppendRow(1,9); C->Type = PUT; C->time = dataVar(2,i,0); C->fAsk = dataVar(2,i,9); C->fBid = dataVar(2,i,8); C->fVol = dataVar(2,i,10); C->fUnl = dataVar(2,i,5); C->fStrike = dataVar(2,i,6); C->Expiry = dataInt(2,i,7); } } int Year = 2022,Month1 = 4,Month2 = 9; void main() { dataNew(1,0,0); for(j=Month1; j<=Month2; j++) parse(Year,j); dataSort(1); int N = dataLoad(2,OutName,9); printf("\n%i previous records",N); N = dataMerge(2,1); printf("\n%i total records",N); if(N) { dataSave(2,OutName,0,0); printf("\nOk!"); } }
// Append HistoricalOptionsData to t8 //underlying, underlying_last, exchange, optionroot, optionext, type, expiration, quotedate,strike,last,bid,ask,volume,openinterest,impliedvol,delta,gamma,theta,vega,optionalias,IVBid,IVAsk //SPY,290.13,*,SPY190621C00295000,,call,06/21/2019,04/12/2019,295,3.59,3.59,3.62,8192,34153,0.102,0.4048,0.03,-16.1222,49.0036,,0.1017,0.1023 string Format = "+,f5,,,,s8,i7,%m/%d/%Y,f6,,f2,f1,f4,f3"; string InName = "CSV\\SPY_2021.csv"; string OutName = "History\\SPY_2021.t8"; void parse() { int Records = dataParse(1,Format,InName); if(!Records) return; printf("\n%d new records",Records); for(i=0; i<Records; i++) { string PC = dataStr(1,i,8); dataSet(1,i,8,(int)ifelse(*PC=='p',PUT,CALL)); // fix expiry that is stored in reverse order int Expiry = dataInt(1,i,7); int MMDD = Expiry/10000; int YYYY = Expiry-MMDD*10000; dataSet(1,i,7,YYYY*10000+MMDD); } } void main() { dataNew(1,0,0); parse(); dataSort(1); int N = dataLoad(2,OutName,9); printf("\n%d old SPY records",N); N = dataMerge(2,1); printf("\n%d total SPY records",N); if(N) { dataSave(2,OutName,0,0); printf("\nOk!"); } }
// iVolatility H1 to t8 //ReceivingDate ReceivingTime,ID,Underl Symbol,Expiry,Strike,C/P,A/E,Symbol,Bid,Ask,Bid Time,Ask Time, Bid Size,Ask Size,Bid Exchange,Ask Exchange,Volume,?,Underlying, //2014-06-24 9:30, 23083, AMZN7, 2014-06-27 0:00, 250, C, A, AMZN7, 140627C00250000, 74.85, 79.45, 2014-06-24 16:02, 2014-06-24 16:02, 0, 0, *, *, 0, 0.263868, 327.24 string Name = "AMZN"; string Format = "+%Y-%m-%d %H:%M:%S,,,i7,f6,s8,,,f2,f1,,,,,,,f4,,f5,"; string InName = "CSV\\options_%d-%02d_000.csv"; string OutName = "History\\%s_%d.t8"; int StartMonth = 1, EndMonth = 12, Year = 2014; void main() { int i,j,k; for(Year = 2014; Year <= 2019; Year++) { dataNew(2,0,0); for(i = EndMonth; i >= StartMonth; i--) { string FileName = strf(InName,Year,i); if(!file_date(FileName)) continue; dataNew(1,0,0); int Records = dataParse(1,Format,FileName); printf("\n%d records",Records); var LastValidPrice = dataVar(1,0,5); int Dropped = 0; for(k = 0; k < Records; k++) { CONTRACT* C = dataStr(1,k,0); if(C->fUnl < 0.7*LastValidPrice) { Dropped++; continue; // remove outlier } LastValidPrice = C->fUnl; C->Type = ifelse(dataStr(1,k,8) == "C",CALL,PUT); C->Expiry /= 10; // remove the '0' from the time dataAppend(2,1,k,1); // append dataset } printf(" - %d dropped",Dropped); if(!progress(100*(EndMonth-i)/(EndMonth-StartMonth+1),0)) return; } dataSort(2); dataSave(2,strf(OutName,Name,Year)); }
// Remove outlier spikes from .t1 data int isOutlier(var Price,var Prev,var Next) { Price = abs(Price); // could be negative bid if(Price > abs(Prev)/Outlier && Price < abs(Prev)*Outlier) return 0; else if(Price > abs(Next)/Outlier && Price < abs(Next)*Outlier) return 0; else { printf("\nOutlier %i: %.2f-%.2f-%.2f", ++NumOutliers,Prev,Price,Next); return 1; } } void main() { Outlier = 1.1; // detect outliers above 10% string Name = file_select("History","T1 History\0*.t1\0"); if(!Name) return; int Records = dataLoad(1,Name,2); printf("\n %i records",Records); if(!Records) return; T1 *Tick = (T1*)dataStr(1,1,0); for(i=1; i<Records-1; i++) { if(isOutlier(Tick->fVal,(Tick-1)->fVal,(Tick+1)->fVal)) { Records = dataDelete(1,i,i); i--; } else Tick++; } if(NumOutliers) { file_copy(strx(Name,".","o."),Name); // keep a copy of the original dataSave(1,Name); } }
// Compress hi-res t6 files by removing records with similar timestamps void main() { string Name = "GER30"; var Similarity = 500; // milliseconds, min distance int Year = 2015; // first year for(; Year <= 2022; Year++) { int Records = dataCompress(1,strf("History\\%s_%4d.t6",Name,Year),7,Similarity); printf("\n Records = %i",Records); if(Records) dataSave(1,strf("History\\%s_%4d.t6",Name,Year)); } }
// merge two historical data files void main() { int Rec1 = dataLoad(1,"History\\EURUSD_2016_1.t1"),2); // earlier int Rec2 = dataLoad(2,"History\\EURUSD_2016_2.t1"),2); // later printf("\n%i + %i",Rec1,Rec2); int Rec3 = dataMerge(1,2); printf(" = %i",Rec3); dataSave(1,"History\\EURUSD_2016.t1"); }
// Split single .t6 in multi-year files void main() { string Name = file_select("History","T6 History\0*.t6\0"); if(!Name) return; int Records = dataLoad(1,Name,7); if(!Records) return; printf("\n%i records",Records); int i,j; for(i=0,j=0; i<Records; i++) { T6* Candle = dataStr(1,i,0); int Year = ymd(Candle->time)/10000, NextYear = 0; if(i < Records-1) NextYear = ymd((Candle+1)->time)/10000; if(Year != NextYear) { string OutName = strx(Name,".",strf("_%i.",Year)); dataSave(1,OutName,j,i-j+1); j = i+1; } if(!progress(100*i/Records,0)) break; } }