Data operations (fill, min, max) on CSV data in JavaScript?
I'm loading different indicator CSV files into JavaScript, for example:
CSV for population:
id,year,value
AF,1800,3280000
AF,1820,3280000
AF,1870,4207000
AG,1800,37000
AG,1851,37000
AG,1861,37000
For each indicator file I need to:
- Gap fill missing years for each entity (id)
- Find the time span for each entity
- Find the min and max for each entity
- Find the time span for the indicator
- Find the min and max for the indicator
What is an inexpensive way of performing these operations? Alternatively, is there a good JavaScript library for performing these kinds of common data operations and storing the data effectively in various object representations?
I'd like the final representation of the above file to look something like:
data = {
population : {
entities :
AF : {
data : {
1800 : 3280000,
1801 : 3280000,
},
entity_meta : {
start : 1800,
end :
min :
max :
},
[...]
indicator_meta : {
start : 1700,
end :
min :
max :
}
[...]
Thanks!
Let's assume that you have the CSV data in a 2D array:
// Sample rows in [id, year, value] order, mirroring the CSV lines.
// The ids must be string literals: a bare AF / AG is an undeclared
// identifier and throws a ReferenceError as soon as the array is evaluated.
var data = [
    ["AF", 1800, 3280000],
    ["AF", 1820, 3280000],
    ["AF", 1870, 4207000],
    ["AG", 1800, 37000],
    ["AG", 1851, 37000],
    ["AG", 1861, 37000]
];
For this example I will use jQuery's utility functions, as they make the job a lot easier without any real overhead.
// Walk every [id, year, value] row once.
// - If the id has not been seen yet, create its record seeded from this row.
// - Otherwise store the new data point and widen the recorded ranges.
// start/end track the years seen; min/max track the values seen.
var entities = {};
$.each(data, function (i, row) {
    var id = row[0];
    var year = row[1];
    var value = row[2];
    var entity = entities[id];

    if (!entity) {
        // First row for this id: every range starts out as this single point.
        entity = {
            data: {},
            entity_meta: {
                start: year,
                end: year,
                min: value,
                max: value
            }
        };
        // A year cannot be written as `year: value` inside an ES5 object
        // literal, so the data point is assigned by key instead.
        entity.data[year] = value;
        entities[id] = entity;
    } else {
        var meta = entity.entity_meta;

        // add the new data point
        entity.data[year] = value;

        // widen the time span if this year falls outside it
        if (year < meta.start) {
            meta.start = year;
        }
        if (year > meta.end) {
            meta.end = year;
        }

        // widen the value range if this value falls outside it
        if (value < meta.min) {
            meta.min = value;
        }
        if (value > meta.max) {
            meta.max = value;
        }
    }
});
That obviously isn't all the code but it should explain clearly the approach that should be taken.
Also note that your intended final object structure is overcomplicated; you should really use arrays where appropriate, especially for entities and data.
Use jQuery AJAX to get the CSV file.
// Request the CSV file; once the response arrives, pass its body on to
// csvParseAndCalc.
$.get("test_csv.csv", function (csvText) {
    csvParseAndCalc(csvText);
});
Use a simple JavaScript function to parse the CSV and perform the calculations:
/**
 * Parse an "id,year,value" CSV string and compute per-entity and
 * per-indicator statistics, then gap-fill the missing years of each entity.
 *
 * Assumes the data looks like the sample: a header row followed by plain
 * comma-separated rows without quoting. Full (to spec) CSV parsing is
 * deliberately not done, in favor of speed.
 *
 * Rows that are blank, have an empty id, or whose year or value does not
 * parse as an integer are skipped, so a trailing newline or a malformed
 * line cannot create a bogus entity or turn the ranges into NaN.
 *
 * @param {string} result - Raw CSV text; the first line is treated as the
 *     header and ignored. Both "\n" and "\r\n" line endings are accepted.
 * @param {string} [indicatorName="population"] - Key under which the
 *     result is returned.
 * @returns {Object} An object of the form
 *     { <indicatorName>: { entities: { <id>: { data, entity_meta } },
 *                          indicator_meta } }
 *     where data maps year -> value (gaps filled with the previous year's
 *     value) and each meta object holds start/end (years) and min/max
 *     (values). Meta fields stay null when no valid row was seen.
 */
function csvParseAndCalc(result, indicatorName) {
    var name = indicatorName == null ? "population" : indicatorName;
    var hasOwn = Object.prototype.hasOwnProperty;
    var entities = {};
    var indicator_meta = {"start": null, "end": null, "min": null, "max": null};

    // Widen a meta record so that it covers the given year and value.
    function widen(meta, yr, val) {
        if (meta.start == null || meta.start > yr) meta.start = yr;
        if (meta.end == null || meta.end < yr) meta.end = yr;
        if (meta.min == null || meta.min > val) meta.min = val;
        if (meta.max == null || meta.max < val) meta.max = val;
    }

    var rows = String(result == null ? "" : result).split(/\r?\n/);

    // run calculations, ignoring the header row
    for (var i = 1; i < rows.length; i++) {
        var line = rows[i].trim();
        if (line === "") continue; // blank line / trailing newline

        var r = line.split(',');
        var id = r[0].trim();
        // Always pass the radix so the fields are read as decimal.
        var yr = parseInt(r[1], 10);
        var val = parseInt(r[2], 10);
        if (id === "" || isNaN(yr) || isNaN(val)) continue; // malformed row

        // Create the entity if it doesn't exist. An own-property check is
        // used so ids such as "constructor" are not mistaken for members
        // inherited from Object.prototype.
        if (!hasOwn.call(entities, id)) {
            entities[id] = {
                "data": {},
                "entity_meta": {"start": null, "end": null, "min": null, "max": null}
            };
        }
        var entity = entities[id];

        entity.data[yr] = val;
        widen(entity.entity_meta, yr, val);
        widen(indicator_meta, yr, val);
    }

    // Fill gaps in each entity's data: a missing year inherits the value of
    // the year before it, which is always present because the walk starts
    // right after the entity's first recorded year.
    for (var key in entities) {
        if (!hasOwn.call(entities, key)) continue;
        var emeta = entities[key].entity_meta;
        var edata = entities[key].data;
        for (var y = emeta.start + 1; y < emeta.end; y++) {
            if (edata[y] == null) edata[y] = edata[y - 1];
        }
    }

    var output = {};
    output[name] = {"entities": entities, "indicator_meta": indicator_meta};
    return output;
}
Maybe, YUI would be helpful for some bulk operations. http://yuilibrary.com/yui/docs/dataschema/dataschema-text.html
There are javascript sql database libraries. TaffyDB comes to mind.
精彩评论