Javascript Getting a string into kb format
I am new to javascript and I just wanted to convert a string into a format that a person like me can read. Here is an example of what I am trying to do...
string2size(string){ //some awesome coding I have no clue how to make return awesomeAnswer }
now the return should give me something like 56 bytes or 12kb or 1mb depending how much the string is.
so if the string is... string = "there was an old woman who lived in a shoe"; then string2size(string) should return something like 3kb.
Now I know there has been a UTF-8 talk and I wouldn't object to an addition of that to the function.
I have tried google and Yahoo searches but they talk of using php but I really need it for javascript. I do thank anyone for their time. -Teske
First list the units you want to use. For example:
// 1024-based (IEC binary) units: kibi, mebi, gibi, tebi, pebi, exbi, zebi, yobi.
// Layout used by unitify(): element 0 is the base, element i is the prefix
// that stands for base^i.
var BINARY_UNITS= [1024, 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'];
// 1000-based (SI decimal) units, which are also the "kilobytes" hard disc
// manufacturers advertise. Same layout: base first, then one prefix per power.
var SI_UNITS= [1000, 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'];
Then make a function to find and divide by the biggest suitable unit for a number:
/**
 * Express a quantity using the largest prefix that does not exceed it.
 *
 * @param {number} n      The quantity to format.
 * @param {Array}  units  Unit table: units[0] is the numeric base and
 *                        units[i] (i >= 1) is the prefix for base^i.
 * @returns {string|number} The whole number of the chosen unit (rounded down)
 *                        followed by its prefix, or n itself, unchanged,
 *                        when n is smaller than one of the smallest unit.
 */
function unitify(n, units) {
    var base = units[0];
    var exponent = units.length - 1;
    // Try the biggest prefix first and work downwards to the first one that fits.
    while (exponent >= 1) {
        var threshold = Math.pow(base, exponent);
        if (n >= threshold) {
            var whole = Math.floor(n / threshold);
            return whole + units[exponent];
        }
        exponent -= 1;
    }
    // Nothing fits: hand back the plain count with no prefix.
    return n;
}
Then call on a length:
// Describe the length of `content` in both unit systems. The unit tables are
// the ones declared above as BINARY_UNITS and SI_UNITS.
var desc= 'File, '+unitify(content.length, BINARY_UNITS)+'B';
desc+= ' or in SI, '+unitify(content.length, SI_UNITS)+'B';
// eg. for a length of 1000000: 'File, 976KiB or in SI, 1MB'
I'm not sure what you mean with UTF-8, but if you want to find out the length of a character string as encoded to bytes you'll have to encode that string to UTF-8 yourself. Luckily there is a cheap trick to get a UTF-8 encoder in JavaScript:
// encodeURIComponent percent-encodes the UTF-8 bytes of `chars`; unescape then
// turns every %XX back into a single character, so `bytes` is a string holding
// exactly one character per UTF-8 byte.
// NOTE: unescape is a deprecated legacy function; it is kept here because the
// trick depends on it.
var bytes= unescape(encodeURIComponent(chars));
// The byte count is the LENGTH of that string; unitify expects a number.
alert(unitify(bytes.length, BINARY_UNITS)+'B');
Something like this will help you.
/**
 * Count how many bytes a string would take up when encoded as UTF-8.
 *
 * Each position is resolved through fixedCharCodeAt, so a surrogate pair is
 * counted once (at its high half) and the low half contributes nothing.
 *
 * @param {string} string  Text to measure.
 * @returns {number} Sum of the per-code-point byte widths.
 */
function getStringBytes(string) {
    var total = 0;
    for (var pos = 0; pos < string.length; pos++) {
        var codePoint = fixedCharCodeAt(string, pos);
        var width;
        // Width thresholds follow http://en.wikipedia.org/wiki/UTF-8#Description
        if (codePoint === false) {
            // Second half of a surrogate pair: already counted with the first half.
            width = 0;
        } else if (codePoint <= 0x007f) {
            width = 1;
        } else if (codePoint <= 0x07FF) {
            width = 2;
        } else if (codePoint <= 0xFFFF) {
            width = 3;
        } else if (codePoint <= 0x1FFFFF) {
            width = 4;
        } else if (codePoint <= 0x3FFFFFF) {
            width = 5;
        } else {
            width = 6;
        }
        total += width;
    }
    return total;
}
/**
 * Surrogate-aware replacement for String.prototype.charCodeAt.
 *
 * @param {string} str   String to read from.
 * @param {number} [idx] UTF-16 code unit index; a missing or falsy value means 0.
 * @returns {number|boolean} The full code point when idx is an ordinary
 *          character or the high half of a surrogate pair; false when idx is
 *          a low surrogate (the pair is reported at the high half's index).
 * @throws {Error} If a high surrogate is not immediately followed by a low
 *          surrogate (end of string, or any other character).
 *
 * ex. fixedCharCodeAt ('\uD800\uDC00', 0); // 65536
 * ex. fixedCharCodeAt ('\uD800\uDC00', 1); // false
 */
function fixedCharCodeAt (str, idx) {
    idx = idx || 0;
    var code = str.charCodeAt(idx);
    var hi, low;
    if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters)
        hi = code;
        low = str.charCodeAt(idx + 1);
        // The follower must be a real low surrogate. The negated range test
        // also rejects NaN (end of string), because comparisons with NaN are false.
        if (!(0xDC00 <= low && low <= 0xDFFF)) {
            throw new Error('High surrogate not followed by low surrogate');
        }
        // Combine the two 10-bit halves and add the supplementary-plane offset.
        return ((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
    }
    if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
        return false;
    }
    return code;
}
精彩评论