Making a UUID (RFC 4122): what namespace should I use when hashing the full content?
I'm learning to write blog software, so I read up on Atom which requires a unique ID. Seems like what you're supposed to do is a urn:uuid: type IRI.
What makes sense to me to get a globally unique identifier is to hash the content of the post.
I've written code (see below) to generate an rfc-4122-compliant UUID (version 5), except I'm not sure what to put in for the namespace.
The RFC says it's out of its scope to say what namespace I should use, and shows a few examples, none of which seem to be the right one. I googled a bit and didn't see any recommendations beyond the ones in the RFC.
The namespace is itself supposed to be a UUID, so I'm not supposed to just put in something like "hash of the entire post" as the namespace.
To generate [all but 6 bits of] a version 5 uuid, you concatenate the namespace UUID (in raw form) with your "name".
So... here's my question: Is there a namespace UUID for using the entire contents of your document as the "name"?
Or should I make a random (v4) UUID and use that as my own personal "entire post" namespace?
Or something else?
Thanks, - Jason
P.S. I've written a UUID generator for node which is using the ns:URL namespace for now. Here's the code if you're interested:
// Copyright 2011 Jason Woofenden -- CC0
//
// An almost correct rfc-4122 v5 UUID generator for node (see http://node.js)
//
// To test, run this with node, then compare the output of these:
//
// curl http://localhost:8129/foo
// uuid -v 5 ns:URL foo
//
// Replace "foo" with any string and they should still be the same.
var
http = require('http'),
crypto = require('crypto'),
url = require('url');
// Lookup table for the first hex digit of clock_seq_hi_and_reserved.
// Each digit maps to the same digit with its highest bit set and its
// next-highest bit cleared, i.e. (digit & 0x3) | 0x8, which yields the
// "10xx" bit pattern the formatter below writes into that position.
// Declared with `var` so it is not created as an implicit global.
var hex_high_10 = {
	'0': '8',
	'1': '9',
	'2': 'a',
	'3': 'b',
	'4': '8',
	'5': '9',
	'6': 'a',
	'7': 'b',
	'8': '8',
	'9': '9',
	'a': 'a',
	'b': 'b',
	'c': '8',
	'd': '9',
	'e': 'a',
	'f': 'b'
};
// Tiny HTTP service: for a request to /<name> it replies with a
// UUID-formatted string built from SHA-1(namespace bytes + name), with the
// version digit forced to 5 and the variant bits forced to "10".
http.createServer(function (req, res) {
	var sum = crypto.createHash('sha1');

	// Namespace UUID in raw 16-byte form. These are the bytes of ns:URL
	// (6ba7b811-9dad-11d1-80b4-00c04fd430c8).
	// FIXME using ns:URL for now, what should it be?
	// Buffer.from() is used because the `new Buffer(...)` constructor is deprecated.
	sum.update(Buffer.from('6ba7b8119dad11d180b400c04fd430c8', 'hex'));

	// The "name" is the request path without its leading slash.
	// NOTE(review): the path is hashed exactly as url.parse() returns it; any
	// percent-encoding in the request is presumably still present -- confirm
	// this is what you want when comparing against `uuid -v 5 ns:URL <name>`.
	sum.update(url.parse(req.url).pathname.slice(1));

	// SHA-1 digest as 40 hex characters; only the first 32 are used.
	var hash = sum.digest('hex');

	// Format as a UUID: add dashes, version digit and reserved (variant) bits.
	var uuid = [
		hash.slice(0, 8),                                   // time_low
		hash.slice(8, 12),                                  // time_mid
		'5' + hash.slice(13, 16),                           // time_hi_and_version (digit 12 replaced by the version)
		hex_high_10[hash.charAt(16)] + hash.slice(17, 20),  // clock_seq_hi_and_reserved + clock_seq_low
		hash.slice(20, 32)                                  // node
	].join('-');

	// Send it back as plain text.
	res.writeHead(200, {'Content-Type': 'text/plain'});
	res.end(uuid + '\n');
}).listen(8129, "127.0.0.1");

console.log('Server running at http://127.0.0.1:8129/');
I faced the same question some time ago and came to the conclusion that using a v4 UUID for the namespace is the right thing to do. Basically I wanted to generate an UUID from a String, and here's what I do (in Java, but it's simple enough to translate to JS):
public final class FooIdGen {
/**
* The namespace id for generating Foo - UUIDs from the foo - id
* strings.
*/
public final static String NAMESPACE =
"0416141a-5229-4d16-94cc-43d546ef1118"; //NOI18N
private final static byte[] NS_BYTES =
uuidToBytes(UUID.fromString(NAMESPACE));
/**
* Generates a UUID for a given foo - id.
*
* @param fooId the reporter ID to get the UUID for
* @return the UUID for the specified foo ID
*/
public static UUID uuidForFooId(String fooId) {
final byte[] idBytes;
try {
idBytes = fooId.getBytes("US-ASCII"); //NOI18N
} catch (UnsupportedEncodingException ex) {
/* pretty sure US-ASCII is ok, so this can't happen */
throw new AssertionError(ex.toString());
}
final byte[] tmp = Arrays.copyOf(
NS_BYTES, idBytes.length + NS_BYTES.length);
System.arraycopy(idBytes, 0, tmp, NS_BYTES.length, idBytes.length);
return UUID.nameUUIDFromBytes(tmp);
}
/* want it to align, so that's ok */
@SuppressWarnings("PointlessBitwiseExpression")
private static byte[] uuidToBytes(UUID id) {
final long h = id.getMostSignificantBits();
final long l = id.getLeastSignificantBits();
final byte[] result = new byte[16];
int i=0;
result[i++] = (byte) ((h >> 56) & 0xff);
result[i++] = (byte) ((h >> 48) & 0xff);
result[i++] = (byte) ((h >> 40) & 0xff);
result[i++] = (byte) ((h >> 32) & 0xff);
result[i++] = (byte) ((h >> 24) & 0xff);
result[i++] = (byte) ((h >> 16) & 0xff);
result[i++] = (byte) ((h >> 8) & 0xff);
result[i++] = (byte) ((h >> 0) & 0xff);
result[i++] = (byte) ((l >> 56) & 0xff);
result[i++] = (byte) ((l >> 48) & 0xff);
result[i++] = (byte) ((l >> 40) & 0xff);
result[i++] = (byte) ((l >> 32) & 0xff);
result[i++] = (byte) ((l >> 24) & 0xff);
result[i++] = (byte) ((l >> 16) & 0xff);
result[i++] = (byte) ((l >> 8) & 0xff);
result[i++] = (byte) ((l >> 0) & 0xff);
return result;
}
private FooIdGen() {
/* no instances */
}
}
Here's some CoffeeScript code I use to generate a v4 UUID using the Sha-1 hash of something. (Convert to Javascript using http://js2coffee.org.)
# Compute a uuid v4 from the Sha-1 hash of data.
crypto = require('crypto')
# Build a UUID-formatted string from the SHA-1 hash of `data`.
#
# data    - the value passed to the hash's update() call.
# Returns a 36-character string of the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx,
# where each x is the next hex digit of the hash and y is the next hash digit
# with the variant bits applied.
#
# NOTE(review): the version digit is hard-coded to 4 although the value is
# derived from a SHA-1 hash rather than random data; RFC 4122 uses version 5
# for SHA-1 name-based UUIDs. Left unchanged so existing ids stay stable.
exports.uuidsha1 = (data) ->
  sha1sum = crypto.createHash('sha1')
  sha1sum.update(data)
  s = sha1sum.digest('hex')
  # Index of the hash digit consumed so far; advanced once per x/y placeholder.
  i = -1
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) ->
    i += 1
    switch c
      when 'x'
        return s[i]
      when 'y'
        # Keep the low-order 2 bits of the hex digit and set the high bit
        # (result is one of 8, 9, a, b).
        return ((parseInt(s[i], 16) & 0x3) | 0x8).toString(16)
  )
You might want to check out node-uuid. I'm not using it yet, but I plan to look at it more closely when I need this type of feature. It may not suit your needs, but I thought you should be aware of it. https://github.com/broofa/node-uuid
精彩评论