Consistent Hachi Distribution

hash_ring

#- *- coding: utf-8 -*-
"""
hash_ring
~~~~~~~~~~~~~
Implements consistent hashing that can be used when
the number of server nodes can increase or decrease (like in memcached) .

Consistent hashing is a scheme that provides a hash table functionality
in a way that the adding or removing of one slot
does not significantly change the mapping of keys to slots.

More information about consistent hashing can be read in these articles:

"Web Caching with Consistent Hashing":
http://www8.org/w8- papers/2a-webserver/caching/paper2.html

"Consistent hashing and random trees:
Distributed caching protocols for relieving hot spots on the World Wide Web (1997)":
http://citeseerx.ist.psu.edu/legacymapper?did=38148


Example of usage::

memcache_servers = ['192.168.0.246: 11212',
'192.168.0.247:11212',
'192.168.0.249:11212']

ring = HashRing(memcache_servers)
server = ring.get_node ('my_key')

:copyright: 2008 by Amir Salihefendic.
:license: BSD
"" "

import math
< span style="color: #0000ff">import sys
from bisect import bisect

if sys.version_info >= (2, 5):
import hashlib
md5_constructor
= hashlib.md5
else:
import md5
md5_constructor
= md5.new

class HashRing(object):

def __init__(self, nodes=None, weights=None):
< span style="color: #800000">"""
`nodes` is a list of objects that have a proper __str__ representation.
`weights` is dictionary that sets weights to the nodes. The default
weight is that all nodes are equal.
"""< span style="color: #0000 00">
self.ring = dict()
self._sorted_keys
= []

self.nodes
= nodes

if not weights:
weights
=< span style="color: #000000"> {}
self.weights = weights

self._generate_circle()< br />
def _generate_circle(self):
"""Generates the circle.
"""
total_weight
= 0
for node in self.nodes:
total_weight
+= self.weights.get(node, 1)

for node in self.nodes:
weight
= 1

if node in self.weights:
weight
= self.weights.get(node)

factor
= math.floor((40*len(self.nodes)*weight) / total_weight);

for j in range(0, int(factor)):
b_key
= self._hash_digest( '%s-%s ' % (node, j) )

for i in range(0, 3) :
key
= self._hash_val(b_key, lambda x: x+i*4)
self.ring[key]
= node
self._sorted_keys.append(key)

self ._sorted_keys.sort()

def get_node(self, string_key ):
"""Given a string key a corresponding node in the hash ring is returned.

If the hash ring is empty, `None` is returned.
"""
pos
= self.get_node_pos(string_key)
if pos is None:
return None
return self.ring[ self._sorted_keys[pos] ]

def get_node_pos(self, string_key):
"""Given a string key a corresponding node in the hash ring is returned
along with it's position in the ring.

If the hash ring is empty, (`None`, `None`) is returned.
"""
if not self.ring:
return None

key
= self.gen_key(string_key)

nodes
= self._sorted_keys
pos
= bisect(nodes, key)

if pos == len(nodes):
return 0
else< span style="color: #000000">:
return pos

def iterate_nodes(self, string_key, distinct=True):
"""Given a string key it returns the nodes as a generator that can hold the key.

The generator iterates one time through the ring
starting at the correct position.

if `distinct` is set, then the nodes returned will be unique,
ie no virtual copies will be returned.
"""
if not< span style="color: #000000"> self.ring:
yield None , None

returned_values ​​
= set()
def distinct_filter(value):
if str(value) not in returned_values:
returned_values.add(str(value))
return value

pos
= self.get_node_pos(string_key)
< span style="color: #0000ff">for key in self._sorted_keys[pos:] :
val
= distinct_filter(self.ring[key])
if val:
yield val

for i, key in enumerate(self._sorted_keys):
if i < pos:
val
= distinct_filter(self.ring[key])
if val:
yield val

def gen_key(self , key):
"""Given a string key it returns a long value ,
this long value represents a place on the hash ring.

md5 is currently used because it mixes well.
"""
b_key
= self._hash_digest(key)
return self._hash_val(b_key, lambda x: x)

def _hash_val(self , b_key, entry_fn):
return (( b_key[entry_fn(3)] << 24)
|(b_key[entry_fn(2)] << 16)
|(b_key[entry_fn( 1)] << 8)
| b_key[entry_fn(0)] )

def _hash_digest(self, key):
m
= md5_constructor()
m.update(bytes(key,encoding
=' utf-8'))
#return map(ord, m.di gest())
return list(m.digest())


'''
memcache_servers = [' 192.168.0.246:11212',
'192.168.0.247:11212',
'192.168.0.249:11212']

ring = HashRing(memcache_servers)
server = ring.get_node('my_key')
'''

# Increase weight

memcache_servers
= ['192.168.0.246:11212' ,
'192.168.0.247:11212',
'192.168.0.249:11212']
weights
= {
'192.168.0.246:11212' span>: 1,
'192.168.0.247:11212': 2,
'192.168.0.249:11212' : 1
}

ring
= HashRing(memcache_servers, weights)< br />server = ring.get_node('my_key')
print (server)

Share pictures

When adding and deleting the machine, there may be data not found

# -*- coding: utf-8 -*-
"""< br /> hash_ring
~~~~~~~~~~~~~~
Implements consistent hashing that can be used when
the number of server nodes can increase or decrease (like in memcached).

Consistent hashing is a scheme that provides a hash table functionality
in a way that the adding or removing of one slot
does not significantly change the mapping of keys to slots.

More information about consistent hashing can be read in these articles:

"Web Caching with Consistent Hashing":
ht tp://www8.org/w8-papers/2a-webserver/caching/paper2.html

"Consistent hashing and random trees:
Distributed caching protocols for relieving hot spots on the World Wide Web (1997)":
http://citeseerx.ist.psu.edu/legacymapper?did=38148


Example of usage::
< br /> memcache_servers = ['192.168.0.246:11212',
'192.168.0.247:11212',
'192.168.0.249:11212']

ring = HashRing( memcache_servers)
server = ring.get_node('my_key')

:copyright: 2008 by Amir Salihefendic.
:license: BSD
"""

import math
import sys
from bisect import bisect

if sys.version_info >= (2, 5 ):
import hashlib< br /> md5_constructor = hashlib.md5
else:
import md5
md5_constructor
= md5.new

class HashRing(object):

def __init__(self, nodes=None, weights=None):
" ""`nodes` is a list of objects that have a proper __str__ representat ion.
`weights` is dictionary that sets weights to the nodes. The default
weight is that all nodes are equal.
"""
self.ring
= dict()
self ._sorted_keys
= []

self.nodes
= nodes

if not weights:
weights
= {}
self.weights
= weights

self._generate_circle()

def _generate_circle(self):
"""Generates the circle.
"""
total_weight
= 0
for node in self.nodes:
total_weight
+= self.weights.get(node, 1)

for node in self .nodes:
weight
= 1

if node in self.weights:
weight
= self.weights.get(node)< br />
factor
= math.floor((40*len(self.nodes)*weight) / total_weight);

for j in range(0, int(factor)):
b_key
= self. _hash_digest( '%s-%s' % (node, j) )

for i in range(0, 3):
key
= self._hash_val(b_key , lambda x: x+i*4)
self.ring[key]
= node
self._sorted_keys.append(key)

self._sorted_keys.sort()

def get_node(self, string_key):
"""Give na string key a corresponding node in the hash ring is returned.

If the hash ring is empty, `None` is returned.
"""
pos
= self.get_node_pos(string_key)
if pos is None:
return None
return self.ring[ self._sorted_keys[pos] ]

def get_node_pos(self, string_key):
" ""Given a string key a corresponding node in the hash ring is returned
along with it's position in the rin g.

If the hash ring is empty, (`None`, `None`) is returned.
"" "
if not self.ring:
return None

key
= self.gen_key(string_key)

nodes
= self. _sorted_keys
pos
= bisect(nodes, key)

if pos == len(nodes):
return< span style="color: #000000"> 0
else:
return pos

def iterate_nodes(self, string_key, distinct=True):
"""Given a string key it returns the nodes as a generator that can hold the key.

The generator iterates one time through the ring
starting at the correct position.

if `distinct` is set, then the nodes returned will be unique,
ie no virtual copies will be returned.
"""
if not self.ring:
yield None, None

returned_values ​​
=< span style="color: #000000"> set()
def< /span> distinct_filter(value):
if str(value) not in returned_values:
returned_values.add( str(value))
return value

pos
= self.get_node_pos(string_key)
for key in self._sorted_keys[pos:]:
val
= distinct_filter(self.ring[key])
if val:< br /> yield val

for i, key in enumerate(self._sorted_keys):
if i < pos:
val
= distinct_filter(self.ring[key])
if val:
yield val

def gen_key(self, key):
"""Given a string key it returns a long value,
this long value represents a place on the hash ring.

md5 is currently used because it mixes well.
"""
b_key
= self._hash_digest(key)
return self._hash_val (b_key, lambda x: x)

def _hash_val(self, b_key, entry_fn):
return (( b_key[entry_fn(3)] << 24)
|(b_key[entry_fn(2)] << 16 )
|(b_key[entry_fn(1)] << 8)
| b_key[entry_fn(0)] )

def _hash_digest(self, key):
m
= md5_constructor()
m.update( bytes(key,encoding
='utf-8'))< br /> #return map(ord, m.digest())< br /> return list(m.digest())


'''
memcache_servers = ['192.168.0.246:11212',
'192.168.0.247:11212',
'192.168.0.249:11212']

ring = HashRing(memcache_servers)
server = ring.get_node('my_key')
'''

# Increase weight

memcache_servers
= ['192.168.0.246:11212',
'192.168.0.247:11212 ',
'192.168.0.249:11212']
weights
= {
' 192.168.0.246:11212': 1,
'192.168.0.247:11212': 2,
'192.168.0.249:11212': 1}

ring
= HashRing(memcache_servers, weights)
server
= ring.get_node(< span style="color: #800000">'
my_key')
print(server)

Leave a Comment

Your email address will not be published.