+

Add first version of HashMap lib

Benjamin Renard [2015-05-26 15:53:00]
Add first version of HashMap lib
Filename
HashMap.py
diff --git a/HashMap.py b/HashMap.py
new file mode 100644
index 0000000..4775760
--- /dev/null
+++ b/HashMap.py
@@ -0,0 +1,174 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# My hash mapping library
+#
+# Mapping configuration
+#  {
+#     '[dst key 1]': {                        # Key name in the result
+#
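+#         'order': [int],                     # Processing order between destination keys
+#         'required': [bool],                 # If True and no value can be found for this key, the mapping
+#                                             # is aborted and False is returned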
+#
+#         # Source values
+#         'other_key': [key],                 # Other key of the destination to use as source of values
+#         'key' : '[src key]',                # Key of source hash to get source values
+#         'keys' : ['[sk1]', '[sk2]', ...],   # List of source hash's keys to get source values
+#
+#         # Clean / convert values
+#         'cleanRegex': '[regex]',            # Regex used to remove unwanted characters. Ex : [^0-9+]
+#         'convert': [function],              # Function used to convert a value : the original value is passed
+#                                             # as argument and the returned value replaces the source value in
+#                                             # the result
+#                                             # Ex :
+#                                             #  lambda x: x.strip()
+#                                             #  lambda x: "myformat : %s" % x
+#         # Deduplicate / check values
+#         'deduplicate': [bool],              # If True, source values will be deduplicated
+#         'check': [function],                # Function used to check a source value : the source value is passed
+#                                             # as argument and, if the function returns True, the value is preserved
+#                                             # Ex :
+#                                             #  lambda x: x in my_global_hash
+#         # Join values
+#         'join': '[glue]',                   # If present, source values will be joined using the "glue"
+#
+#         # Alternative mapping
+#         'or': { [map configuration] }       # If this mapping case does not retrieve any value, try to get value(s)
+#                                             # with this other mapping configuration
+#     },
+#     '[dst key 2]': {
+#         [...]
+#     }
+# }
+#
+# Return format :
+# {
+#   '[dst key 1]': ['v1','v2', ...],
+#   '[dst key 2]': [ ... ],
+#   [...]
+# }
+
+import logging, re
+
def clean_value(value):
    """Normalize a source value to the interpreter's native ``str`` type.

    Integers are rendered with ``str()``. A value that is already a ``str``
    is returned unchanged. The other string type of the running interpreter
    (``bytes`` on Python 3, ``unicode`` on Python 2) is converted using UTF-8.

    Returning native ``str`` keeps the result usable with the ``str`` regex
    patterns and ``str`` glue strings applied later by the mapping code.
    """
    if isinstance(value, int):
        return str(value)
    if isinstance(value, str):
        # Already native text: encoding it again would produce bytes on
        # Python 3 and trigger an implicit ASCII decode (UnicodeDecodeError
        # on accented characters) on Python 2.
        return value
    if isinstance(value, bytes):
        # Only reachable on Python 3, where bytes and str are distinct types
        return value.decode('utf8')
    # Python 2 unicode object: store it as a UTF-8 byte string
    return value.encode('utf8')
+
def map(map_keys, src, dst=None, invalid_values=None):
    """Fill a destination hash from ``src`` following the ``map_keys`` configuration.

    Note: this function keeps its historical name and therefore shadows the
    ``map`` builtin inside this module.

    :param map_keys: Mapping configuration: ``{dst_key: options}``. Supported
                     options are ``order``, ``required``, ``other_key``,
                     ``key``, ``keys``, ``cleanRegex``, ``convert``,
                     ``deduplicate``, ``check``, ``join`` and ``or``.
                     Destination keys are processed by increasing ``order``
                     (a missing ``order`` is treated as 0).
    :param src: Source hash the values are read from. Empty-string values are
                ignored.
    :param dst: Optional destination hash to complete. It is modified in place.
                A new dict is created for each call when omitted.
    :param invalid_values: Optional dict that receives, per destination key,
                           the list of values rejected by the ``check``
                           function.
    :return: The destination hash ``{dst_key: [values]}`` (keys without any
             value are left out), or ``False`` as soon as a ``required``
             destination key can not be filled. In that case, keys processed
             before the failure are already stored in ``dst``.
    """
    # Create the containers per call: a literal {} default would be shared
    # (and would keep growing) between successive calls.
    if dst is None:
        dst = {}
    if invalid_values is None:
        invalid_values = {}

    def get_values(dst_key, src, m):
        # Extract source values
        values = []
        if 'other_key' in m and m['other_key'] in dst:
            # Work on a copy so that the values appended below never end up
            # in the list already stored under the other destination key
            values = list(dst[m['other_key']])

        src_keys = []
        if 'key' in m:
            src_keys.append(m['key'])
        if 'keys' in m:
            src_keys.extend(m['keys'])
        for key in src_keys:
            if key in src and src[key] != '':
                values.append(clean_value(src[key]))

        # Clean and convert values (values that become empty are dropped)
        if 'cleanRegex' in m:
            cleaned = (re.sub(m['cleanRegex'], '', v) for v in values)
            values = [v for v in cleaned if v != '']

        if 'convert' in m:
            converted = (m['convert'](v) for v in values)
            values = [v for v in converted if v != '']

        # Deduplicate values, keeping the first occurrence. A list lookup is
        # used because converted values are not guaranteed to be hashable.
        if m.get('deduplicate'):
            unique = []
            for v in values:
                if v not in unique:
                    unique.append(v)
            values = unique

        # Check values
        if 'check' in m:
            valid = []
            for v in values:
                if m['check'](v):
                    valid.append(v)
                    continue
                logging.debug('Invalid value %s for key %s', v, dst_key)
                rejected = invalid_values.setdefault(dst_key, [])
                if v not in rejected:
                    rejected.append(v)
            values = valid

        # Join values
        if 'join' in m and len(values) > 1:
            values = [m['join'].join(values)]

        # Manage alternative mapping case
        if not values and 'or' in m:
            values = get_values(dst_key, src, m['or'])

        return values

    for dst_key in sorted(map_keys, key=lambda k: map_keys[k].get('order', 0)):
        config = map_keys[dst_key]
        values = get_values(dst_key, src, config)

        if values:
            dst[dst_key] = values
        elif config.get('required'):
            logging.debug('Destination key %s could not be filled from source but is required', dst_key)
            return False
    return dst
+
+
if __name__ == '__main__':
    # Demo / manual test: map a sample contact record and log the result
    logging.basicConfig(level=logging.DEBUG)

    src = {
        'uid': 'hmartin',
        'firstname': 'Martin',
        'lastname': 'Martin',
        'disp_name': 'Henri Martin',
        'line_1': '3 rue de Paris',
        'line_2': 'Pour Pierre',
        'zip_text': '92 120',
        'city_text': 'Montrouge',
        'line_city': '92120 Montrouge',
        'tel1': '01 00 00 00 00',
        'tel2': '09 00 00 00 00',
        'mobile': '06 00 00 00 00',
        'fax': '01 00 00 00 00',
        'email': 'H.MARTIN@GMAIL.COM',
    }

    map_c = {
        'uid': {'order': 0, 'key': 'uid', 'required': True},
        'givenName': {'order': 1, 'key': 'firstname'},
        'sn': {'order': 2, 'key': 'lastname'},
        # The alternative mapping must use 'keys': 'attrs' is not an option
        # understood by map() and would never produce a value
        'cn': {
            'order': 3,
            'key': 'disp_name',
            'required': True,
            'or': {'keys': ['firstname', 'lastname'], 'join': ' '},
        },
        # NOTE(review): the original referenced 'displayName' itself, which is
        # always empty at this point; 'cn' is assumed to be the intended
        # source - confirm
        'displayName': {'order': 4, 'other_key': 'cn'},
        # Source keys are spelled 'line_*' in src (not 'ligne_*')
        'street': {'order': 5, 'join': ' / ', 'keys': ['line_1', 'line_2']},
        'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
        'l': {'order': 7, 'key': 'city_text'},
        'postalAddress': {'order': 8, 'join': '$', 'keys': ['line_1', 'line_2', 'line_city']},
        'telephoneNumber': {
            'order': 9,
            'keys': ['tel1', 'tel2'],
            'cleanRegex': '[^0-9+]',
            'deduplicate': True,
        },
        'mobile': {'order': 10, 'key': 'mobile'},
        'facsimileTelephoneNumber': {'order': 11, 'key': 'fax'},
        'mail': {'order': 12, 'key': 'email', 'convert': lambda x: x.lower().strip()},
    }

    logging.debug('[TEST] Map src=%s / config= %s', src, map_c)
    logging.debug('[TEST] Result : %s', map(map_c, src))
ViewGit