Add first version of HashMap lib
Benjamin Renard

Benjamin Renard commited on 2015-05-26 15:53:00
Showing 1 changed files, with 174 additions and 0 deletions.

... ...
@@ -0,0 +1,174 @@
1
+#!/usr/bin/python
2
+# -*- coding: utf-8 -*-
3
+#
4
+# My hash mapping library
5
+#
6
+# Mapping configuration
7
+#  {
8
+#     '[dst key 1]': {                        # Key name in the result
9
+#
10
+#         'order': [int],                     # Processing order between destinations keys
11
+#
12
+#         # Source values
13
+#         'other_key': [key],                 # Other key of the destination to use as source of values
14
+#         'key' : '[src key]',                # Key of source hash to get source values
15
+#         'keys' : ['[sk1]', '[sk2]', ...],   # List of source hash's keys to get source values
16
+#
17
+#         # Clean / convert values
18
+#         'cleanRegex': '[regex]',            # Regex used to remove unwanted characters. Ex : [^0-9+]
19
+#         'convert': [function],              # Function used to convert a value: the original value is passed
20
+#                                             # as argument and the value returned replaces the source value in
21
+#                                             # the result
22
+#                                             # Ex : 
23
+#                                             #  lambda x: x.strip()
24
+#                                             #  lambda x: "myformat : %s" % x
25
+#         # Deduplicate / check values
26
+#         'deduplicate': [bool],              # If True, source values will be deduplicated
27
+#         'check': [function],                # Function used to check a source value: the value is passed as
28
+#                                             # argument and is kept only if the function returns True
29
+#                                             # Ex :
30
+#                                             #  lambda x: x in my_global_hash
31
+#         # Join values
32
+#         'join': '[glue]',                   # If present, source values will be joined using the "glue"
33
+#
34
+#         # Alternative mapping
35
+#         'or': { [map configuration] }       # If this mapping case does not retrieve any value, try to get value(s)
36
+#                                             # with this other mapping configuration
37
+#     },
38
+#     '[dst key 2]': {
39
+#         [...]
40
+#     }
41
+# }
42
+#
43
+# Return format :
44
+# {
45
+#   '[dst key 1]': ['v1','v2', ...],
46
+#   '[dst key 2]': [ ... ],
47
+#   [...]
48
+# }
49
+
50
+import logging, re
51
+
52
def clean_value(value):
    """Return ``value`` as a UTF-8 encoded byte string.

    Numbers (int, float, ...) are first converted with ``str()``.
    Byte strings are returned unchanged, since they are assumed to be
    already encoded; re-encoding them would fail on non-ASCII content.
    Text (unicode) strings are encoded to UTF-8.

    Any other type is left to fail on the ``encode`` call, as before.
    """
    import numbers

    if isinstance(value, numbers.Number):
        value = str(value)
    # On Python 2, str() above already yields a byte string
    if isinstance(value, bytes):
        return value
    return value.encode('utf8')
56
+
57
def map(map_keys, src, dst=None):
    """Fill ``dst`` from ``src`` following the mapping configuration.

    map_keys -- dict of destination key => mapping configuration (see the
                description at the top of this file). Destination keys are
                processed by ascending 'order' (0 when not specified).
    src      -- source hash.
    dst      -- optional destination hash to complete; it is modified in
                place. A new dict is created when omitted.

    Return the destination hash (destination key => list of values), or
    False as soon as a key flagged 'required' can not be filled. In that
    case, keys processed before the failure are already stored in ``dst``.
    """
    # A fresh dict per call: a mutable default would be shared between calls
    if dst is None:
        dst = {}

    # Values rejected by a 'check' function, indexed by destination key
    invalid_values = {}

    def get_values(dst_key, src, m):
        """Compute the list of values of dst_key using mapping m."""
        # Extract source values
        values = []
        if 'other_key' in m and m['other_key'] in dst:
            # Work on a copy: the other destination key must keep its own list
            values = list(dst[m['other_key']])
        if 'key' in m:
            if m['key'] in src and src[m['key']] != '':
                values.append(clean_value(src[m['key']]))
        for key in m.get('keys', ()):
            if key in src and src[key] != '':
                values.append(clean_value(src[key]))

        # Clean and convert values (values that become empty are dropped)
        if 'cleanRegex' in m and values:
            cleaned = [re.sub(m['cleanRegex'], '', v) for v in values]
            values = [v for v in cleaned if v != '']

        if 'convert' in m and values:
            converted = [m['convert'](v) for v in values]
            values = [v for v in converted if v != '']

        # Deduplicate values (first occurrence wins, order is preserved)
        if m.get('deduplicate') and len(values) > 1:
            unique = []
            for v in values:
                if v not in unique:
                    unique.append(v)
            values = unique

        # Check values
        if 'check' in m and values:
            valid = []
            for v in values:
                if m['check'](v):
                    valid.append(v)
                    continue
                logging.debug('Invalid value %s for key %s' % (v, dst_key))
                rejected = invalid_values.setdefault(dst_key, [])
                if v not in rejected:
                    rejected.append(v)
            values = valid

        # Join values
        if 'join' in m and len(values) > 1:
            values = [m['join'].join(values)]

        # Manage alternative mapping case
        if not values and 'or' in m:
            values = get_values(dst_key, src, m['or'])

        return values

    ordered_keys = sorted(map_keys, key=lambda k: map_keys[k].get('order', 0))
    for dst_key in ordered_keys:
        values = get_values(dst_key, src, map_keys[dst_key])

        if not values:
            if map_keys[dst_key].get('required'):
                logging.debug('Destination key %s could not be filled from source but is required' % dst_key)
                return False
            continue

        dst[dst_key] = values
    return dst
135
+
136
+
137
if __name__ == '__main__':
    # Demonstration: map a sample source hash to LDAP-like attributes and
    # log both the inputs and the result at DEBUG level.
    logging.basicConfig(level=logging.DEBUG)

    src = {
        'uid': 'hmartin',
        'firstname': 'Martin',
        'lastname': 'Martin',
        'disp_name': 'Henri Martin',
        'line_1': '3 rue de Paris',
        'line_2': 'Pour Pierre',
        'zip_text': '92 120',
        'city_text': 'Montrouge',
        'line_city': '92120 Montrouge',
        'tel1': '01 00 00 00 00',
        'tel2': '09 00 00 00 00',
        'mobile': '06 00 00 00 00',
        'fax': '01 00 00 00 00',
        'email': 'H.MARTIN@GMAIL.COM',
    }

    map_c = {
        'uid': {'order': 0, 'key': 'uid', 'required': True},
        'givenName': {'order': 1, 'key': 'firstname'},
        'sn': {'order': 2, 'key': 'lastname'},
        # The alternative mapping must use 'keys': 'attrs' is not a
        # supported source option and would never return any value
        'cn': {'order': 3, 'key': 'disp_name', 'required': True,
               'or': {'keys': ['firstname', 'lastname'], 'join': ' '}},
        # NOTE(review): was 'other_key': 'displayName', which refers to
        # itself and can never be filled; 'cn' is presumably the intended
        # source - confirm
        'displayName': {'order': 4, 'other_key': 'cn'},
        # Source keys are named line_* in src (not ligne_*)
        'street': {'order': 5, 'join': ' / ', 'keys': ['line_1', 'line_2']},
        'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
        'l': {'order': 7, 'key': 'city_text'},
        'postalAddress': {'order': 8, 'join': '$',
                          'keys': ['line_1', 'line_2', 'line_city']},
        'telephoneNumber': {'order': 9, 'keys': ['tel1', 'tel2'],
                            'cleanRegex': '[^0-9+]', 'deduplicate': True},
        'mobile': {'order': 10, 'key': 'mobile'},
        'facsimileTelephoneNumber': {'order': 11, 'key': 'fax'},
        'mail': {'order': 12, 'key': 'email',
                 'convert': lambda x: x.lower().strip()},
    }

    logging.debug('[TEST] Map src=%s / config= %s' % (src, map_c))
    logging.debug('[TEST] Result : %s' % map(map_c, src))
0 175