+

Last commit for HashMap.py: 199fad0049d0830547f9592f47eedee08a13e79b

Add first version of HashMap lib

Benjamin Renard [2015-05-26 15:53:00]
Add first version of HashMap lib
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # My hash mapping library
  5. #
  6. # Mapping configuration
  7. # {
  8. # '[dst key 1]': { # Key name in the result
  9. #
  10. # 'order': [int], # Processing order between destinations keys
  11. #
  12. # # Source values
  13. # 'other_key': [key], # Other key of the destination to use as source of values
  14. # 'key' : '[src key]', # Key of source hash to get source values
  15. # 'keys' : ['[sk1]', '[sk2]', ...], # List of source hash's keys to get source values
  16. #
  17. # # Clean / convert values
  18. # 'cleanRegex': '[regex]', # Regex used to remove unwanted characters. Ex : [^0-9+]
  19. # 'convert': [function], # Function used to convert each value : the original value is passed
  20. # # as argument and the returned value replaces the source value in
  21. # # the result
  22. # # Ex :
  23. # # lambda x: x.strip()
  24. # # lambda x: "myformat : %s" % x
  25. # # Deduplicate / check values
  26. # 'deduplicate': [bool], # If True, source values will be deduplicated
  27. # 'check': [function], # Function used to check each source value : the value is passed
  28. # # as argument and is preserved only if the function returns True
  29. # # Ex :
  30. # # lambda x: x in my_global_hash
  31. # # Join values
  32. # 'join': '[glue]', # If present, source values will be joined using the "glue"
  33. #
  34. # # Alternative mapping
  35. # 'or': { [map configuration] } # If this mapping does not retrieve any value, try to get value(s)
  36. # # with this other mapping configuration
  37. # },
  38. # '[dst key 2]': {
  39. # [...]
  40. # }
  41. # }
  42. #
  43. # Return format :
  44. # {
  45. # '[dst key 1]': ['v1','v2', ...],
  46. # '[dst key 2]': [ ... ],
  47. # [...]
  48. # }
  49.  
  50. import logging, re
  51.  
  52. def clean_value(value):
  53. if isinstance(value, int):
  54. value=str(value)
  55. return value.encode('utf8')
  56.  
  57. def map(map_keys,src,dst={}):
  58.  
  59. def get_values(dst_key,src,m):
  60. # Extract sources values
  61. values=[]
  62. if 'other_key' in m:
  63. if m['other_key'] in dst:
  64. values=dst[m['other_key']]
  65. if 'key' in m:
  66. if m['key'] in src and src[m['key']]!='':
  67. values.append(clean_value(src[m['key']]))
  68.  
  69. if 'keys' in m:
  70. for key in m['keys']:
  71. if key in src and src[key]!='':
  72. values.append(clean_value(src[key]))
  73.  
  74. # Clean and convert values
  75. if 'cleanRegex' in m and len(values)>0:
  76. new_values=[]
  77. for v in values:
  78. nv=re.sub(m['cleanRegex'],'',v)
  79. if nv!='':
  80. new_values.append(nv)
  81. values=new_values
  82.  
  83. if 'convert' in m and len(values)>0:
  84. new_values=[]
  85. for v in values:
  86. nv=m['convert'](v)
  87. if nv!='':
  88. new_values.append(nv)
  89. values=new_values
  90.  
  91. # Deduplicate values
  92. if m.get('deduplicate') and len(values)>1:
  93. new_values=[]
  94. for v in values:
  95. if v not in new_values:
  96. new_values.append(v)
  97. values=new_values
  98.  
  99. # Check values
  100. if 'check' in m and len(values)>0:
  101. new_values=[]
  102. for v in values:
  103. if m['check'](v):
  104. new_values.append(v)
  105. else:
  106. logging.debug('Invalid value %s for key %s' % (v,dst_key))
  107. if dst_key not in invalid_values:
  108. invalid_values[dst_key]=[]
  109. if v not in invalid_values[dst_key]:
  110. invalid_values[dst_key].append(v)
  111. values=new_values
  112.  
  113. # Join values
  114. if 'join' in m and len(values)>1:
  115. values=[m['join'].join(values)]
  116.  
  117. # Manage alternative mapping case
  118. if len(values)==0 and 'or' in m:
  119. values=get_values(dst_key,src,m['or'])
  120.  
  121.  
  122. return values
  123.  
  124. for dst_key in sorted(map_keys.keys(), key=lambda x: map_keys[x]['order']):
  125. values=get_values(dst_key,src,map_keys[dst_key])
  126.  
  127. if len(values)==0:
  128. if 'required' in map_keys[dst_key] and map_keys[dst_key]['required']:
  129. logging.debug('Destination key %s could not be filled from source but is required' % dst_key)
  130. return False
  131. continue
  132.  
  133. dst[dst_key]=values
  134. return dst
  135.  
  136.  
  137. if __name__ == '__main__':
  138. logging.basicConfig(level=logging.DEBUG)
  139.  
  140. src={
  141. 'uid': 'hmartin',
  142. 'firstname': 'Martin',
  143. 'lastname': 'Martin',
  144. 'disp_name': 'Henri Martin',
  145. 'line_1': '3 rue de Paris',
  146. 'line_2': 'Pour Pierre',
  147. 'zip_text': '92 120',
  148. 'city_text': 'Montrouge',
  149. 'line_city': '92120 Montrouge',
  150. 'tel1': '01 00 00 00 00',
  151. 'tel2': '09 00 00 00 00',
  152. 'mobile': '06 00 00 00 00',
  153. 'fax': '01 00 00 00 00',
  154. 'email': 'H.MARTIN@GMAIL.COM',
  155. }
  156.  
  157. map_c={
  158. 'uid': {'order': 0, 'key': 'uid','required': True},
  159. 'givenName': {'order': 1, 'key': 'firstname'},
  160. 'sn': {'order': 2, 'key': 'lastname'},
  161. 'cn': {'order': 3, 'key': 'disp_name','required': True, 'or': {'attrs': ['firstname','lastname'],'join': ' '}},
  162. 'displayName': {'order': 4, 'other_key': 'displayName'},
  163. 'street': {'order': 5, 'join': ' / ', 'keys': ['ligne_1','ligne_2']},
  164. 'postalCode': {'order': 6, 'key': 'zip_text', 'cleanRegex': '[^0-9]'},
  165. 'l': {'order': 7, 'key': 'city_text'},
  166. 'postalAddress': {'order': 8, 'join': '$', 'keys': ['ligne_1','ligne_2','ligne_city']},
  167. 'telephoneNumber': {'order': 9, 'keys': ['tel1','tel2'], 'cleanRegex': '[^0-9+]', 'deduplicate': True},
  168. 'mobile': {'order': 10,'key': 'mobile'},
  169. 'facsimileTelephoneNumber': {'order': 11,'key': 'fax'},
  170. 'mail': {'order': 12,'key': 'email', 'convert': lambda x: x.lower().strip()}
  171. }
  172.  
  173. logging.debug('[TEST] Map src=%s / config= %s' % (src,map_c))
  174. logging.debug('[TEST] Result : %s' % map(map_c,src))