The document describes performance testing of a password masking function. It identifies that the regular expressions involving JSON keys take the most time (over 1 second) because of their complexity. The function is optimized by precompiling the regular expressions and testing them individually. The results show that the slowest expressions involve JSON keys such as 'adminPass', while the fastest take less than 0.001 seconds.
# 2.
def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords.
    :returns: The unicode value of message with the password fields masked.
    """
    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns capture the text on both sides of the secret;
    # \g<1> and \g<2> put that surrounding text back around the mask.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns only capture the text preceding the secret.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message
# 4.
def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords.
    :returns: The unicode value of message with the password fields masked.
    """
    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns capture the text on both sides of the secret;
    # \g<1> and \g<2> put that surrounding text back around the mask.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns only capture the text preceding the secret.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message
# 5.
_SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password',
                  'auth_token', 'new_pass', 'auth_password', 'secret_uuid']

# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS_2 = []
_SANITIZE_PATTERNS_1 = []

# NOTE(amrith): Some regular expressions have only one parameter, some
# have two parameters. Use different lists of patterns here.
# Each template is %-formatted with {'key': <key>} before compilation, so
# "%(key)s" is the placeholder and the "\s" that follows it is the regex
# whitespace class.
_FORMAT_PATTERNS_1 = [r'(%(key)s\s*[=]\s*)[^\s^\'^\"]+']
_FORMAT_PATTERNS_2 = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
                      r'(%(key)s\s+[\"\']).*?([\"\'])',
                      r'([-]{2}%(key)s\s+)[^\'^\"^=^\s]+([\s]*)',
                      r'(<%(key)s>).*?(</%(key)s>)',
                      r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
                      r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
                      r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?'
                      '[\'"]).*?([\'"])',
                      r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']

# Compile every template once per key at import time so mask_password()
# never has to build a regular expression itself.
for key in _SANITIZE_KEYS:
    for pattern in _FORMAT_PATTERNS_2:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_2.append(reg_ex)

    for pattern in _FORMAT_PATTERNS_1:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_1.append(reg_ex)
# 7.
def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords.
    :returns: The unicode value of message with the password fields masked.
    """
    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns capture the text on both sides of the secret;
    # \g<1> and \g<2> put that surrounding text back around the mask.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns only capture the text preceding the secret.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message
8. class timeit.Timer(stmt='pass', setup='pass', timer=<timer function>)
timeit(number=1000000)
# Time one substitution. The payload and the pattern source are
# interpolated into the setup code; the timed statement is the
# re.sub() call.
# The backslashes are doubled because the statement is written in a
# non-raw string; the code timeit runs sees r'\g<1>***\g<2>'.
t = timeit.Timer(
    stmt="re.sub(pattern, r'\\g<1>***\\g<2>', payload)",
    setup="""
import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
result = t.timeit(1)
9. #!/usr/bin/env python
from __future__ import print_function
import timeit
import strutils
# A moderately sized input (~50K) string
# http://paste.openstack.org/raw/155864/
infile = 'sample.json'
with open(infile, 'r') as f:
    input_str = f.read()
print('payload has %d bytes' % len(input_str))
# 10.
# Time each two-group pattern on its own against the payload.
times = []
for pattern in strutils._SANITIZE_PATTERNS_2:
    print('\ntesting %s' % pattern.pattern)
    # NOTE(review): the pattern is recompiled from its source text without
    # flags inside the setup code -- confirm that dropping any flags the
    # original compiled pattern carried is intended for this measurement.
    t = timeit.Timer(
        "re.sub(pattern, r'\\g<1>***\\g<2>', payload)",
        """
import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
    result = t.timeit(1)
    print(result)
    times.append((result, pattern.pattern))

# Tuples sort by elapsed time first, so reversing the sorted list puts
# the slowest pattern at the top.
print('\nslowest to fastest:')
times = reversed(sorted(times))
for t in times:
    print('%s - %s' % t)

# Finally time the whole mask_password() call on the same payload.
print('\ntesting all patterns')
t = timeit.Timer(
    "strutils.mask_password('''" + input_str + "''')",
    "import strutils",
)
print(t.timeit(1))
# 24.
def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords.
    :returns: The unicode value of message with the password fields masked.
    """
    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns capture the text on both sides of the secret;
    # \g<1> and \g<2> put that surrounding text back around the mask.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns only capture the text preceding the secret.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message
# 29.
def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords.
    :returns: The unicode value of message with the password fields masked.
    """
    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # \g<1> / \g<2> re-insert the text captured on either side of the
    # secret; one-group patterns only capture the text preceding it.
    substitute1 = r'\g<1>' + secret
    substitute2 = r'\g<1>' + secret + r'\g<2>'

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    # In this version there is no early return: the patterns are looked up
    # per key and only applied for keys that occur in the message.
    for key in _SANITIZE_KEYS:
        if key in message:
            for pattern in _SANITIZE_PATTERNS_2[key]:
                message = re.sub(pattern, substitute2, message)
            for pattern in _SANITIZE_PATTERNS_1[key]:
                message = re.sub(pattern, substitute1, message)

    return message
# 30.
# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS_2 = {}
_SANITIZE_PATTERNS_1 = {}

# NOTE(dhellmann): Keep a separate list of patterns by key so we only
# need to apply the substitutions for keys we find using a quick "in"
# test.
for key in _SANITIZE_KEYS:
    _SANITIZE_PATTERNS_1[key] = []
    _SANITIZE_PATTERNS_2[key] = []

    for pattern in _FORMAT_PATTERNS_2:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_2[key].append(reg_ex)

    for pattern in _FORMAT_PATTERNS_1:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_1[key].append(reg_ex)
31. >>> import strutils
>>> strutils._SANITIZE_PATTERNS_2
{'admin_pass': [<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, <_sre.SRE_Pattern
object at 0x7fbf43620660>, <_sre.SRE_Pattern object at 0x7fbf43621200>,
<_sre.SRE_Pattern object at 0x7fbf43621ac0>, <_sre.SRE_Pattern object at
0x7fbf43627e70>, <_sre.SRE_Pattern object at 0x7fbf43629a30>, <_sre.SRE_Pattern
object at 0x7fbf4362a3f0>, <_sre.SRE_Pattern object at 0x7fbf43629fc0>],
'auth_password': [<_sre.SRE_Pattern object at 0x7fbf436322d0>, <_sre.SRE_Pattern
object at 0x7fbf436324a0>, <_sre.SRE_Pattern object at 0x7fbf43632640>,
<_sre.SRE_Pattern object at 0x7fbf43632800>, <_sre.SRE_Pattern object at
0x7fbf43631d60>, <_sre.SRE_Pattern object at 0x7fbf43633200>, <_sre.SRE_Pattern
object at 0x7fbf43633690>, <_sre.SRE_Pattern object at 0x7fbf43633e90>], …}
>>> strutils._SANITIZE_PATTERNS_2.keys()
['admin_pass', 'auth_password', 'adminPass', 'auth_token', 'secret_uuid',
'admin_password', 'new_pass', 'password']
>>> strutils._SANITIZE_PATTERNS_2['admin_pass']
[<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, …]
# 32.
# Time each two-group pattern registered for the 'admin_pass' key on its
# own against the payload.
times = []
for pattern in strutils._SANITIZE_PATTERNS_2['admin_pass']:
    print('\ntesting %s' % pattern.pattern)
    # NOTE(review): the pattern is recompiled from its source text without
    # flags inside the setup code -- confirm that dropping any flags the
    # original compiled pattern carried is intended for this measurement.
    t = timeit.Timer(
        "re.sub(pattern, r'\\g<1>***\\g<2>', payload)",
        """
import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
    result = t.timeit(1)
    print(result)
    times.append((result, pattern.pattern))

# Tuples sort by elapsed time first, so reversing the sorted list puts
# the slowest pattern at the top.
print('\nslowest to fastest:')
times = reversed(sorted(times))
for t in times:
    print('%s - %s ' % t)

# Finally time the whole mask_password() call on the same payload.
print('\ntesting all patterns')
t = timeit.Timer(
    "strutils.mask_password('''" + input_str + "''')",
    "import strutils",
)
print(t.timeit(1))