SlideShare uma empresa Scribd logo
1 de 36
Baixar para ler offline
Regexes and
Performance Testing
Doug Hellmann
PyATL May 2015
def mask_password(message, secret="***"):
"""Replace password with 'secret' in message.
:param message: The string which includes security information.
:param secret: value with which to replace passwords.
:returns: The unicode value of message with the password fields masked.
"""
try:
message = six.text_type(message)
except UnicodeDecodeError:
# NOTE(jecarey): Temporary fix to handle cases where message is a
# byte string. A better solution will be provided in Kilo.
pass
# NOTE(ldbragst): Check to see if anything in message contains any key
# specified in _SANITIZE_KEYS, if not then just return the message since
# we don't have to mask any passwords.
if not any(key in message for key in _SANITIZE_KEYS):
return message
substitute = r'\g<1>' + secret + r'\g<2>'
for pattern in _SANITIZE_PATTERNS_2:
message = re.sub(pattern, substitute, message)
substitute = r'\g<1>' + secret
for pattern in _SANITIZE_PATTERNS_1:
message = re.sub(pattern, substitute, message)
return message
> 3000 times faster
def mask_password(message, secret="***"):
"""Replace password with 'secret' in message.
:param message: The string which includes security information.
:param secret: value with which to replace passwords.
:returns: The unicode value of message with the password fields masked.
"""
try:
message = six.text_type(message)
except UnicodeDecodeError:
# NOTE(jecarey): Temporary fix to handle cases where message is a
# byte string. A better solution will be provided in Kilo.
pass
# NOTE(ldbragst): Check to see if anything in message contains any key
# specified in _SANITIZE_KEYS, if not then just return the message since
# we don't have to mask any passwords.
if not any(key in message for key in _SANITIZE_KEYS):
return message
substitute = r'\g<1>' + secret + r'\g<2>'
for pattern in _SANITIZE_PATTERNS_2:
message = re.sub(pattern, substitute, message)
substitute = r'\g<1>' + secret
for pattern in _SANITIZE_PATTERNS_1:
message = re.sub(pattern, substitute, message)
return message
_SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password',
'auth_token', 'new_pass', 'auth_password', 'secret_uuid']
# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS_2 = []
_SANITIZE_PATTERNS_1 = []
# NOTE(amrith): Some regular expressions have only one parameter, some
# have two parameters. Use different lists of patterns here.
_FORMAT_PATTERNS_1 = [r'(%(key)s\s*[=]\s*)[^\s^\'^\"]+']
_FORMAT_PATTERNS_2 = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
r'(%(key)s\s+[\"\']).*?([\"\'])',
r'([-]{2}%(key)s\s+)[^\'^\"^=^\s]+([\s]*)',
r'(<%(key)s>).*?(</%(key)s>)',
r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?'
'[\'"]).*?([\'"])',
r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']
for key in _SANITIZE_KEYS:
for pattern in _FORMAT_PATTERNS_2:
reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
_SANITIZE_PATTERNS_2.append(reg_ex)
for pattern in _FORMAT_PATTERNS_1:
reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
_SANITIZE_PATTERNS_1.append(reg_ex)
Bottleneck?
def mask_password(message, secret="***"):
"""Replace password with 'secret' in message.
:param message: The string which includes security information.
:param secret: value with which to replace passwords.
:returns: The unicode value of message with the password fields masked.
"""
try:
message = six.text_type(message)
except UnicodeDecodeError:
# NOTE(jecarey): Temporary fix to handle cases where message is a
# byte string. A better solution will be provided in Kilo.
pass
# NOTE(ldbragst): Check to see if anything in message contains any key
# specified in _SANITIZE_KEYS, if not then just return the message since
# we don't have to mask any passwords.
if not any(key in message for key in _SANITIZE_KEYS):
return message
substitute = r'\g<1>' + secret + r'\g<2>'
for pattern in _SANITIZE_PATTERNS_2:
message = re.sub(pattern, substitute, message)
substitute = r'\g<1>' + secret
for pattern in _SANITIZE_PATTERNS_1:
message = re.sub(pattern, substitute, message)
return message
class timeit.Timer(stmt='pass', setup='pass', timer=<timer function>)
timeit(number=1000000)
t = timeit.Timer(
stmt="re.sub(pattern, r'\g<1>***\g<2>', payload)",
setup="""
import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
result = t.timeit(1)
#!/usr/bin/env python
from __future__ import print_function
import timeit
import strutils
# A moderately sized input (~50K) string
# http://paste.openstack.org/raw/155864/
infile = 'sample.json'
with open(infile, 'r') as f:
input_str = f.read()
print('payload has %d bytes' % len(input_str))
times = []
for pattern in strutils._SANITIZE_PATTERNS_2:
print('\ntesting %s' % pattern.pattern)
t = timeit.Timer(
"re.sub(pattern, r'\g<1>***\g<2>', payload)",
"""
import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
result = t.timeit(1)
print(result)
times.append((result, pattern.pattern))
print('\nslowest to fastest:')
times = reversed(sorted(times))
for t in times:
print('%s - %s' % t)
print('\ntesting all patterns')
t = timeit.Timer(
"strutils.mask_password('''" + input_str + "''')",
"import strutils",
)
print(t.timeit(1))
$ python perf_test_mask_password.py
payload has 50000 bytes
testing (adminPasss*[=]s*["']).*?(["'])
0.000424146652222
testing (adminPasss+["']).*?(["'])
0.000438928604126
testing ([-]{2}adminPasss+)[^'^"^=^s]+([s]*)
0.000957012176514
testing (<adminPass>).*?(</adminPass>)
0.000320911407471
testing (["']adminPass["']s*:s*["']).*?(["'])
0.000932931900024
testing (['"].*?adminPass['"]s*:s*u?['"]).*?(['"])
1.39579486847
testing (['"].*?adminPass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.38312697411
testing (adminPasss*--?[A-z]+s*)S+(s*)
0.000363111495972
testing (admin_passs*[=]s*["']).*?(["'])
0.000358819961548
testing (admin_passs+["']).*?(["'])
0.000358104705811
1.38247299194 - (['"].*?adminPass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.36443901062 - (['"].*?auth_password['"]s*:s*u?['"]).*?(['"])
1.36225605011 - (['"].*?auth_token['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.35945916176 - (['"].*?admin_pass['"]s*:s*u?['"]).*?(['"])
1.358741045 - (['"].*?admin_password['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?
(['"])
1.35629177094 - (['"].*?admin_password['"]s*:s*u?['"]).*?(['"])
1.35611796379 - (['"].*?new_pass['"]s*:s*u?['"]).*?(['"])
1.35091495514 - (['"].*?auth_password['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?
(['"])
1.34805893898 - (['"].*?new_pass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.34525895119 - (['"].*?admin_pass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.3426618576 - (['"].*?adminPass['"]s*:s*u?['"]).*?(['"])
1.33739089966 - (['"].*?secret_uuid['"]s*:s*u?['"]).*?(['"])
1.31182909012 - (['"].*?secret_uuid['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.30403113365 - (['"].*?auth_token['"]s*:s*u?['"]).*?(['"])
1.29068493843 - (['"].*?password['"]s*:s*u?['"]).*?(['"])
1.28651189804 - (['"].*?password['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
0.00123500823975 - (["']password["']s*:s*["']).*?(["'])
0.00123286247253 - (["']adminPass["']s*:s*["']).*?(["'])
$ cat results
1.38247299194 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.36443901062 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.36225605011 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.35945916176 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.358741045 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.35629177094 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.35611796379 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.35091495514 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.34805893898 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.34525895119 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.3426618576 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.33739089966 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.31182909012 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
1.30403113365 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.29068493843 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
1.28651189804 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
$ cat results | sed -E 's/^.{16}/ /g'
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
$ cat results | sed -E 's/^.{16}/ /g' | sort
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
$ cat results | sed -E 's/^.{16}/ /g' | sort -u
(['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
(['"].*?KEY['"]s*:s*u?['"]).*?(['"])
(['"].*?KEY['"]\s*:\s*u?['"]).*?(['"])
“admin_pass”: “super secret”
“admin_pass”: “***”
( start group
[’”] quote
.*? anything
KEY literal key
[’”] quote
\s*:\s* colon, maybe with whitespace
u? optional literal u
[’”] quote
) end group
.*? anything
([’"]) group with quote
group 1: “admin_pass”: “
group 2: “
(['"].*?KEY['"]\s*:\s*u?['"]).*?(['"])
“admin_pass”: “super secret”
“admin_pass”: “***”
( start group
[’”] quote
.*? anything
KEY literal key
[’”] quote
\s*:\s* colon, maybe with whitespace
u? optional literal u
[’”] quote
) end group
.*? anything
([’"]) group with quote
group 1: “admin_pass”: “
group 2: “
(”)(.*?)(”)
“super secret”
“
“s
“su
“sup
“supe
“super
“super
“super s
“super se
“super sec
“super secr
“super secret
“super secret
“super secret”
(“)(.*)(”)
“super secret”
“super secret”
“super secret
“super secret”
Change 1:
Be Specific In Patterns
(['"].*?KEY['"]\s*:\s*u?['"]).*?(['"])
(['"][^"']*KEY['"]\s*:\s*u?['"])[^"']*(['"])
Before:
testing all patterns
22.2821819782
After:
testing all patterns
0.0565450191498
22.28 / 0.0565
= 394.3
def mask_password(message, secret="***"):
"""Replace password with 'secret' in message.
:param message: The string which includes security information.
:param secret: value with which to replace passwords.
:returns: The unicode value of message with the password fields masked.
"""
try:
message = six.text_type(message)
except UnicodeDecodeError:
# NOTE(jecarey): Temporary fix to handle cases where message is a
# byte string. A better solution will be provided in Kilo.
pass
# NOTE(ldbragst): Check to see if anything in message contains any key
# specified in _SANITIZE_KEYS, if not then just return the message since
# we don't have to mask any passwords.
if not any(key in message for key in _SANITIZE_KEYS):
return message
substitute = r'\g<1>' + secret + r'\g<2>'
for pattern in _SANITIZE_PATTERNS_2:
message = re.sub(pattern, substitute, message)
substitute = r'\g<1>' + secret
for pattern in _SANITIZE_PATTERNS_1:
message = re.sub(pattern, substitute, message)
return message
8 keys * 10 templates
= 80 patterns
#!/usr/bin/env python
from __future__ import print_function
import timeit
infile = 'sample.json'
with open(infile, 'r') as f:
input_str = f.read()
t = timeit.Timer(
"re.sub(pattern, r'\g<1>***\g<2>', payload)",
"""
import re
payload = '''%s'''
pattern = re.compile(r'admin_pass')
""" % input_str)
print('regex : %10.10f' % t.timeit(5))
t = timeit.Timer(
"'admin_pass' in payload",
"payload = '''%s'''" % input_str,
)
print('literal: %10.10f' % t.timeit(5))
$ python literal_vs_regex.py
regex : 0.0003869534
literal: 0.0001130104
Change 2:
Look at Fewer Patterns
def mask_password(message, secret="***"):
"""Replace password with 'secret' in message.
:param message: The string which includes security information.
:param secret: value with which to replace passwords.
:returns: The unicode value of message with the password fields masked.
"""
try:
message = six.text_type(message)
except UnicodeDecodeError:
# NOTE(jecarey): Temporary fix to handle cases where message is a
# byte string. A better solution will be provided in Kilo.
pass
substitute1 = r'\g<1>' + secret
substitute2 = r'\g<1>' + secret + r'\g<2>'
# NOTE(ldbragst): Check to see if anything in message contains any key
# specified in _SANITIZE_KEYS, if not then just return the message since
# we don't have to mask any passwords.
for key in _SANITIZE_KEYS:
if key in message:
for pattern in _SANITIZE_PATTERNS_2[key]:
message = re.sub(pattern, substitute2, message)
for pattern in _SANITIZE_PATTERNS_1[key]:
message = re.sub(pattern, substitute1, message)
return message
# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS_2 = {}
_SANITIZE_PATTERNS_1 = {}
# NOTE(dhellmann): Keep a separate list of patterns by key so we only
# need to apply the substitutions for keys we find using a quick "in"
# test.
for key in _SANITIZE_KEYS:
_SANITIZE_PATTERNS_1[key] = []
_SANITIZE_PATTERNS_2[key] = []
for pattern in _FORMAT_PATTERNS_2:
reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
_SANITIZE_PATTERNS_2[key].append(reg_ex)
for pattern in _FORMAT_PATTERNS_1:
reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
_SANITIZE_PATTERNS_1[key].append(reg_ex)
>>> import strutils
>>> strutils._SANITIZE_PATTERNS_2
{'admin_pass': [<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, <_sre.SRE_Pattern
object at 0x7fbf43620660>, <_sre.SRE_Pattern object at 0x7fbf43621200>,
<_sre.SRE_Pattern object at 0x7fbf43621ac0>, <_sre.SRE_Pattern object at
0x7fbf43627e70>, <_sre.SRE_Pattern object at 0x7fbf43629a30>, <_sre.SRE_Pattern
object at 0x7fbf4362a3f0>, <_sre.SRE_Pattern object at 0x7fbf43629fc0>],
'auth_password': [<_sre.SRE_Pattern object at 0x7fbf436322d0>, <_sre.SRE_Pattern
object at 0x7fbf436324a0>, <_sre.SRE_Pattern object at 0x7fbf43632640>,
<_sre.SRE_Pattern object at 0x7fbf43632800>, <_sre.SRE_Pattern object at
0x7fbf43631d60>, <_sre.SRE_Pattern object at 0x7fbf43633200>, <_sre.SRE_Pattern
object at 0x7fbf43633690>, <_sre.SRE_Pattern object at 0x7fbf43633e90>], …}
>>> strutils._SANITIZE_PATTERNS_2.keys()
['admin_pass', 'auth_password', 'adminPass', 'auth_token', 'secret_uuid',
'admin_password', 'new_pass', 'password']
>>> strutils._SANITIZE_PATTERNS_2['admin_pass']
[<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, …]
times = []
for pattern in strutils._SANITIZE_PATTERNS_2['admin_pass']:
print('\ntesting %s' % pattern.pattern)
t = timeit.Timer(
"re.sub(pattern, r'\g<1>***\g<2>', payload)",
"""
import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
result = t.timeit(1)
print(result)
times.append((result, pattern.pattern))
print('\nslowest to fastest:')
times = reversed(sorted(times))
for t in times:
print('%s - %s ' % t)
print('\ntesting all patterns')
t = timeit.Timer(
"strutils.mask_password('''" + input_str + "''')",
"import strutils",
)
print(t.timeit(1))
$ python ./perf_test_mask_password.py
payload has 50000 bytes
testing (admin_passs*[=]s*["'])[^"']*(["'])
0.000396966934204
testing (admin_passs+["'])[^"']*(["'])
0.000360012054443
testing ([-]{2}admin_passs+)[^'^"^=^s]+([s]*)
0.00097393989563
testing (<admin_pass>)[^<]*(</admin_pass>)
0.00052809715271
testing (["']admin_pass["']s*:s*["'])[^"']*(["'])
0.00141596794128
testing (['"][^"']*admin_pass['"]s*:s*u?['"])[^"']*(['"])
0.00194191932678
testing (['"][^'"]*admin_pass['"]s*,s*'--?[A-z]+'s*,s*u?['"])
[^"']*(['"])
0.00175809860229
testing (admin_passs*--?[A-z]+s*)S+(s*)
0.000356912612915
slowest to fastest:
0.00194191932678 - (['"][^"']*admin_pass['"]s*:s*u?['"])[^"']*(['"])
0.00175809860229 - (['"][^'"]*admin_pass['"]s*,s*'--?[A-z]+'s*,s*u?
['"])[^"']*(['"])
0.00141596794128 - (["']admin_pass["']s*:s*["'])[^"']*(["'])
0.00097393989563 - ([-]{2}admin_passs+)[^'^"^=^s]+([s]*)
0.00052809715271 - (<admin_pass>)[^<]*(</admin_pass>)
0.000396966934204 - (admin_passs*[=]s*["'])[^"']*(["'])
0.000360012054443 - (admin_passs+["'])[^"']*(["'])
0.000356912612915 - (admin_passs*--?[A-z]+s*)S+(s*)
testing all patterns
0.00732088088989
22.28 / 0.0073 = 3052
References
• https://github.com/dhellmann/pyatl-regex-performance
• https://review.openstack.org/#/c/145562/
• http://git.openstack.org/cgit/openstack/oslo.utils/tree/oslo_utils/
strutils.py
• https://docs.python.org/2.7/library/re.html
• https://docs.python.org/2/howto/regex.html#greedy-versus-non-
greedy
• https://docs.python.org/2.7/library/timeit.html
• Mastering Regular Expressions, Jeffrey E.F. Friedl

Mais conteúdo relacionado

Mais procurados

Mobile Email Security
Mobile Email SecurityMobile Email Security
Mobile Email Security
Rahul Sihag
 
Design Patterns in PHP5
Design Patterns in PHP5 Design Patterns in PHP5
Design Patterns in PHP5
Wildan Maulana
 

Mais procurados (20)

DRYing to Monad in Java8
DRYing to Monad in Java8DRYing to Monad in Java8
DRYing to Monad in Java8
 
Computer Programming- Lecture 10
Computer Programming- Lecture 10Computer Programming- Lecture 10
Computer Programming- Lecture 10
 
Making the most of 2.2
Making the most of 2.2Making the most of 2.2
Making the most of 2.2
 
Specs2
Specs2Specs2
Specs2
 
Functions
FunctionsFunctions
Functions
 
Computer Programming- Lecture 6
Computer Programming- Lecture 6Computer Programming- Lecture 6
Computer Programming- Lecture 6
 
Mobile Email Security
Mobile Email SecurityMobile Email Security
Mobile Email Security
 
Computer Programming- Lecture 7
Computer Programming- Lecture 7Computer Programming- Lecture 7
Computer Programming- Lecture 7
 
EventMachine for RubyFuZa 2012
EventMachine for RubyFuZa   2012EventMachine for RubyFuZa   2012
EventMachine for RubyFuZa 2012
 
Computer Programming- Lecture 9
Computer Programming- Lecture 9Computer Programming- Lecture 9
Computer Programming- Lecture 9
 
Lecture 12: Classes and Files
Lecture 12: Classes and FilesLecture 12: Classes and Files
Lecture 12: Classes and Files
 
Steady with ruby
Steady with rubySteady with ruby
Steady with ruby
 
Design Patterns in PHP5
Design Patterns in PHP5 Design Patterns in PHP5
Design Patterns in PHP5
 
Lenses and Prisms in Swift - Elviro Rocca - Codemotion Rome 2018
Lenses and Prisms in Swift - Elviro Rocca - Codemotion Rome 2018 Lenses and Prisms in Swift - Elviro Rocca - Codemotion Rome 2018
Lenses and Prisms in Swift - Elviro Rocca - Codemotion Rome 2018
 
jRuby: The best of both worlds
jRuby: The best of both worldsjRuby: The best of both worlds
jRuby: The best of both worlds
 
From java to kotlin beyond alt+shift+cmd+k - Droidcon italy
From java to kotlin beyond alt+shift+cmd+k - Droidcon italyFrom java to kotlin beyond alt+shift+cmd+k - Droidcon italy
From java to kotlin beyond alt+shift+cmd+k - Droidcon italy
 
Introduction to Groovy
Introduction to GroovyIntroduction to Groovy
Introduction to Groovy
 
Computer Programming- Lecture 4
Computer Programming- Lecture 4Computer Programming- Lecture 4
Computer Programming- Lecture 4
 
Ip project
Ip projectIp project
Ip project
 
Lập trình Python cơ bản
Lập trình Python cơ bảnLập trình Python cơ bản
Lập trình Python cơ bản
 

Destaque (8)

La Revolución Digital - José Manuel Casas
La Revolución Digital - José Manuel CasasLa Revolución Digital - José Manuel Casas
La Revolución Digital - José Manuel Casas
 
Ajax: User Experience
Ajax: User ExperienceAjax: User Experience
Ajax: User Experience
 
Functional Pattern Matching on Python
Functional Pattern Matching on PythonFunctional Pattern Matching on Python
Functional Pattern Matching on Python
 
Python - Lecture 7
Python - Lecture 7Python - Lecture 7
Python - Lecture 7
 
Python advanced 2. regular expression in python
Python advanced 2. regular expression in pythonPython advanced 2. regular expression in python
Python advanced 2. regular expression in python
 
Intro to Python Data Analysis in Wakari
Intro to Python Data Analysis in WakariIntro to Python Data Analysis in Wakari
Intro to Python Data Analysis in Wakari
 
Regular expressions in Python
Regular expressions in PythonRegular expressions in Python
Regular expressions in Python
 
Object Oriented Programming in Python
Object Oriented Programming in PythonObject Oriented Programming in Python
Object Oriented Programming in Python
 

Semelhante a Regexes and-performance-testing

please code in c#- please note that im a complete beginner- northwind.docx
please code in c#- please note that im a complete beginner-  northwind.docxplease code in c#- please note that im a complete beginner-  northwind.docx
please code in c#- please note that im a complete beginner- northwind.docx
AustinaGRPaigey
 
Magicke metody v Pythonu
Magicke metody v PythonuMagicke metody v Pythonu
Magicke metody v Pythonu
Jirka Vejrazka
 
Having issues with passing my values through different functions aft.pdf
Having issues with passing my values through different functions aft.pdfHaving issues with passing my values through different functions aft.pdf
Having issues with passing my values through different functions aft.pdf
rajkumarm401
 

Semelhante a Regexes and-performance-testing (20)

CoffeeScript - A Rubyist's Love Affair
CoffeeScript - A Rubyist's Love AffairCoffeeScript - A Rubyist's Love Affair
CoffeeScript - A Rubyist's Love Affair
 
Five
FiveFive
Five
 
Test and refactoring
Test and refactoringTest and refactoring
Test and refactoring
 
Swift Sequences & Collections
Swift Sequences & CollectionsSwift Sequences & Collections
Swift Sequences & Collections
 
Creating Domain Specific Languages in Python
Creating Domain Specific Languages in PythonCreating Domain Specific Languages in Python
Creating Domain Specific Languages in Python
 
Detect Negative and Positive sentiment in user reviews using python word2vec ...
Detect Negative and Positive sentiment in user reviews using python word2vec ...Detect Negative and Positive sentiment in user reviews using python word2vec ...
Detect Negative and Positive sentiment in user reviews using python word2vec ...
 
please code in c#- please note that im a complete beginner- northwind.docx
please code in c#- please note that im a complete beginner-  northwind.docxplease code in c#- please note that im a complete beginner-  northwind.docx
please code in c#- please note that im a complete beginner- northwind.docx
 
SITCON 雲林定期聚 #1
SITCON 雲林定期聚 #1SITCON 雲林定期聚 #1
SITCON 雲林定期聚 #1
 
Chapter 2 Python Language Basics, IPython.pptx
Chapter 2 Python Language Basics, IPython.pptxChapter 2 Python Language Basics, IPython.pptx
Chapter 2 Python Language Basics, IPython.pptx
 
Magicke metody v Pythonu
Magicke metody v PythonuMagicke metody v Pythonu
Magicke metody v Pythonu
 
Fundamentals of Cryptography - Caesar Cipher - Python
Fundamentals of Cryptography - Caesar Cipher - Python Fundamentals of Cryptography - Caesar Cipher - Python
Fundamentals of Cryptography - Caesar Cipher - Python
 
Function Procedure Trigger Partition.pdf
Function Procedure Trigger Partition.pdfFunction Procedure Trigger Partition.pdf
Function Procedure Trigger Partition.pdf
 
Python dictionary : past, present, future
Python dictionary: past, present, futurePython dictionary: past, present, future
Python dictionary : past, present, future
 
UNIT- 2 PPDS R20.pptx
UNIT- 2 PPDS R20.pptxUNIT- 2 PPDS R20.pptx
UNIT- 2 PPDS R20.pptx
 
Beautiful python - PyLadies
Beautiful python - PyLadiesBeautiful python - PyLadies
Beautiful python - PyLadies
 
Semaphore.pptx
Semaphore.pptxSemaphore.pptx
Semaphore.pptx
 
R code for data manipulation
R code for data manipulationR code for data manipulation
R code for data manipulation
 
R code for data manipulation
R code for data manipulationR code for data manipulation
R code for data manipulation
 
Having issues with passing my values through different functions aft.pdf
Having issues with passing my values through different functions aft.pdfHaving issues with passing my values through different functions aft.pdf
Having issues with passing my values through different functions aft.pdf
 
The Zen of Lithium
The Zen of LithiumThe Zen of Lithium
The Zen of Lithium
 

Mais de doughellmann

How I Built a Power Debugger Out of the Standard Library and Things I Found o...
How I Built a Power Debugger Out of the Standard Library and Things I Found o...How I Built a Power Debugger Out of the Standard Library and Things I Found o...
How I Built a Power Debugger Out of the Standard Library and Things I Found o...
doughellmann
 

Mais de doughellmann (15)

Reno: A new way to manage release notes
Reno: A new way to manage release notesReno: A new way to manage release notes
Reno: A new way to manage release notes
 
Reno A New Way to Manage Release Notes
Reno   A New Way to Manage Release NotesReno   A New Way to Manage Release Notes
Reno A New Way to Manage Release Notes
 
How OpenStack Makes Python Better (and vice-versa)
How OpenStack Makes Python Better (and vice-versa)How OpenStack Makes Python Better (and vice-versa)
How OpenStack Makes Python Better (and vice-versa)
 
How I Built a Power Debugger Out of the Standard Library and Things I Found o...
How I Built a Power Debugger Out of the Standard Library and Things I Found o...How I Built a Power Debugger Out of the Standard Library and Things I Found o...
How I Built a Power Debugger Out of the Standard Library and Things I Found o...
 
Rolling with the Times: Using wheels, pbr, and Twine for Distributing and Ins...
Rolling with the Times: Using wheels, pbr, and Twine for Distributing and Ins...Rolling with the Times: Using wheels, pbr, and Twine for Distributing and Ins...
Rolling with the Times: Using wheels, pbr, and Twine for Distributing and Ins...
 
Herding cats into boxes
Herding cats into boxesHerding cats into boxes
Herding cats into boxes
 
How I Built a Power Debugger Out of the Standard Library and Things I Found o...
How I Built a Power Debugger Out of the Standard Library and Things I Found o...How I Built a Power Debugger Out of the Standard Library and Things I Found o...
How I Built a Power Debugger Out of the Standard Library and Things I Found o...
 
OpenStack 5th Birthday
OpenStack 5th BirthdayOpenStack 5th Birthday
OpenStack 5th Birthday
 
OpenStack Atlanta-2014-12-18
OpenStack Atlanta-2014-12-18OpenStack Atlanta-2014-12-18
OpenStack Atlanta-2014-12-18
 
Taking the Long View: How the Oslo Program Reduces Technical Debt
Taking the Long View: How the Oslo Program Reduces Technical DebtTaking the Long View: How the Oslo Program Reduces Technical Debt
Taking the Long View: How the Oslo Program Reduces Technical Debt
 
Oslo Program Overview, OpenStack Atlanta
Oslo Program Overview, OpenStack AtlantaOslo Program Overview, OpenStack Atlanta
Oslo Program Overview, OpenStack Atlanta
 
Dynamic Code Patterns: Extending Your Applications with Plugins
Dynamic Code Patterns: Extending Your Applications with PluginsDynamic Code Patterns: Extending Your Applications with Plugins
Dynamic Code Patterns: Extending Your Applications with Plugins
 
Better Documentation Through Automation: Creating docutils & Sphinx Extensions
Better Documentation Through Automation: Creating docutils & Sphinx ExtensionsBetter Documentation Through Automation: Creating docutils & Sphinx Extensions
Better Documentation Through Automation: Creating docutils & Sphinx Extensions
 
Hidden Treasures of the Python Standard Library
Hidden Treasures of the Python Standard LibraryHidden Treasures of the Python Standard Library
Hidden Treasures of the Python Standard Library
 
An Introduction to the Zen of Python
An Introduction to the Zen of PythonAn Introduction to the Zen of Python
An Introduction to the Zen of Python
 

Último

Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...
Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...
Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...
Medical / Health Care (+971588192166) Mifepristone and Misoprostol tablets 200mg
 
%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...
%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...
%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...
masabamasaba
 
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
masabamasaba
 
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
masabamasaba
 
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
Health
 

Último (20)

WSO2CON2024 - It's time to go Platformless
WSO2CON2024 - It's time to go PlatformlessWSO2CON2024 - It's time to go Platformless
WSO2CON2024 - It's time to go Platformless
 
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
 
Microsoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdfMicrosoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdf
 
8257 interfacing 2 in microprocessor for btech students
8257 interfacing 2 in microprocessor for btech students8257 interfacing 2 in microprocessor for btech students
8257 interfacing 2 in microprocessor for btech students
 
%in Bahrain+277-882-255-28 abortion pills for sale in Bahrain
%in Bahrain+277-882-255-28 abortion pills for sale in Bahrain%in Bahrain+277-882-255-28 abortion pills for sale in Bahrain
%in Bahrain+277-882-255-28 abortion pills for sale in Bahrain
 
Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...
Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...
Abortion Pills In Pretoria ](+27832195400*)[ 🏥 Women's Abortion Clinic In Pre...
 
%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...
%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...
%+27788225528 love spells in new york Psychic Readings, Attraction spells,Bri...
 
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
 
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
 
%in Harare+277-882-255-28 abortion pills for sale in Harare
%in Harare+277-882-255-28 abortion pills for sale in Harare%in Harare+277-882-255-28 abortion pills for sale in Harare
%in Harare+277-882-255-28 abortion pills for sale in Harare
 
tonesoftg
tonesoftgtonesoftg
tonesoftg
 
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
 
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
 
%in Midrand+277-882-255-28 abortion pills for sale in midrand
%in Midrand+277-882-255-28 abortion pills for sale in midrand%in Midrand+277-882-255-28 abortion pills for sale in midrand
%in Midrand+277-882-255-28 abortion pills for sale in midrand
 
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
 
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
 
%in Soweto+277-882-255-28 abortion pills for sale in soweto
%in Soweto+277-882-255-28 abortion pills for sale in soweto%in Soweto+277-882-255-28 abortion pills for sale in soweto
%in Soweto+277-882-255-28 abortion pills for sale in soweto
 
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
Shapes for Sharing between Graph Data Spaces - and Epistemic Querying of RDF-...
 
Announcing Codolex 2.0 from GDK Software
Announcing Codolex 2.0 from GDK SoftwareAnnouncing Codolex 2.0 from GDK Software
Announcing Codolex 2.0 from GDK Software
 

Regexes and Performance Testing

  • 1. Regexes and Performance Testing Doug Hellmann PyATL May 2015
  • 2. def mask_password(message, secret="***"): """Replace password with 'secret' in message. :param message: The string which includes security information. :param secret: value with which to replace passwords. :returns: The unicode value of message with the password fields masked. """ try: message = six.text_type(message) except UnicodeDecodeError: # NOTE(jecarey): Temporary fix to handle cases where message is a # byte string. A better solution will be provided in Kilo. pass # NOTE(ldbragst): Check to see if anything in message contains any key # specified in _SANITIZE_KEYS, if not then just return the message since # we don't have to mask any passwords. if not any(key in message for key in _SANITIZE_KEYS): return message substitute = r'g<1>' + secret + r'g<2>' for pattern in _SANITIZE_PATTERNS_2: message = re.sub(pattern, substitute, message) substitute = r'g<1>' + secret for pattern in _SANITIZE_PATTERNS_1: message = re.sub(pattern, substitute, message) return message
  • 3. > 3000 times faster
  • 4. def mask_password(message, secret="***"): """Replace password with 'secret' in message. :param message: The string which includes security information. :param secret: value with which to replace passwords. :returns: The unicode value of message with the password fields masked. """ try: message = six.text_type(message) except UnicodeDecodeError: # NOTE(jecarey): Temporary fix to handle cases where message is a # byte string. A better solution will be provided in Kilo. pass # NOTE(ldbragst): Check to see if anything in message contains any key # specified in _SANITIZE_KEYS, if not then just return the message since # we don't have to mask any passwords. if not any(key in message for key in _SANITIZE_KEYS): return message substitute = r'g<1>' + secret + r'g<2>' for pattern in _SANITIZE_PATTERNS_2: message = re.sub(pattern, substitute, message) substitute = r'g<1>' + secret for pattern in _SANITIZE_PATTERNS_1: message = re.sub(pattern, substitute, message) return message
  • 5. _SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password', 'auth_token', 'new_pass', 'auth_password', 'secret_uuid'] # NOTE(ldbragst): Let's build a list of regex objects using the list of # _SANITIZE_KEYS we already have. This way, we only have to add the new key # to the list of _SANITIZE_KEYS and we can generate regular expressions # for XML and JSON automatically. _SANITIZE_PATTERNS_2 = [] _SANITIZE_PATTERNS_1 = [] # NOTE(amrith): Some regular expressions have only one parameter, some # have two parameters. Use different lists of patterns here. _FORMAT_PATTERNS_1 = [r'(%(key)ss*[=]s*)[^s^'^"]+'] _FORMAT_PATTERNS_2 = [r'(%(key)ss*[=]s*["']).*?(["'])', r'(%(key)ss+["']).*?(["'])', r'([-]{2}%(key)ss+)[^'^"^=^s]+([s]*)', r'(<%(key)s>).*?(</%(key)s>)', r'(["']%(key)s["']s*:s*["']).*?(["'])', r'(['"].*?%(key)s['"]s*:s*u?['"]).*?(['"])', r'(['"].*?%(key)s['"]s*,s*'--?[A-z]+'s*,s*u?' '['"]).*?(['"])', r'(%(key)ss*--?[A-z]+s*)S+(s*)'] for key in _SANITIZE_KEYS: for pattern in _FORMAT_PATTERNS_2: reg_ex = re.compile(pattern % {'key': key}, re.DOTALL) _SANITIZE_PATTERNS_2.append(reg_ex) for pattern in _FORMAT_PATTERNS_1: reg_ex = re.compile(pattern % {'key': key}, re.DOTALL) _SANITIZE_PATTERNS_1.append(reg_ex)
  • 7. def mask_password(message, secret="***"): """Replace password with 'secret' in message. :param message: The string which includes security information. :param secret: value with which to replace passwords. :returns: The unicode value of message with the password fields masked. """ try: message = six.text_type(message) except UnicodeDecodeError: # NOTE(jecarey): Temporary fix to handle cases where message is a # byte string. A better solution will be provided in Kilo. pass # NOTE(ldbragst): Check to see if anything in message contains any key # specified in _SANITIZE_KEYS, if not then just return the message since # we don't have to mask any passwords. if not any(key in message for key in _SANITIZE_KEYS): return message substitute = r'g<1>' + secret + r'g<2>' for pattern in _SANITIZE_PATTERNS_2: message = re.sub(pattern, substitute, message) substitute = r'g<1>' + secret for pattern in _SANITIZE_PATTERNS_1: message = re.sub(pattern, substitute, message) return message
  • 8. class timeit.Timer(stmt='pass', setup='pass', timer=<timer function>) timeit(number=1000000) t = timeit.Timer( stmt="re.sub(pattern, r'\g<1>***\g<2>', payload)", setup=""" import re payload = '''%s''' pattern = re.compile(r'''%s''') """ % (input_str, pattern.pattern)) result = t.timeit(1)
  • 9. #!/usr/bin/env python from __future__ import print_function import timeit import strutils # A moderately sized input (~50K) string # http://paste.openstack.org/raw/155864/ infile = 'sample.json' with open(infile, 'r') as f: input_str = f.read() print('payload has %d bytes' % len(input_str))
  • 10. times = [] for pattern in strutils._SANITIZE_PATTERNS_2: print('\ntesting %s' % pattern.pattern) t = timeit.Timer( "re.sub(pattern, r'\g<1>***\g<2>', payload)", """ import re payload = '''%s''' pattern = re.compile(r'''%s''') """ % (input_str, pattern.pattern)) result = t.timeit(1) print(result) times.append((result, pattern.pattern)) print('\nslowest to fastest:') times = reversed(sorted(times)) for t in times: print('%s - %s' % t) print('\ntesting all patterns') t = timeit.Timer( "strutils.mask_password('''" + input_str + "''')", "import strutils", ) print(t.timeit(1))
  • 11. $ python perf_test_mask_password.py payload has 50000 bytes testing (adminPasss*[=]s*["']).*?(["']) 0.000424146652222 testing (adminPasss+["']).*?(["']) 0.000438928604126 testing ([-]{2}adminPasss+)[^'^"^=^s]+([s]*) 0.000957012176514 testing (<adminPass>).*?(</adminPass>) 0.000320911407471 testing (["']adminPass["']s*:s*["']).*?(["']) 0.000932931900024 testing (['"].*?adminPass['"]s*:s*u?['"]).*?(['"]) 1.39579486847 testing (['"].*?adminPass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.38312697411 testing (adminPasss*--?[A-z]+s*)S+(s*) 0.000363111495972 testing (admin_passs*[=]s*["']).*?(["']) 0.000358819961548 testing (admin_passs+["']).*?(["']) 0.000358104705811
  • 12. 1.38247299194 - (['"].*?adminPass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.36443901062 - (['"].*?auth_password['"]s*:s*u?['"]).*?(['"]) 1.36225605011 - (['"].*?auth_token['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.35945916176 - (['"].*?admin_pass['"]s*:s*u?['"]).*?(['"]) 1.358741045 - (['"].*?admin_password['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*? (['"]) 1.35629177094 - (['"].*?admin_password['"]s*:s*u?['"]).*?(['"]) 1.35611796379 - (['"].*?new_pass['"]s*:s*u?['"]).*?(['"]) 1.35091495514 - (['"].*?auth_password['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*? (['"]) 1.34805893898 - (['"].*?new_pass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.34525895119 - (['"].*?admin_pass['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.3426618576 - (['"].*?adminPass['"]s*:s*u?['"]).*?(['"]) 1.33739089966 - (['"].*?secret_uuid['"]s*:s*u?['"]).*?(['"]) 1.31182909012 - (['"].*?secret_uuid['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.30403113365 - (['"].*?auth_token['"]s*:s*u?['"]).*?(['"]) 1.29068493843 - (['"].*?password['"]s*:s*u?['"]).*?(['"]) 1.28651189804 - (['"].*?password['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 0.00123500823975 - (["']password["']s*:s*["']).*?(["']) 0.00123286247253 - (["']adminPass["']s*:s*["']).*?(["'])
  • 13. $ cat results 1.38247299194 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.36443901062 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.36225605011 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.35945916176 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.358741045 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.35629177094 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.35611796379 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.35091495514 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.34805893898 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.34525895119 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.3426618576 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.33739089966 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.31182909012 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) 1.30403113365 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.29068493843 - (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) 1.28651189804 - (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
  • 14. $ cat results | sed -E 's/^.{16}/ /g' (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"])
  • 15. $ cat results | sed -E 's/^.{16}/ /g’ | sort (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*,s*'--?[A-z]+'s*,s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"]) (['"].*?KEY['"]s*:s*u?['"]).*?(['"])
  • 16. $ cat results | sed -E 's/^.{16}/ /g' | sort -u (['"].*?KEY['"]\s*,\s*'--?[A-z]+'\s*,\s*u?['"]).*?(['"]) (['"].*?KEY['"]\s*:\s*u?['"]).*?(['"])
  • 17. ([’"].*?KEY['"]s*:s*u?['"]).*?(['"]) “admin_pass”: “super secret” “admin_pass”: “***” ( start group [’”] quote .*? anything KEY literal key [’”] quote s*:s* colon, maybe with whitespace u? optional literal u [’”] quote ) end group .*? anything ([’"]) group with quote group 1: “admin_pass”: “ group 2: “
  • 18. ([’"].*?KEY['"]s*:s*u?['"]).*?(['"]) “admin_pass”: “super secret” “admin_pass”: “***” ( start group [’”] quote .*? anything KEY literal key [’”] quote s*:s* colon, maybe with whitespace u? optional literal u [’”] quote ) end group .*? anything ([’"]) group with quote group 1: “admin_pass”: “ group 2: “
  • 19. (”)(.*?)(”) “super secret” “ “s “su “sup “supe “super “super “super s “super se “super sec “super secr “super secret “super secret “super secret” (“)(.*)(”) “super secret” “super secret” “super secret “super secret”
  • 20. Change 1: Be Specific In Patterns
  • 22.
  • 23. Before: testing all patterns 22.2821819782 After: testing all patterns 0.0565450191498 22.28 / 0.0565 = 394.3
  • 24. def mask_password(message, secret="***"): """Replace password with 'secret' in message. :param message: The string which includes security information. :param secret: value with which to replace passwords. :returns: The unicode value of message with the password fields masked. """ try: message = six.text_type(message) except UnicodeDecodeError: # NOTE(jecarey): Temporary fix to handle cases where message is a # byte string. A better solution will be provided in Kilo. pass # NOTE(ldbragst): Check to see if anything in message contains any key # specified in _SANITIZE_KEYS, if not then just return the message since # we don't have to mask any passwords. if not any(key in message for key in _SANITIZE_KEYS): return message substitute = r'g<1>' + secret + r'g<2>' for pattern in _SANITIZE_PATTERNS_2: message = re.sub(pattern, substitute, message) substitute = r'g<1>' + secret for pattern in _SANITIZE_PATTERNS_1: message = re.sub(pattern, substitute, message) return message
  • 25. 8 keys * 10 templates = 80 patterns
  • 26. #!/usr/bin/env python from __future__ import print_function import timeit infile = 'sample.json' with open(infile, 'r') as f: input_str = f.read() t = timeit.Timer( "re.sub(pattern, r'g<1>***g<2>', payload)", """ import re payload = '''%s''' pattern = re.compile(r'admin_pass') """ % input_str) print('regex : %10.10f' % t.timeit(5)) t = timeit.Timer( "'admin_pass' in payload", "payload = '''%s'''" % input_str, ) print('literal: %10.10f' % t.timeit(5))
  • 27. $ python literal_vs_regex.py regex : 0.0003869534 literal: 0.0001130104
  • 28. Change 2: Look at Fewer Patterns
  • 29. def mask_password(message, secret="***"): """Replace password with 'secret' in message. :param message: The string which includes security information. :param secret: value with which to replace passwords. :returns: The unicode value of message with the password fields masked. """ try: message = six.text_type(message) except UnicodeDecodeError: # NOTE(jecarey): Temporary fix to handle cases where message is a # byte string. A better solution will be provided in Kilo. pass substitute1 = r'g<1>' + secret substitute2 = r'g<1>' + secret + r'g<2>' # NOTE(ldbragst): Check to see if anything in message contains any key # specified in _SANITIZE_KEYS, if not then just return the message since # we don't have to mask any passwords. for key in _SANITIZE_KEYS: if key in message: for pattern in _SANITIZE_PATTERNS_2[key]: message = re.sub(pattern, substitute2, message) for pattern in _SANITIZE_PATTERNS_1[key]: message = re.sub(pattern, substitute1, message) return message
  • 30. # NOTE(ldbragst): Let's build a list of regex objects using the list of # _SANITIZE_KEYS we already have. This way, we only have to add the new key # to the list of _SANITIZE_KEYS and we can generate regular expressions # for XML and JSON automatically. _SANITIZE_PATTERNS_2 = {} _SANITIZE_PATTERNS_1 = {} # NOTE(dhellmann): Keep a separate list of patterns by key so we only # need to apply the substitutions for keys we find using a quick "in" # test. for key in _SANITIZE_KEYS: _SANITIZE_PATTERNS_1[key] = [] _SANITIZE_PATTERNS_2[key] = [] for pattern in _FORMAT_PATTERNS_2: reg_ex = re.compile(pattern % {'key': key}, re.DOTALL) _SANITIZE_PATTERNS_2[key].append(reg_ex) for pattern in _FORMAT_PATTERNS_1: reg_ex = re.compile(pattern % {'key': key}, re.DOTALL) _SANITIZE_PATTERNS_1[key].append(reg_ex)
  • 31. >>> import strutils >>> strutils._SANITIZE_PATTERNS_2 {'admin_pass': [<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, <_sre.SRE_Pattern object at 0x7fbf43620660>, <_sre.SRE_Pattern object at 0x7fbf43621200>, <_sre.SRE_Pattern object at 0x7fbf43621ac0>, <_sre.SRE_Pattern object at 0x7fbf43627e70>, <_sre.SRE_Pattern object at 0x7fbf43629a30>, <_sre.SRE_Pattern object at 0x7fbf4362a3f0>, <_sre.SRE_Pattern object at 0x7fbf43629fc0>], 'auth_password': [<_sre.SRE_Pattern object at 0x7fbf436322d0>, <_sre.SRE_Pattern object at 0x7fbf436324a0>, <_sre.SRE_Pattern object at 0x7fbf43632640>, <_sre.SRE_Pattern object at 0x7fbf43632800>, <_sre.SRE_Pattern object at 0x7fbf43631d60>, <_sre.SRE_Pattern object at 0x7fbf43633200>, <_sre.SRE_Pattern object at 0x7fbf43633690>, <_sre.SRE_Pattern object at 0x7fbf43633e90>], …} >>> strutils._SANITIZE_PATTERNS_2.keys() ['admin_pass', 'auth_password', 'adminPass', 'auth_token', 'secret_uuid', 'admin_password', 'new_pass', ‘password'] >>> strutils._SANITIZE_PATTERNS_2[‘admin_pass’] [<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, …]
  • 32. times = [] for pattern in strutils._SANITIZE_PATTERNS_2['admin_pass']: print('ntesting %s' % pattern.pattern) t = timeit.Timer( "re.sub(pattern, r'g<1>***g<2>', payload)", """ import re payload = '''%s''' pattern = re.compile(r'''%s''') """ % (input_str, pattern.pattern)) result = t.timeit(1) print(result) times.append((result, pattern.pattern)) print('nslowest to fastest:') times = reversed(sorted(times)) for t in times: print('%s - %s ' % t) print('ntesting all patterns') t = timeit.Timer( "strutils.mask_password('''" + input_str + "''')", "import strutils", ) print(t.timeit(1))
  • 33. $ python ./perf_test_mask_password.py payload has 50000 bytes testing (admin_passs*[=]s*["'])[^"']*(["']) 0.000396966934204 testing (admin_passs+["'])[^"']*(["']) 0.000360012054443 testing ([-]{2}admin_passs+)[^'^"^=^s]+([s]*) 0.00097393989563 testing (<admin_pass>)[^<]*(</admin_pass>) 0.00052809715271 testing (["']admin_pass["']s*:s*["'])[^"']*(["']) 0.00141596794128 testing (['"][^"']*admin_pass['"]s*:s*u?['"])[^"']*(['"]) 0.00194191932678 testing (['"][^'"]*admin_pass['"]s*,s*'--?[A-z]+'s*,s*u?['"]) [^"']*(['"]) 0.00175809860229 testing (admin_passs*--?[A-z]+s*)S+(s*) 0.000356912612915
  • 34. slowest to fastest: 0.00194191932678 - (['"][^"']*admin_pass['"]s*:s*u?['"])[^"']*(['"]) 0.00175809860229 - (['"][^'"]*admin_pass['"]s*,s*'--?[A-z]+'s*,s*u? ['"])[^"']*(['"]) 0.00141596794128 - (["']admin_pass["']s*:s*["'])[^"']*(["']) 0.00097393989563 - ([-]{2}admin_passs+)[^'^"^=^s]+([s]*) 0.00052809715271 - (<admin_pass>)[^<]*(</admin_pass>) 0.000396966934204 - (admin_passs*[=]s*["'])[^"']*(["']) 0.000360012054443 - (admin_passs+["'])[^"']*(["']) 0.000356912612915 - (admin_passs*--?[A-z]+s*)S+(s*) testing all patterns 0.00732088088989
  • 35. 22.28 / 0.0073 = 3052
  • 36. References • https://github.com/dhellmann/pyatl-regex-performance • https://review.openstack.org/#/c/145562/ • http://git.openstack.org/cgit/openstack/oslo.utils/tree/oslo_utils/strutils.py • https://docs.python.org/2.7/library/re.html • https://docs.python.org/2/howto/regex.html#greedy-versus-non-greedy • https://docs.python.org/2.7/library/timeit.html • Mastering Regular Expressions, Jeffrey E.F. Friedl