Use the tokenizer for great success

This commit is contained in:
Anthony Sottile
2015-02-10 08:14:53 -08:00
parent 5207d1f29a
commit 2983d4478f
2 changed files with 63 additions and 67 deletions

View File

@ -3,34 +3,60 @@ from __future__ import print_function
from __future__ import unicode_literals
import argparse
import re
import io
import tokenize
double_quote_starts = tuple(s for s in tokenize.single_quoted if '"' in s)
compiled_tokenize_string = re.compile('(?<!")' + tokenize.String + '(?!")')
def handle_match(m):
string = m.group(0)
def handle_match(token_text):
if '"""' in token_text or "'''" in token_text:
return token_text
for double_quote_start in double_quote_starts:
if string.startswith(double_quote_start):
meat = string[len(double_quote_start):-1]
if token_text.startswith(double_quote_start):
meat = token_text[len(double_quote_start):-1]
if '"' in meat or "'" in meat:
break
return double_quote_start.replace('"', "'") + meat + "'"
return string
return token_text
def get_line_offsets_by_line_no(src):
# Padded so we can index with line number
offsets = [None, 0]
for line in src.splitlines():
offsets.append(offsets[-1] + len(line) + 1)
return offsets
def fix_strings(filename):
contents = open(filename).read()
new_contents = compiled_tokenize_string.sub(handle_match, contents)
retval = int(new_contents != contents)
if retval:
with open(filename, 'w') as write_handle:
contents = io.open(filename).read()
line_offsets = get_line_offsets_by_line_no(contents)
# Basically a mutable string
splitcontents = list(contents)
# Iterate in reverse so the offsets are always correct
tokens = reversed(list(tokenize.generate_tokens(
io.StringIO(contents).readline,
)))
for token_type, token_text, (srow, scol), (erow, ecol), _ in tokens:
if token_type == tokenize.STRING:
new_text = handle_match(token_text)
splitcontents[
line_offsets[srow] + scol:
line_offsets[erow] + ecol
] = new_text
new_contents = ''.join(splitcontents)
if contents != new_contents:
with io.open(filename, 'w') as write_handle:
write_handle.write(new_contents)
return retval
return 1
else:
return 0
def main(argv=None):