diff --git a/scripts/convert.py b/scripts/convert.py deleted file mode 100755 index 6262c9db..00000000 --- a/scripts/convert.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/python -''' -Converts a font from one format to another. The input and output formats are -inferred based on file names. This script is a thin wrapper around the fontforge -Python library, which it depends on. -''' - -import fontforge -import sys - -if __name__ == '__main__': - assert len(sys.argv) == 3, 'Usage: ./convert.py ' - font = fontforge.open(sys.argv[1]) - font.generate(sys.argv[2]) diff --git a/scripts/main.py b/scripts/main.py deleted file mode 100755 index e10c3fa8..00000000 --- a/scripts/main.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/python -''' -Extracts one or more characters from each of the svg fonts in the SVG directory -and prints data for them to stderr in JSON format. The output data is a list of -dictionaries with the following keys: - - name: string glyph name - - d: string SVG path data - - extractor: stroke data + diagnostics (see stroke_extractor for details) -''' -import argparse -import json -import sys - -import stroke_extractor - - -def get_html_attribute(glyph, attribute): - ''' - Takes an HTML SVG object and returns the path data from the "d" field. - ''' - left = ' {0}="'.format(attribute) - start = max(glyph.find(left), glyph.find(left.replace(' ', '\n'))) - end = glyph.find('"', start + len(left)) - assert start >= 0 and end >= 0, \ - 'Glyph missing {0}=".*" block:\n{1}'.format(attribute, repr(glyph)) - return glyph[start + len(left):end].replace('\n', ' ') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('-f', '--font', dest='font', - help='SVG font to read characters from.', required=True) - parser.add_argument('-m', '--manual', dest='manual', - help='Manual corrections to the algorithm.') - (options, args) = parser.parse_known_args() - if options.manual is not None: - assert len(args) == 1, 'Manual corrections can only apply to one glyph!' - options.manual = json.loads(options.manual) - # For each glyph name among the positional arguments, extract the glyph with - # that name from the SVG font. - glyphs = [] - with open(options.font) as font: - data = font.read() - for glyph_name in args: - index = data.find('glyph-name="{0}"'.format(glyph_name)) - if index < 0: - print >> sys.stderr, '{0}: missing {1}'.format(options.font, glyph_name) - continue - (left, right) = ('') - (start, end) = (data.rfind(left, 0, index), data.find(right, index)) - if start < 0 or end < 0: - print >> sys.stderr, '{0}: malformed {1}'.format(options.font, glyph_name) - continue - glyphs.append((glyph_name, data[start:end + len(right)])) - # Print data for each of the extracted glyphs in JSON format. - result = [] - for (glyph_name, glyph) in glyphs: - d = get_html_attribute(glyph, 'd') - assert d, 'Missing glyph-name or d for glyph:\n{0}'.format(glyph) - extractor = stroke_extractor.StrokeExtractor(glyph_name, d, options.manual) - data = {'name': glyph_name, 'd': d, 'extractor': extractor.get_data()} - result.append(data) - print json.dumps(result) diff --git a/scripts/stroke_extractor.py b/scripts/stroke_extractor.py deleted file mode 100644 index 509f75d6..00000000 --- a/scripts/stroke_extractor.py +++ /dev/null @@ -1,400 +0,0 @@ -''' -Given an svg.path.Path object representing a glyph, a StrokeExtractor instance -will break it down into a list of svg.path.Path objects, one for each stroke. - -The algorithm we currently use is a 'corner-and-bridge' algorithm. First, we -detect possible corners in the path object. 'Corners' are points where the -derivative of the curve angle is sharply negative - that is, points at which -the curve is very non-convex. If two strokes cross eachother, we should detect -four corners, one at each place at the outline of the intersection. - -(Note that much more complex configurations are possible - for example a stroke -may end at the middle of another stroke, or many strokes may intersect to form -a star shape.) - -We then detect 'bridges', which are edges between corners where the stroke -entering one corner may continue to the stroke exiting the other corner. In our -two-strokes-crossing example, we should detect four bridges connecting the four -corners to form a simple quadrilateral. - -Finally, we traverse the path, usually following SVG path elements, but taking -bridges when they are inline with the previously traversed path element. The -output of this traversal is our final stroke decomposition. - -At many points during this algorithm we may detect various anomalies. We log -these anomalies so that they can be reviewed manually. -''' -import collections -import copy -import math -import svg.path - - -MAX_BRIDGE_DISTANCE = 128 -MAX_BRIDGE_SPLIT_DISTANCE = 16 -MIN_CORNER_ANGLE = 0.1*math.pi -MIN_CORNER_TANGENT_DISTANCE = 4 - -# Some glyphs in the font have strokes that incorrectly curve clockwise. -# To handle these glyphs, we store a list of glyph names and stroke indices that -# should be reversed during the call to split_and_orient_path. -PATH_ORDER_MISTAKES = { - 'U9BFE': [4, 5, 6], 'U9BD2': [0, 1, 2], 'U9BB7': [0, 1, 2], - 'U9BA7': [0, 1, 2], 'U97CA': [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], - 'U9793': [4, 5, 6], 'U9767': [4, 5, 6, 7, 8]} - - -def area(path): - ''' - Returns the area of the path. The result is positive iff the path winds in - the counter-clockwise direction. - ''' - def area_under_curve(x): - return (x.start.real - x.end.real)*(x.start.imag + x.end.imag) - return int(sum(map(area_under_curve, path))/2) - - -def split_and_orient_path(name, path): - ''' - Takes a non-empty svg.path.Path object that may contain multiple closed loops. - Returns a list of svg.path.Path objects that are all minimal closed curve. - The returned paths will be the way a TTF glyph should be: exterior curves - will be counter-clockwise and interior curves will be clockwise. - ''' - paths = [[path[0]]] - for element in path[1:]: - if element.start == element.end: - continue - if element.start != paths[-1][-1].end: - paths.append([]) - paths[-1].append(element) - # Determine if this glyph is oriented in the wrong direction by computing the - # area of each glyph. The glyph with maximum |area| should have positive area, - # because it must be an exterior path. - def reverse(path): - for element in path: - (element.start, element.end) = (element.end, element.start) - return reversed(path) - areas = [area(path) for path in paths] - max_area = max((abs(area), area) for area in areas)[1] - if max_area < 0: - paths = map(reverse, paths) - for i in PATH_ORDER_MISTAKES.get(name, []): - paths[i] = reverse(list(paths[i])) - return [svg.path.Path(*path) for path in paths] - - -class Corner(object): - def __init__(self, paths, index): - self.index = index - (i, j) = index - self.path = paths[i] - self.point = paths[i][j].start - (self.tangent1, self.tangent2) = self._get_tangents() - self.angle = self._get_angle(self.tangent1, self.tangent2) - - def bridge(self, other): - ''' - Returns true if a stroke continues from this corner point to the other. - Internally, this function builds a 7-dimensional feature vector and then - calls a classifier. The 7 features are: - features[0]: The angle between the edge in and the bridge - features[1]: The angle between the bridge and the edge out - features[2]: The angle between the cross stroke out and the bridge - features[3]: The angle between the cross stroke in and the bridge - features[4]: The angle at this corner - features[5]: The angle at the other corner - features[6]: The length of the bridge - - At an ideal bridge, features[0] and features[1] should be very close to 0, - meaning that the stroke can continue smoothly from this corner to the other. - features[2] + features[3] is close to pi, meaning that the stroke in - is straight, and features[6], the distance, is small. - - This ideal configuration might look like this diagram: - - / ^ - / / - <-O S-- - - where S is this corner and O is the other and the arrows indicate the - direction of the curve. - ''' - diff = other.point - self.point - length = abs(diff) - if length == 0 or length > MAX_BRIDGE_DISTANCE: - return False - # NOTE: These angle features make sense even if points are on different - # subpaths of the glyph path! Because of our preprocessing, exterior glyph - # paths are clockwise while interior paths are counter-clockwise, so angle - # features around a bridge are the same whether or not the two sides of - # the bridge are on the same path. - features = ( - self._get_angle(self.tangent1, diff), - self._get_angle(diff, other.tangent2), - self._get_angle(diff, self.tangent2), - self._get_angle(other.tangent1, diff), - self.angle, - other.angle, - length, - ) - # TODO(skishore): Log this sample and use it to train the classifier. - result = self._run_classifier(features) - return result - - def _get_angle(self, vector1, vector2): - ratio = vector2/vector1 if vector1 else 0 - return math.atan2(ratio.imag, ratio.real) - - def _get_tangents(self): - segment1 = self.path[self.index[1] - 1] - tangent1 = segment1.end - segment1.start - if (type(segment1) == svg.path.QuadraticBezier and - abs(segment1.end - segment1.control) > MIN_CORNER_TANGENT_DISTANCE): - tangent1 = segment1.end - segment1.control - segment2 = self.path[self.index[1]] - tangent2 = segment2.end - segment2.start - if (type(segment2) == svg.path.QuadraticBezier and - abs(segment2.control - segment2.start) > MIN_CORNER_TANGENT_DISTANCE): - tangent2 = segment2.control - segment2.start - return (tangent1, tangent2) - - def _run_classifier(self, features): - # TODO(skishore): Replace these inequalities with a trained classifier. - alignment = abs(features[0]) + abs(features[1]) - incidence = abs(features[2] + features[3] + math.pi) - short = features[6] < MAX_BRIDGE_DISTANCE/2 - clean = alignment < 0.1*math.pi or alignment + incidence < 0.2*math.pi - cross = all([ - features[0] > 0, - features[1] > 0, - features[2] + features[3] < -0.5*math.pi, - ]) - result = 0 - if features[2] < 0 and features[3] < 0 and (clean or (short and cross)): - result = (1 if short else 0.75) if clean else 0.5 - return result - - -class StrokeExtractor(object): - def __init__(self, name, d, manual=None): - self.name = name - self.messages = [] - self.paths = split_and_orient_path(name, svg.path.parse_path(d)) - self.corners = self.get_corners() - self.bridges = self.get_bridges() - if manual: - self._default_corners = copy.deepcopy(self.corners) - self._default_bridges = copy.deepcopy(self.bridges) - self.apply_manual_corrections(manual) - else: - self._default_corners = self.corners - self._default_bridges = self.bridges - (self.strokes, self.stroke_adjacency) = self.extract_strokes() - - def apply_manual_corrections(self, manual): - indices = {} - for (i, path) in enumerate(self.paths): - for (j, element) in enumerate(path): - index = (i, j) - indices[element.start] = index - if index in self.corners: - assert element.start == self.corners[index].point - - def get_index(pair): - result = indices[pair[0] + pair[1]*1j] - if result not in self.corners: - self.corners[result] = Corner(self.paths, result) - return result - - for bridge in manual.get('bridges_added', []): - (index1, index2) = map(get_index, bridge) - self.bridges[index1].add(index2) - self.bridges[index2].add(index1) - for bridge in manual.get('bridges_removed', []): - (index1, index2) = map(get_index, bridge) - self.bridges[index1].remove(index2) - self.bridges[index2].remove(index1) - for (index, others) in self.bridges.items(): - if not others: - del self.bridges[index] - - def extract_stroke(self, extracted, start): - ''' - Given a path, a list of corners, and an adjacency list representation of - bridges between then, extract a stroke that starts at the given index - and add the indices of all elements on that stroke to extracted. - - This method will return a pair (path, corners), where the first element is - an svg.path.Path object representing the stroke and the second is a list of - corners that appear on that stroke. The corners list will have duplicates if - the stroke loops back on itself, which indicates a mistake somewhere. - - This method will fail if, when following edges the the initial path element, - we cross a bridge and enter a stroke that has already been extracted. If so, - the path we return will be None. - - NOTE: We deliberately avoid using bridge directionality in this algorithm - so that we can handle manually added bridges. - ''' - current = start - corners = [] - path = svg.path.Path() - visited = set() - - def advance(index): - return (index[0], (index[1] + 1) % len(self.paths[index[0]])) - - def angle(index, bridge): - tangent = self.corners[index].tangent1 - ratio = (self.corners[bridge].point - self.corners[index].point)/tangent - return math.atan2(ratio.imag, ratio.real) - - while True: - # Add the current stroke element to the path and advance along it. - path.append(self.paths[current[0]][current[1]]) - visited.add(current) - current = advance(current) - # If there is a bridge aligned with the stroke element that we advanced - # over, advance over that bridge as well. If there are multiple bridges, - # choose the one that is most aligned. - if current in self.bridges: - next = sorted(self.bridges[current], key=lambda x: angle(current, x))[0] - corners.extend([self.corners[current], self.corners[next]]) - path.append(svg.path.Line( - start=self.corners[current].point, end=self.corners[next].point)) - current = next - # Check if we either closed the loop or hit an already extracted stroke. - if current == start: - extracted.update(visited) - return (path, corners) - elif current in visited or current in extracted: - return (None, []) - - def extract_strokes(self): - ''' - Returns a pair (strokes, stroke_adjacency), where the first element is a - list of svg.path.Path objects that decompose this glyph into strokes and the - second is an adjacency-list representation of the indices of strokes which - share corner points. - - This method will log if some path elements do not appear on any stroke. - ''' - extracted = set() - strokes = [] - stroke_adjacency = collections.defaultdict(set) - corner_adjacency = collections.defaultdict(set) - for i, path in enumerate(self.paths): - for j, element in enumerate(path): - index = (i, j) - if index not in extracted: - (stroke, corners) = self.extract_stroke(extracted, index) - if stroke is None: - self.log('Stroke extraction missed some path elements!') - continue - stroke_index = len(strokes) - strokes.append(stroke) - corner_indices = set(corner.index for corner in corners) - if len(corner_indices) < len(corners): - self.log('Stroke {0} is self-intersecting!'.format(stroke_index)) - for corner_index in corner_indices: - for other_index in corner_adjacency[corner_index]: - stroke_adjacency[other_index].add(stroke_index) - stroke_adjacency[stroke_index].add(other_index) - corner_adjacency[corner_index].add(stroke_index) - return (strokes, stroke_adjacency) - - def get_bridges(self): - ''' - Returns an adjacency list of bridges. A bridge is a pair of corner indices - through which a stroke continues. The adjacency list is undirected: for any - two corner indices a and b, if b in result[a], a in result[b]. - ''' - # Collect bridge candidates scored by our bridge classifier. - candidates = [] - for corner in self.corners.itervalues(): - for other in self.corners.itervalues(): - confidence = corner.bridge(other) - if confidence > 0: - candidates.append((confidence, corner.index, other.index)) - candidates.sort(reverse=True) - # Add bridges to the set of bridges in order of decreasing confidence. - # However, we do NOT add bridges that would either a) form a triangle with - # an existing bridge, or b) that are long and should be multiple bridges. - bridges = set() - for (confidence, index1, index2) in candidates: - other1 = set(b for (a, b) in bridges if a == index1) - other2 = set(b for (a, b) in bridges if a == index2) - if (other1.intersection(other2) or - self.should_split_bridge((index1, index2))): - continue - bridges.add((index1, index2)) - bridges.add((index2, index1)) - # Convert the result to an adjacency list. Having more than two bridges at - # any given corner results in a warning. - result = collections.defaultdict(set) - for (index1, index2) in bridges: - result[index1].add(index2) - if len(result[index1]) == 3: - self.log('More than two bridges at corner {0}'.format( - self.corners[index1].point)) - return result - - def get_corners(self): - ''' - Returns a dict mapping indices to corners at that index. Each corner is a - point on the curve where the path makes a sharp negative angle. Since the - path has a small positive average angle, it is non-convex at these corners. - ''' - result = {} - for i, path in enumerate(self.paths): - candidates = [Corner(self.paths, (i, j)) for j in xrange(len(path))] - for corner in filter(lambda x: x.angle < -MIN_CORNER_ANGLE, candidates): - result[corner.index] = corner - return result - - def get_data(self): - ''' - Returns a representation of the data extracted from this glyph that can be - serialized to JSON. The result is a dictionary with the following keys: - - points: list of [x, y] pairs of endpoints on the glyph's SVG path - - corners: list of [x, y] pairs of points that are also corners - - bridges: list of pairs of corners [[x1, y1], [x2, y2]] that are bridges - - strokes: list of SVG path data strings for the extracted strokes - ''' - pair = lambda point: [int(point.real), int(point.imag)] - return { - 'points': [pair(element.end) for path in self.paths for element in path], - 'corners': [ - pair(corner.point) - for corner in self._default_corners.itervalues() - ], - 'bridges': [ - [pair(self.corners[index1].point), pair(self.corners[index2].point)] - for (index1, others) in self._default_bridges.iteritems() - for index2 in others if index1 < index2 - ], - 'strokes': [stroke.d() for stroke in self.strokes], - } - - def log(self, message): - self.messages.append(message) - - def should_split_bridge(self, bridge): - ''' - Returns true if there is some corner that is too close to the middle of the - given bridge. When this occurs, the gap between these indices should usually - be spanned by multiple bridges instead. - ''' - (index1, index2) = bridge - base = self.corners[index1].point - diff = self.corners[index2].point - base - for corner in self.corners.itervalues(): - if corner.index in bridge: - continue - t = ((corner.point.real - base.real)*diff.real + - (corner.point.imag - base.imag)*diff.imag)/(abs(diff)**2) - distance_to_line = abs(self.corners[index1].point + t*diff - corner.point) - if 0 < t < 1 and distance_to_line < MAX_BRIDGE_SPLIT_DISTANCE: - return True - return False