strip_markdown: Use a handbuilt stripper

It's not perfect, but it is much faster: about 35 µs versus about 650 µs for the previous package:markdown-based implementation.
This commit is contained in:
Vishesh Handa
2020-02-17 15:51:37 +01:00
parent 17fd25cac9
commit 0d5793d097
2 changed files with 39 additions and 54 deletions

View File

@ -1,56 +1,29 @@
import 'dart:convert';
import 'dart:core';
import 'package:markdown/markdown.dart' as md;
/// Flattens parsed Markdown into a single plain-text [String].
///
/// Acts as an [md.NodeVisitor]: every text node encountered during a
/// traversal is collected, and [build] joins the collected fragments with
/// single spaces.
class MarkdownBuilder implements md.NodeVisitor {
  /// Raw text fragments gathered during the current traversal.
  List<String> _texts = <String>[];

  /// Visits every node in [nodes] and returns the collected text.
  ///
  /// Fragments that are blank after trimming are dropped. In the remaining
  /// fragments each newline is replaced by a space, and the fragments are
  /// joined with one space between them (no leading or trailing space).
  /// Returns an empty string when no non-blank text was collected.
  String build(List<md.Node> nodes) {
    // Start from a clean slate so the builder can be reused.
    _texts.clear();
    for (final node in nodes) {
      node.accept(this);
    }

    return _texts
        .map((fragment) => fragment.trim())
        .where((fragment) => fragment.isNotEmpty)
        .map((fragment) => fragment.replaceAll('\n', ' ').trim())
        .join(' ');
  }

  /// Records the content of a text node.
  @override
  void visitText(md.Text text) {
    _texts.add(text.text);
  }

  /// Always returns `true`; no element is filtered out by this visitor.
  @override
  bool visitElementBefore(md.Element element) => true;

  /// Nothing to do once an element has been left.
  @override
  void visitElementAfter(md.Element element) {}
}
/// Matches any ASCII letter or digit. Lines without a match carry no readable
/// text (blank lines, `---` rules, ...) and are skipped entirely.
final _alphaNumeric = RegExp(r'[a-zA-Z0-9]');

/// Converts [markdown] into one line of plain text using a cheap line-based
/// pass instead of a full Markdown parse.
///
/// Every line that contains at least one ASCII letter or digit is processed
/// as follows:
///
///  * surrounding whitespace is trimmed;
///  * if the trimmed line starts with `#`, every `#` in the line is removed;
///  * the first `[ ]` is replaced by `☐`, and the first `[x]` and the first
///    `[X]` are each replaced by `☑`;
///  * the result is trimmed again and appended to the output, followed by a
///    single space.
///
/// All other lines are dropped. Any markup not listed above (for example `*`
/// list bullets) is left in place. Because a space is written after every
/// kept line, a non-empty result always ends with a trailing space; an input
/// without any kept line yields the empty string.
String stripMarkdownFormatting(String markdown) {
  final output = StringBuffer();

  for (var line in LineSplitter.split(markdown)) {
    if (!line.contains(_alphaNumeric)) {
      continue;
    }

    line = line.trim();
    if (line.startsWith('#')) {
      line = line.replaceAll('#', '');
    }

    // Render checklist markers as check-box glyphs rather than deleting them.
    line = line
        .replaceFirst('[ ]', '☐')
        .replaceFirst('[x]', '☑')
        .replaceFirst('[X]', '☑');

    // The line still holds a letter or digit here, so it cannot be empty.
    output
      ..write(line.trim())
      ..write(' ');
  }

  return output.toString();
}

View File

@ -5,7 +5,7 @@ void main() {
group('Markdown Remove Formatting', () {
test('Test Headers', () {
var input = '# Hello\nHow are you?';
expect(stripMarkdownFormatting(input), 'Hello How are you?');
expect(stripMarkdownFormatting(input), 'Hello How are you? ');
});
test('Test Header2', () {
@ -15,7 +15,7 @@ void main() {
Hello
""";
expect(stripMarkdownFormatting(input), 'Test Header Hello');
expect(stripMarkdownFormatting(input), 'Test Header Hello ');
});
test('Itemized LIsts', () {
@ -27,7 +27,19 @@ look like:
""";
expect(stripMarkdownFormatting(input),
'Itemized lists look like: this one that one');
'Itemized lists look like: * this one * that one ');
});
test('Checklist', () {
var input = """Itemized lists
[ ] this one
[x] that one
[X] last
""";
expect(stripMarkdownFormatting(input),
'Itemized lists ☐ this one ☑ that one ☑ last ');
});
});
}