diff --git a/README.md b/README.md index a413cf0..be31df2 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ But it can easily replaced by any other which is better for handing your texts. * Version 2.1-SNAPSHOT * included checkstyle source code conventions * groupid changed to **com.github.wumpz**, due to maven central releasing + * allow configurable splitting of lines to define the blocks to compare (words, characters, phrases). * Version 2.0 * switch to maven and removed other artifacts * changed groupid to **com.github.java-diff-utils** due to different forks at github diff --git a/src/main/java/com/github/difflib/text/DiffRowGenerator.java b/src/main/java/com/github/difflib/text/DiffRowGenerator.java index ad13542..9df6848 100644 --- a/src/main/java/com/github/difflib/text/DiffRowGenerator.java +++ b/src/main/java/com/github/difflib/text/DiffRowGenerator.java @@ -47,16 +47,34 @@ import java.util.regex.Pattern; * */ public class DiffRowGenerator { - + public static final Pattern SPLIT_BY_WORD_PATTERN = Pattern.compile("\\s+|[,.\\[\\](){}/\\\\*+\\-#]"); + public static final BiPredicate IGNORE_WHITESPACE_EQUALIZER = (original, revised) -> original.trim().replaceAll("\\s+", " ").equals(revised.trim().replaceAll("\\s+", " ")); + public static final BiPredicate DEFAULT_EQUALIZER = Object::equals; - private static final Pattern SPLIT_PATTERN = Pattern.compile("\\s+|[,.\\[\\](){}/\\\\*+\\-#]"); + + /** + * Splitting lines by word to achieve word by word diff checking. + */ + public static final Function> SPLITTER_BY_WORD = line -> splitStringPreserveDelimiter(line, SPLIT_BY_WORD_PATTERN); + + /** + * Splitting lines by character to achieve char by char diff checking. + */ + public static final Function> SPLITTER_BY_CHARACTER = line -> { + List list = new ArrayList<>(line.length()); + for (Character character : line.toCharArray()) { + list.add(character.toString()); + } + return list; + }; + private final boolean showInlineDiffs; private final boolean ignoreWhiteSpaces; private final Function oldTag; private final Function newTag; - private final boolean inlineDiffByWord; + private final Function> inlineDiffSplitter; private final int columnWidth; private final BiPredicate equalizer; private final boolean mergeOriginalRevised; @@ -78,8 +96,8 @@ public class DiffRowGenerator { private int columnWidth = 0; private boolean mergeOriginalRevised = false; - private boolean inlineDiffByWord = false; private boolean reportLinesUnchanged = false; + private Function> inlineDiffSplitter = SPLITTER_BY_CHARACTER; private Builder() { } @@ -177,7 +195,13 @@ public class DiffRowGenerator { * deliver no in word changes. */ public Builder inlineDiffByWord(boolean inlineDiffByWord) { - this.inlineDiffByWord = inlineDiffByWord; + inlineDiffSplitter = inlineDiffByWord?SPLITTER_BY_WORD:SPLITTER_BY_CHARACTER; + return this; + } + + + public Builder inlineDiffBySplitter(Function> inlineDiffSplitter) { + this.inlineDiffSplitter = inlineDiffSplitter; return this; } } @@ -193,9 +217,11 @@ public class DiffRowGenerator { newTag = builder.newTag; columnWidth = builder.columnWidth; mergeOriginalRevised = builder.mergeOriginalRevised; - inlineDiffByWord = builder.inlineDiffByWord; + inlineDiffSplitter = builder.inlineDiffSplitter; equalizer = ignoreWhiteSpaces ? IGNORE_WHITESPACE_EQUALIZER : DEFAULT_EQUALIZER; reportLinesUnchanged = builder.reportLinesUnchanged; + + Objects.requireNonNull(inlineDiffSplitter); } /** @@ -318,19 +344,8 @@ public class DiffRowGenerator { String joinedOrig = String.join("\n", orig); String joinedRev = String.join("\n", rev); - if (inlineDiffByWord) { - origList = splitStringPreserveDelimiter(joinedOrig); - revList = splitStringPreserveDelimiter(joinedRev); - } else { - origList = new ArrayList<>(joinedOrig.length()); - revList = new ArrayList<>(joinedRev.length()); - for (Character character : joinedOrig.toCharArray()) { - origList.add(character.toString()); - } - for (Character character : joinedRev.toCharArray()) { - revList.add(character.toString()); - } - } + origList = inlineDiffSplitter.apply(joinedOrig); + revList = inlineDiffSplitter.apply(joinedRev); List> inlineDeltas = DiffUtils.diff(origList, revList).getDeltas(); @@ -404,7 +419,7 @@ public class DiffRowGenerator { sequence.add(endPosition, generator.apply(false)); } - protected final static List splitStringPreserveDelimiter(String str) { + protected final static List splitStringPreserveDelimiter(String str, Pattern SPLIT_PATTERN) { List list = new ArrayList<>(); if (str != null) { Matcher matcher = SPLIT_PATTERN.matcher(str); diff --git a/src/test/java/com/github/difflib/text/DiffRowGeneratorTest.java b/src/test/java/com/github/difflib/text/DiffRowGeneratorTest.java index 3326fe9..1974828 100644 --- a/src/test/java/com/github/difflib/text/DiffRowGeneratorTest.java +++ b/src/test/java/com/github/difflib/text/DiffRowGeneratorTest.java @@ -3,6 +3,7 @@ package com.github.difflib.text; import com.github.difflib.algorithm.DiffException; import java.util.Arrays; import java.util.List; +import java.util.regex.Pattern; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import org.junit.Test; @@ -182,14 +183,14 @@ public class DiffRowGeneratorTest { @Test public void testSplitString() { - List list = DiffRowGenerator.splitStringPreserveDelimiter("test,test2"); + List list = DiffRowGenerator.splitStringPreserveDelimiter("test,test2", DiffRowGenerator.SPLIT_BY_WORD_PATTERN); assertEquals(3, list.size()); assertEquals("[test, ,, test2]", list.toString()); } @Test public void testSplitString2() { - List list = DiffRowGenerator.splitStringPreserveDelimiter("test , test2"); + List list = DiffRowGenerator.splitStringPreserveDelimiter("test , test2", DiffRowGenerator.SPLIT_BY_WORD_PATTERN); System.out.println(list); assertEquals(5, list.size()); assertEquals("[test, , ,, , test2]", list.toString()); @@ -197,7 +198,7 @@ public class DiffRowGeneratorTest { @Test public void testSplitString3() { - List list = DiffRowGenerator.splitStringPreserveDelimiter("test,test2,"); + List list = DiffRowGenerator.splitStringPreserveDelimiter("test,test2,", DiffRowGenerator.SPLIT_BY_WORD_PATTERN); System.out.println(list); assertEquals(4, list.size()); assertEquals("[test, ,, test2, ,]", list.toString()); @@ -262,4 +263,23 @@ public class DiffRowGeneratorTest { assertEquals("[CHANGE, ,]", rows.get(1).toString()); assertEquals("[EQUAL,other,other]", rows.get(2).toString()); } + + @Test + public void testGeneratorIssue14() throws DiffException { + DiffRowGenerator generator = DiffRowGenerator.create() + .showInlineDiffs(true) + .mergeOriginalRevised(true) + .inlineDiffBySplitter(line -> DiffRowGenerator.splitStringPreserveDelimiter(line, Pattern.compile(","))) + .oldTag(f -> "~") + .newTag(f -> "**") + .build(); + List rows = generator.generateDiffRows( + Arrays.asList("J. G. Feldstein, Chair"), + Arrays.asList("T. P. Pastor, Chair")); + + System.out.println(rows.get(0).getOldLine()); + + assertEquals(1, rows.size()); + assertEquals("~J. G. Feldstein~**T. P. Pastor**, Chair", rows.get(0).getOldLine()); + } }