Improve DictionaryNameFactory performance

This commit is contained in:
Jelle De Coninck
2024-09-13 10:45:13 +02:00
parent c2146ae315
commit 03d7effdd2
6 changed files with 383 additions and 494 deletions

View File

@@ -20,267 +20,229 @@
*/
package proguard.obfuscate;
import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.net.URL;
import java.util.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/**
* This <code>NameFactory</code> generates names that are read from a
* specified input file.
* Comments (everything starting with '#' on a single line) are ignored.
* This <code>NameFactory</code> generates names that are read from a specified input file. Comments
* (everything starting with '#' on a single line) are ignored.
*
* @author Eric Lafortune
*/
public class DictionaryNameFactory implements NameFactory
{
private static final char COMMENT_CHARACTER = '#';
public class DictionaryNameFactory implements NameFactory {
private static final char COMMENT_CHARACTER = '#';
private final List<String> names;
private Set<String> nameSet;
private final NameFactory nameFactory;
private int index = 0;
private final List names;
private final NameFactory nameFactory;
/**
* Creates a new <code>DictionaryNameFactory</code>.
*
* @param url The URL from which the names can be read.
* @param nameFactory The name factory from which names will be retrieved if the list of read
* names has been exhausted.
*/
public DictionaryNameFactory(URL url, NameFactory nameFactory) throws IOException {
this(url, true, nameFactory);
}
private int index = 0;
/**
* Creates a new <code>DictionaryNameFactory</code>.
*
* @param url The URL from which the names can be read.
* @param validJavaIdentifiers Specifies whether the produced names should be valid Java
* identifiers.
* @param nameFactory The name factory from which names will be retrieved if the list of read
* names has been exhausted.
*/
public DictionaryNameFactory(URL url, boolean validJavaIdentifiers, NameFactory nameFactory)
throws IOException {
this(
new BufferedReader(new InputStreamReader(url.openStream(), StandardCharsets.UTF_8)),
validJavaIdentifiers,
nameFactory);
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
*
* @param file The file from which the names can be read.
* @param nameFactory The name factory from which names will be retrieved if the list of read
* names has been exhausted.
*/
public DictionaryNameFactory(File file, NameFactory nameFactory) throws IOException {
this(file, true, nameFactory);
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
* @param url the URL from which the names can be read.
* @param nameFactory the name factory from which names will be retrieved
* if the list of read names has been exhausted.
*/
public DictionaryNameFactory(URL url,
NameFactory nameFactory) throws IOException
{
this(url, true, nameFactory);
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
*
* @param file The file from which the names can be read.
* @param validJavaIdentifiers Specifies whether the produced names should be valid Java
* identifiers.
* @param nameFactory The name factory from which names will be retrieved if the list of read
* names has been exhausted.
*/
public DictionaryNameFactory(File file, boolean validJavaIdentifiers, NameFactory nameFactory)
throws IOException {
this(
new BufferedReader(
new InputStreamReader(Files.newInputStream(file.toPath()), StandardCharsets.UTF_8)),
validJavaIdentifiers,
nameFactory);
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
*
* @param reader The reader from which the names can be read. The reader is closed at the end.
* @param nameFactory The name factory from which names will be retrieved if the list of read
* names has been exhausted.
*/
public DictionaryNameFactory(Reader reader, NameFactory nameFactory) throws IOException {
this(reader, true, nameFactory);
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
* @param url the URL from which the names can be read.
* @param validJavaIdentifiers specifies whether the produced names should
* be valid Java identifiers.
* @param nameFactory the name factory from which names will be
* retrieved if the list of read names has been
* exhausted.
*/
public DictionaryNameFactory(URL url,
boolean validJavaIdentifiers,
NameFactory nameFactory) throws IOException
{
this (new BufferedReader(
new InputStreamReader(
url.openStream(), "UTF-8")),
validJavaIdentifiers,
nameFactory);
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
*
* @param reader The reader from which the names can be read. The reader is closed at the end.
* @param validJavaIdentifiers Specifies whether the produced names should be valid Java
* identifiers.
* @param nameFactory The name factory from which names will be retrieved if the list of read
* names has been exhausted.
*/
public DictionaryNameFactory(Reader reader, boolean validJavaIdentifiers, NameFactory nameFactory)
throws IOException {
this.nameSet = readDictionary(reader, validJavaIdentifiers);
this.nameFactory = nameFactory;
this.names = new ArrayList<>(this.nameSet);
}
private static Set<String> readDictionary(Reader reader, boolean validJavaIdentifiers)
throws IOException {
try {
Set<String> names = new LinkedHashSet<>();
StringBuilder builder = new StringBuilder();
/**
* Creates a new <code>DictionaryNameFactory</code>.
* @param file the file from which the names can be read.
* @param nameFactory the name factory from which names will be retrieved
* if the list of read names has been exhausted.
*/
public DictionaryNameFactory(File file,
NameFactory nameFactory) throws IOException
{
this(file, true, nameFactory);
}
while (true) {
// Read the next character.
int c = reader.read();
// Is it a valid identifier character?
if (c != -1
&& (validJavaIdentifiers
? (builder.length() == 0
? Character.isJavaIdentifierStart((char) c)
: Character.isJavaIdentifierPart((char) c))
: (c != '\n' && c != '\r' && c != COMMENT_CHARACTER))) {
// Append it to the current identifier.
builder.append((char) c);
} else {
// Did we collect a new identifier?
if (builder.length() > 0) {
// Add the completed name to the list of names, if it's
// not in it yet.
String name = builder.toString();
names.add(name);
/**
* Creates a new <code>DictionaryNameFactory</code>.
* @param file the file from which the names can be read.
* @param validJavaIdentifiers specifies whether the produced names should
* be valid Java identifiers.
* @param nameFactory the name factory from which names will be
* retrieved if the list of read names has been
* exhausted.
*/
public DictionaryNameFactory(File file,
boolean validJavaIdentifiers,
NameFactory nameFactory) throws IOException
{
this (new BufferedReader(
new InputStreamReader(
new FileInputStream(file), "UTF-8")),
validJavaIdentifiers,
nameFactory);
}
// Clear the builder.
builder.setLength(0);
}
// Is this the beginning of a comment line?
if (c == COMMENT_CHARACTER) {
// Skip all characters till the end of the line.
do {
c = reader.read();
} while (c != -1 && c != '\n' && c != '\r');
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
* @param reader the reader from which the names can be read. The
* reader is closed at the end.
* @param nameFactory the name factory from which names will be retrieved
* if the list of read names has been exhausted.
*/
public DictionaryNameFactory(Reader reader,
NameFactory nameFactory) throws IOException
{
this(reader, true, nameFactory);
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
* @param reader the reader from which the names can be read.
* The reader is closed at the end.
* @param validJavaIdentifiers specifies whether the produced names should
* be valid Java identifiers.
* @param nameFactory the name factory from which names will be
* retrieved if the list of read names has been
* exhausted.
*/
public DictionaryNameFactory(Reader reader,
boolean validJavaIdentifiers,
NameFactory nameFactory) throws IOException
{
this.names = new ArrayList();
this.nameFactory = nameFactory;
try
{
StringBuffer buffer = new StringBuffer();
while (true)
{
// Read the next character.
int c = reader.read();
// Is it a valid identifier character?
if (c != -1 &&
(validJavaIdentifiers ?
(buffer.length() == 0 ?
Character.isJavaIdentifierStart((char)c) :
Character.isJavaIdentifierPart((char)c)) :
(c != '\n' &&
c != '\r' &&
c != COMMENT_CHARACTER)))
{
// Append it to the current identifier.
buffer.append((char)c);
}
else
{
// Did we collect a new identifier?
if (buffer.length() > 0)
{
// Add the completed name to the list of names, if it's
// not in it yet.
String name = buffer.toString();
if (!names.contains(name))
{
names.add(name);
}
// Clear the buffer.
buffer.setLength(0);
}
// Is this the beginning of a comment line?
if (c == COMMENT_CHARACTER)
{
// Skip all characters till the end of the line.
do
{
c = reader.read();
}
while (c != -1 &&
c != '\n' &&
c != '\r');
}
// Is this the end of the file?
if (c == -1)
{
// Just return.
return;
}
}
}
}
finally
{
reader.close();
// Is this the end of the file?
if (c == -1) {
// Just return.
return names;
}
}
}
} finally {
reader.close();
}
}
/**
 * Creates a new <code>DictionaryNameFactory</code> that reuses the dictionary of an existing
 * factory.
 *
 * @param dictionaryNameFactory The dictionary name factory whose dictionary will be used. Its
 *     name list is shared with the new instance, not copied.
 * @param nameFactory The name factory from which names will be retrieved if the list of read
 *     names has been exhausted.
 */
public DictionaryNameFactory(
    DictionaryNameFactory dictionaryNameFactory, NameFactory nameFactory) {
  this.names = dictionaryNameFactory.names;
  // Also share the lookup set if the source factory already has one, so that this instance does
  // not have to rebuild a HashSet of the whole dictionary. The set may still be null here (the
  // source may itself have been created through this constructor); nextName() then builds it
  // lazily.
  this.nameSet = dictionaryNameFactory.nameSet;
  this.nameFactory = nameFactory;
}
// Implementations for NameFactory.
public void reset() {
index = 0;
nameFactory.reset();
}
/**
 * Returns the next name. Names from the dictionary are returned first, in list order; once the
 * dictionary is exhausted, names are taken from the fallback name factory, skipping any name that
 * also occurs in the dictionary.
 */
public String nextName() {
  // Serve names straight from the dictionary for as long as it lasts.
  if (index < names.size()) {
    return names.get(index++);
  }

  // The lookup set is not assigned by every constructor, so create it on first use.
  if (nameSet == null) {
    nameSet = new HashSet<>(names);
  }

  // Keep asking the fallback factory until it produces a name that isn't in the dictionary.
  String candidate = nameFactory.nextName();
  while (nameSet.contains(candidate)) {
    candidate = nameFactory.nextName();
  }
  return candidate;
}
/**
* Creates a new <code>DictionaryNameFactory</code>.
* @param dictionaryNameFactory the dictionary name factory whose dictionary
* will be used.
* @param nameFactory the name factory from which names will be
* retrieved if the list of read names has been
* exhausted.
*/
public DictionaryNameFactory(DictionaryNameFactory dictionaryNameFactory,
NameFactory nameFactory)
{
this.names = dictionaryNameFactory.names;
this.nameFactory = nameFactory;
}
// Implementations for NameFactory.
public void reset()
{
index = 0;
nameFactory.reset();
}
public String nextName()
{
String name;
// Do we still have names?
if (index < names.size())
{
// Return the next name.
name = (String)names.get(index++);
}
else
{
// Return the next different name from the other name factory.
do
{
name = nameFactory.nextName();
}
while (names.contains(name));
}
return name;
}
public static void main(String[] args)
{
try
{
DictionaryNameFactory factory =
new DictionaryNameFactory(new File(args[0]), new SimpleNameFactory());
// For debugging, we're always using UTF-8 instead of the default
// character encoding, even for writing to the standard output.
PrintWriter out =
new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"));
for (int counter = 0; counter < 50; counter++)
{
out.println("[" + factory.nextName() + "]");
}
out.flush();
}
catch (IOException ex)
{
ex.printStackTrace();
}
public static void main(String[] args) {
try {
DictionaryNameFactory factory =
new DictionaryNameFactory(new File(args[0]), new SimpleNameFactory());
// For debugging, we're always using UTF-8 instead of the default
// character encoding, even for writing to the standard output.
PrintWriter out = new PrintWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
for (int counter = 0; counter < 50; counter++) {
out.println("[" + factory.nextName() + "]");
}
out.flush();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}

View File

@@ -21,14 +21,13 @@
package proguard.obfuscate;
/**
* This interface provides methods to generate unique sequences of names.
* The names must be valid Java identifiers.
* This interface provides methods to generate unique sequences of names. The names must be valid
* Java identifiers.
*
* @author Eric Lafortune
*/
public interface NameFactory
{
public void reset();
public interface NameFactory {
void reset();
public String nextName();
String nextName();
}

View File

@@ -20,41 +20,33 @@
*/
package proguard.obfuscate;
/**
* NameFactory that prepends the names of the wrapped NameFactory with
* a fixed prefix.
* NameFactory that prepends the names of the wrapped NameFactory with a fixed prefix.
*
* @author Johan Leys
*/
public class PrefixingNameFactory implements NameFactory
{
private final NameFactory delegateNameFactory;
private final String prefix;
public class PrefixingNameFactory implements NameFactory {
private final NameFactory delegateNameFactory;
private final String prefix;
/**
* Creates a new PrefixingNameFactory.
*
* @param delegateNameFactory The wrapped NameFactory.
* @param prefix The prefix to add to all generated names.
*/
public PrefixingNameFactory(NameFactory delegateNameFactory, String prefix) {
this.delegateNameFactory = delegateNameFactory;
this.prefix = prefix;
}
/**
* Creates a new PrefixingNameFactory.
* @param delegateNameFactory the wrapped NameFactory.
* @param prefix the prefix to add to all generated names.
*/
public PrefixingNameFactory(NameFactory delegateNameFactory,
String prefix)
{
this.delegateNameFactory = delegateNameFactory;
this.prefix = prefix;
}
// Implementations for NameFactory.
public String nextName() {
return prefix + delegateNameFactory.nextName();
}
// Implementations for NameFactory.
public String nextName()
{
return prefix + delegateNameFactory.nextName();
}
public void reset()
{
delegateNameFactory.reset();
}
public void reset() {
delegateNameFactory.reset();
}
}

View File

@@ -23,126 +23,94 @@ package proguard.obfuscate;
import java.util.Arrays;
/**
* This <code>NameFactory</code> generates unique short names, using mixed-case
* characters or lower-case characters only.
* This <code>NameFactory</code> generates unique short names, using mixed-case characters or
* lower-case characters only.
*
* @author Eric Lafortune
*/
public class SimpleNameFactory implements NameFactory
{
private static final int CHARACTER_COUNT = 26;
public class SimpleNameFactory implements NameFactory {
private static final int CHARACTER_COUNT = 26;
/**
* Array of Windows reserved names.
* This array does not include COM{digit} or LPT{digit} as {@link SimpleNameFactory} does not generate digits.
* This array must be sorted in ascending order as we're using {@link Arrays#binarySearch(Object[], Object)} on it.
*/
private static final String[] reservedNames = new String[] {"AUX", "CON", "NUL", "PRN"};
/**
* Array of Windows reserved names. This array does not include COM{digit} or LPT{digit} as
* {@link SimpleNameFactory} does not generate digits. This array must be sorted in ascending
* order as we're using {@link Arrays#binarySearch(Object[], Object)} on it.
*/
private static final String[] reservedNames = new String[] {"AUX", "CON", "NUL", "PRN"};
private final boolean generateMixedCaseNames;
private int index = 0;
private final boolean generateMixedCaseNames;
private int index = 0;
/**
* Creates a new <code>SimpleNameFactory</code> that generates mixed-case names.
*/
public SimpleNameFactory()
{
this(true);
/** Creates a new <code>SimpleNameFactory</code> that generates mixed-case names. */
public SimpleNameFactory() {
this(true);
}
/**
* Creates a new <code>SimpleNameFactory</code>.
*
* @param generateMixedCaseNames A flag to indicate whether the generated names will be
* mixed-case, or lower-case only.
*/
public SimpleNameFactory(boolean generateMixedCaseNames) {
this.generateMixedCaseNames = generateMixedCaseNames;
}
// Implementations for NameFactory.
public void reset() {
index = 0;
}
public String nextName() {
return name(index++);
}
/** Returns the name at the given index. */
private String name(int index) {
// Create a new name for this index
return newName(index);
}
/**
 * Builds the name for the given index: the name of the prefix index (if any) followed by one
 * character selected by the remainder. If the result, upper-cased, is one of the reserved names,
 * the last character is appended once more.
 */
private String newName(int index) {
  // Mixed-case generation doubles the number of usable characters.
  int alphabetSize = generateMixedCaseNames ? 2 * CHARACTER_COUNT : CHARACTER_COUNT;

  int prefixIndex = index / alphabetSize;
  char lastChar = charAt(index % alphabetSize);

  String result;
  if (prefixIndex == 0) {
    // Single-character name.
    result = String.valueOf(lastChar);
  } else {
    // Longer name: the name of the preceding prefix index plus the new character.
    result = name(prefixIndex - 1) + lastChar;
  }

  // reservedNames is sorted, so a binary search is sufficient for the lookup.
  boolean isReserved = Arrays.binarySearch(reservedNames, result.toUpperCase()) >= 0;
  return isReserved ? result + lastChar : result;
}
/**
* Returns the character with the given index, between 0 and the number of acceptable characters.
*/
private char charAt(int index) {
return (char) ((index < CHARACTER_COUNT ? 'a' : 'A' - CHARACTER_COUNT) + index);
}
/**
* Creates a new <code>SimpleNameFactory</code>.
* @param generateMixedCaseNames a flag to indicate whether the generated
* names will be mixed-case, or lower-case only.
*/
public SimpleNameFactory(boolean generateMixedCaseNames)
{
this.generateMixedCaseNames = generateMixedCaseNames;
}
// Implementations for NameFactory.
public void reset()
{
index = 0;
}
public String nextName()
{
return name(index++);
}
/**
* Returns the name at the given index.
*/
private String name(int index)
{
// Create a new name for this index
return newName(index);
}
/**
* Creates and returns the name at the given index.
*/
private String newName(int index)
{
// If we're allowed to generate mixed-case names, we can use twice as
// many characters.
int totalCharacterCount = generateMixedCaseNames ?
2 * CHARACTER_COUNT :
CHARACTER_COUNT;
int baseIndex = index / totalCharacterCount;
int offset = index % totalCharacterCount;
char newChar = charAt(offset);
String newName = baseIndex == 0 ?
new String(new char[] { newChar }) :
(name(baseIndex-1) + newChar);
if (Arrays.binarySearch(reservedNames, newName.toUpperCase()) >= 0)
{
newName += newChar;
}
return newName;
}
/**
* Returns the character with the given index, between 0 and the number of
* acceptable characters.
*/
private char charAt(int index)
{
return (char)((index < CHARACTER_COUNT ? 'a' - 0 :
'A' - CHARACTER_COUNT) + index);
}
public static void main(String[] args)
{
System.out.println("Some mixed-case names:");
printNameSamples(new SimpleNameFactory(true), 60);
System.out.println("Some lower-case names:");
printNameSamples(new SimpleNameFactory(false), 60);
System.out.println("Some more mixed-case names:");
printNameSamples(new SimpleNameFactory(true), 80);
System.out.println("Some more lower-case names:");
printNameSamples(new SimpleNameFactory(false), 80);
}
private static void printNameSamples(SimpleNameFactory factory, int count)
{
for (int counter = 0; counter < count; counter++)
{
System.out.println(" ["+factory.nextName()+"]");
}
public static void main(String[] args) {
System.out.println("Some mixed-case names:");
printNameSamples(new SimpleNameFactory(true), 60);
System.out.println("Some lower-case names:");
printNameSamples(new SimpleNameFactory(false), 60);
System.out.println("Some more mixed-case names:");
printNameSamples(new SimpleNameFactory(true), 80);
System.out.println("Some more lower-case names:");
printNameSamples(new SimpleNameFactory(false), 80);
}
private static void printNameSamples(SimpleNameFactory factory, int count) {
for (int counter = 0; counter < count; counter++) {
System.out.println(" [" + factory.nextName() + "]");
}
}
}

View File

@@ -21,63 +21,46 @@
package proguard.obfuscate;
/**
* This <code>NameFactory</code> generates names that are special, by appending
* a suffix.
* This <code>NameFactory</code> generates names that are special, by appending a suffix.
*
* @author Eric Lafortune
*/
public class SpecialNameFactory implements NameFactory
{
private static final char SPECIAL_SUFFIX = '_';
public class SpecialNameFactory implements NameFactory {
private static final char SPECIAL_SUFFIX = '_';
private final NameFactory nameFactory;
private final NameFactory nameFactory;
/**
* Creates a new <code>SpecialNameFactory</code>.
*
* @param nameFactory The name factory from which original names will be retrieved.
*/
public SpecialNameFactory(NameFactory nameFactory) {
this.nameFactory = nameFactory;
}
// Implementations for NameFactory.
/**
* Creates a new <code>SpecialNameFactory</code>.
* @param nameFactory the name factory from which original names will be
* retrieved.
*/
public SpecialNameFactory(NameFactory nameFactory)
{
this.nameFactory = nameFactory;
}
// Implementations for NameFactory.
public void reset()
{
nameFactory.reset();
}
public String nextName()
{
return nameFactory.nextName() + SPECIAL_SUFFIX;
}
// Small utility methods.
/**
* Returns whether the given name is special.
*/
static boolean isSpecialName(String name)
{
return name != null &&
name.charAt(name.length()-1) == SPECIAL_SUFFIX;
}
public static void main(String[] args)
{
SpecialNameFactory factory = new SpecialNameFactory(new SimpleNameFactory());
for (int counter = 0; counter < 50; counter++)
{
System.out.println("["+factory.nextName()+"]");
}
public void reset() {
nameFactory.reset();
}
public String nextName() {
return nameFactory.nextName() + SPECIAL_SUFFIX;
}
// Small utility methods.
/**
 * Returns whether the given name is special, i.e. whether it ends with the special suffix
 * character.
 *
 * @param name The name to check; may be null.
 * @return true if the name is non-null, non-empty and its last character is the special suffix;
 *     false otherwise.
 */
static boolean isSpecialName(String name) {
  // Guard against the empty string: charAt(-1) would throw StringIndexOutOfBoundsException.
  return name != null && !name.isEmpty() && name.charAt(name.length() - 1) == SPECIAL_SUFFIX;
}
public static void main(String[] args) {
SpecialNameFactory factory = new SpecialNameFactory(new SimpleNameFactory());
for (int counter = 0; counter < 50; counter++) {
System.out.println("[" + factory.nextName() + "]");
}
}
}

View File

@@ -23,71 +23,56 @@ package proguard.obfuscate;
import proguard.classfile.Clazz;
/**
* NameFactory which only generates names that don't exist yet as members
* on the class for which it is created.
* NameFactory which only generates names that don't exist yet as members on the class for which it
* is created.
*
* @author Johan Leys
*/
public class UniqueMemberNameFactory implements NameFactory
{
private static final String INJECTED_MEMBER_PREFIX = "$$";
public class UniqueMemberNameFactory implements NameFactory {
private static final String INJECTED_MEMBER_PREFIX = "$$";
private final NameFactory delegateNameFactory;
private final Clazz clazz;
private final NameFactory delegateNameFactory;
private final Clazz clazz;
/**
* Utility for creating a new NameFactory that can generate names for injected members: the
* generated names are unique within the given class, and don't clash with non-injected members of
* its super classes.
*
* @param clazz The class for which to generate a NameFactory.
* @return The new NameFactory instance.
*/
public static UniqueMemberNameFactory newInjectedMemberNameFactory(Clazz clazz) {
return new UniqueMemberNameFactory(
new PrefixingNameFactory(new SimpleNameFactory(), INJECTED_MEMBER_PREFIX), clazz);
}
/**
* Utility for creating a new NameFactory that can generate names for injected
* members: the generated names are unique within the given class, and don't
* clash with non-injected members of its super classes.
*
* @param clazz the class for which to generate a NameFactory.
* @return the new NameFactory instance.
*/
public static UniqueMemberNameFactory newInjectedMemberNameFactory(Clazz clazz)
{
return new UniqueMemberNameFactory(
new PrefixingNameFactory(
new SimpleNameFactory(), INJECTED_MEMBER_PREFIX), clazz);
}
/**
* Creates a new UniqueMemberNameFactory.
*
* @param delegateNameFactory The delegate NameFactory, used for generating new candidate names.
* @param clazz The class in which to check for existing member names.
*/
public UniqueMemberNameFactory(NameFactory delegateNameFactory, Clazz clazz) {
this.delegateNameFactory = delegateNameFactory;
this.clazz = clazz;
}
// Implementations for NameFactory.
/**
* Creates a new UniqueMemberNameFactory.
* @param delegateNameFactory the delegate NameFactory, used for generating
* new candidate names.
* @param clazz the class in which to check for existing
* member names.
*/
public UniqueMemberNameFactory(NameFactory delegateNameFactory,
Clazz clazz)
{
this.delegateNameFactory = delegateNameFactory;
this.clazz = clazz;
}
public String nextName() {
String name;
// Check if the name doesn't exist yet. We don't have additional
// descriptor information, so we can only search on the name.
do {
name = delegateNameFactory.nextName();
} while (clazz.findField(name, null) != null || clazz.findMethod(name, null) != null);
// Implementations for NameFactory.
return name;
}
public String nextName()
{
String name;
// Check if the name doesn't exist yet. We don't have additional
// descriptor information, so we can only search on the name.
do
{
name = delegateNameFactory.nextName();
}
while (clazz.findField(name, null) != null ||
clazz.findMethod(name, null) != null);
return name;
}
public void reset()
{
delegateNameFactory.reset();
}
}
public void reset() {
delegateNameFactory.reset();
}
}