ld: support --build-id=xx mode

This patch adds a new ld build-id computation mode, "xx", using
xxhash in its 128-bit mode.  The patch requires the xxhash-devel
headers to be installed, and uses the "all-inlined" model, so no
run-time or link-time library dependency exists.

The xxhash mode performs well, saving roughly 20% of total userspace
run time from an ld job over an 800MB shared library relative to sha1.
128 bits of good hash should be collision-resistant up to a number of
distinct binaries in the 2**32 - 2**64 range, even if it is not a
"crypto" level hash.  Confirmations of this are in progress.

         ld/configury: add --with-xxhash mode, different from gdb case
                       because only using it in inline mode

         ld/ldbuildid.c: add "xx" mode, #if WITH_XXHASH

         ld/NEWS, ld.texi: mention new option

         ld/lexsup.c: add enumeration of --build-id STYLES to --help

         ld/testsuite/ld-elf/build-id.exp: add test case for 0xHEX case
                                           and conditional for xx case;
                                           also, simplify tcl list syntax

https://inbox.sourceware.org/binutils/20240917201509.GB26396@redhat.com/

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
This commit is contained in:
Frank Ch. Eigler
2024-09-19 17:06:48 -04:00
parent 07d74e51ba
commit 2299dfd4ba
9 changed files with 228 additions and 54 deletions

View File

@@ -1,5 +1,10 @@
-*- text -*- -*- text -*-
Changes in 2.44:
* Add a "--build-id=xx" option, if built with the xxhash library. This
produces a 128-bit hash, 2-4x faster than md5 or sha1.
Changes in 2.43: Changes in 2.43:
* Add support for LoongArch DT_RELR (compressed R_LARCH_RELATIVE). * Add support for LoongArch DT_RELR (compressed R_LARCH_RELATIVE).

View File

@@ -269,6 +269,9 @@
/* Version number of package */ /* Version number of package */
#undef VERSION #undef VERSION
/* whether to use inline xxhash */
#undef WITH_XXHASH
/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a /* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
`char[]'. */ `char[]'. */
#undef YYTEXT_POINTER #undef YYTEXT_POINTER

60
ld/configure vendored
View File

@@ -806,6 +806,7 @@ infodir
docdir docdir
oldincludedir oldincludedir
includedir includedir
runstatedir
localstatedir localstatedir
sharedstatedir sharedstatedir
sysconfdir sysconfdir
@@ -872,6 +873,7 @@ with_libiconv_prefix
with_libiconv_type with_libiconv_type
with_libintl_prefix with_libintl_prefix
with_libintl_type with_libintl_type
with_xxhash
with_system_zlib with_system_zlib
with_zstd with_zstd
' '
@@ -935,6 +937,7 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc' sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com' sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var' localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include' includedir='${prefix}/include'
oldincludedir='/usr/include' oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@@ -1187,6 +1190,15 @@ do
| -silent | --silent | --silen | --sile | --sil) | -silent | --silent | --silen | --sile | --sil)
silent=yes ;; silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;; ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1324,7 +1336,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \ datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir libdir localedir mandir runstatedir
do do
eval ac_val=\$$ac_var eval ac_val=\$$ac_var
# Remove trailing slashes. # Remove trailing slashes.
@@ -1477,6 +1489,7 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var] --localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib] --libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include] --includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include] --oldincludedir=DIR C header files for non-gcc [/usr/include]
@@ -1589,6 +1602,8 @@ Optional Packages:
--with-libintl-prefix[=DIR] search for libintl in DIR/include and DIR/lib --with-libintl-prefix[=DIR] search for libintl in DIR/include and DIR/lib
--without-libintl-prefix don't search for libintl in includedir and libdir --without-libintl-prefix don't search for libintl in includedir and libdir
--with-libintl-type=TYPE type of library to search for (auto/static/shared) --with-libintl-type=TYPE type of library to search for (auto/static/shared)
--with-xxhash use inlined libxxhash for hashing (faster)
(auto/yes/no)
--with-system-zlib use installed libz --with-system-zlib use installed libz
--with-zstd support zstd compressed debug sections --with-zstd support zstd compressed debug sections
(default=auto) (default=auto)
@@ -11683,7 +11698,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF cat > conftest.$ac_ext <<_LT_EOF
#line 11686 "configure" #line 11701 "configure"
#include "confdefs.h" #include "confdefs.h"
#if HAVE_DLFCN_H #if HAVE_DLFCN_H
@@ -11789,7 +11804,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF cat > conftest.$ac_ext <<_LT_EOF
#line 11792 "configure" #line 11807 "configure"
#include "confdefs.h" #include "confdefs.h"
#if HAVE_DLFCN_H #if HAVE_DLFCN_H
@@ -19086,6 +19101,45 @@ $as_echo "#define HAVE_DECL_GETOPT 1" >>confdefs.h
fi fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use xxhash" >&5
$as_echo_n "checking whether to use xxhash... " >&6; }
# Check whether --with-xxhash was given.
if test "${with_xxhash+set}" = set; then :
withval=$with_xxhash;
else
with_xxhash=auto
fi
if test "x$with_xxhash" != "xno"; then
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#define XXH_INLINE_ALL
#include <xxhash.h>
XXH128_hash_t r;
void foo (void) { r = XXH128("foo", 3, 0); }
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
with_xxhash=yes
$as_echo "#define WITH_XXHASH 1" >>confdefs.h
else
if test "$with_xxhash" = yes; then
as_fn_error $? "xxhash is missing or unusable" "$LINENO" 5
fi
with_xxhash=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_xxhash" >&5
$as_echo "$with_xxhash" >&6; }
# Link in zlib/zstd if we can. This allows us to read and write # Link in zlib/zstd if we can. This allows us to read and write
# compressed debug sections. # compressed debug sections.

View File

@@ -424,6 +424,28 @@ if test $ld_cv_decl_getopt_unistd_h = yes; then
[Is the prototype for getopt in <unistd.h> in the expected format?]) [Is the prototype for getopt in <unistd.h> in the expected format?])
fi fi
dnl xxhash support from gdbsupport/common.m4
dnl Only the header-only use of xxhash is probed: the test program defines
dnl XXH_INLINE_ALL and is compiled but never linked.
dnl   --with-xxhash=auto, the default: use xxhash if the probe compiles.
dnl   --with-xxhash=yes: as above, but a failed probe is a fatal error.
dnl   --with-xxhash=no: skip the probe entirely.
dnl On success WITH_XXHASH is defined to 1 and with_xxhash is set to yes;
dnl otherwise with_xxhash ends up as no.
AC_MSG_CHECKING([whether to use xxhash])
AC_ARG_WITH(xxhash,
AS_HELP_STRING([--with-xxhash], [use inlined libxxhash for hashing (faster) (auto/yes/no)]),
[], [with_xxhash=auto])
if test "x$with_xxhash" != "xno"; then
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#define XXH_INLINE_ALL
#include <xxhash.h>
XXH128_hash_t r;
void foo (void) { r = XXH128("foo", 3, 0); }
])],[
with_xxhash=yes
AC_DEFINE([WITH_XXHASH], 1, [whether to use inline xxhash])
],[
if test "$with_xxhash" = yes; then
AC_MSG_ERROR([xxhash is missing or unusable])
fi
with_xxhash=no])
fi
AC_MSG_RESULT([$with_xxhash])
# Link in zlib/zstd if we can. This allows us to read and write # Link in zlib/zstd if we can. This allows us to read and write
# compressed debug sections. # compressed debug sections.
AM_ZLIB AM_ZLIB

View File

@@ -3216,20 +3216,20 @@ maximum cache size to @var{size}.
Request the creation of a @code{.note.gnu.build-id} ELF note section Request the creation of a @code{.note.gnu.build-id} ELF note section
or a @code{.buildid} COFF section. The contents of the note are or a @code{.buildid} COFF section. The contents of the note are
unique bits identifying this linked file. @var{style} can be unique bits identifying this linked file. @var{style} can be
@code{uuid} to use 128 random bits, @code{sha1} to use a 160-bit @code{uuid} to use 128 random bits; @code{sha1} to use a 160-bit
@sc{SHA1} hash on the normative parts of the output contents, @sc{SHA1} hash, @code{md5} to use a 128-bit @sc{MD5} hash, or @code{xx}
@code{md5} to use a 128-bit @sc{MD5} hash on the normative parts of to use a 128-bit @sc{XXHASH} on the normative parts of the output
the output contents, or @code{0x@var{hexstring}} to use a chosen bit contents; or @code{0x@var{hexstring}} to use a chosen bit string
string specified as an even number of hexadecimal digits (@code{-} and specified as an even number of hexadecimal digits (@code{-} and
@code{:} characters between digit pairs are ignored). If @var{style} @code{:} characters between digit pairs are ignored). If @var{style}
is omitted, @code{sha1} is used. is omitted, @code{sha1} is used.
The @code{md5} and @code{sha1} styles produces an identifier The @code{md5}, @code{sha1}, and @code{xx} styles produces an
that is always the same in an identical output file, but will be identifier that is always the same in an identical output file, but
unique among all nonidentical output files. It is not intended are almost certainly unique among all nonidentical output files. It
to be compared as a checksum for the file's contents. A linked is not intended to be compared as a checksum for the file's contents.
file may be changed later by other tools, but the build ID bit A linked file may be changed later by other tools, but the build ID
string identifying the original linked file does not change. bit string identifying the original linked file does not change.
Passing @code{none} for @var{style} disables the setting from any Passing @code{none} for @var{style} disables the setting from any
@code{--build-id} options earlier on the command line. @code{--build-id} options earlier on the command line.

View File

@@ -23,6 +23,10 @@
#include "safe-ctype.h" #include "safe-ctype.h"
#include "md5.h" #include "md5.h"
#include "sha1.h" #include "sha1.h"
#ifdef WITH_XXHASH
#define XXH_INLINE_ALL
#include <xxhash.h>
#endif
#include "ldbuildid.h" #include "ldbuildid.h"
#ifdef __MINGW32__ #ifdef __MINGW32__
#include <windows.h> #include <windows.h>
@@ -35,6 +39,9 @@ bool
validate_build_id_style (const char *style) validate_build_id_style (const char *style)
{ {
if ((streq (style, "md5")) || (streq (style, "sha1")) if ((streq (style, "md5")) || (streq (style, "sha1"))
#ifdef WITH_XXHASH
|| (streq (style, "xx"))
#endif
|| (streq (style, "uuid")) || (startswith (style, "0x"))) || (streq (style, "uuid")) || (startswith (style, "0x")))
return true; return true;
@@ -47,6 +54,11 @@ compute_build_id_size (const char *style)
if (streq (style, "md5") || streq (style, "uuid")) if (streq (style, "md5") || streq (style, "uuid"))
return 128 / 8; return 128 / 8;
#ifdef WITH_XXHASH
if (streq (style, "xx"))
return 128 / 8;
#endif
if (streq (style, "sha1")) if (streq (style, "sha1"))
return 160 / 8; return 160 / 8;
@@ -93,6 +105,16 @@ read_hex (const char xdigit)
return 0; return 0;
} }
#ifdef WITH_XXHASH
/* Callback handed to the checksum_contents hook: folds SIZE bytes at
   BUFFER into the streaming XXH3 128-bit hash whose state object is
   passed opaquely through STATE.  */

static void
xx_process_bytes (const void *buffer, size_t size, void *state)
{
  XXH3_state_t *xx_state = (XXH3_state_t *) state;

  XXH3_128bits_update (xx_state, buffer, size);
}
#endif
bool bool
generate_build_id (bfd *abfd, generate_build_id (bfd *abfd,
const char *style, const char *style,
@@ -100,7 +122,31 @@ generate_build_id (bfd *abfd,
unsigned char *id_bits, unsigned char *id_bits,
int size ATTRIBUTE_UNUSED) int size ATTRIBUTE_UNUSED)
{ {
if (streq (style, "md5")) #ifdef WITH_XXHASH
if (streq (style, "xx"))
{
XXH3_state_t* state = XXH3_createState ();
if (!state)
{
return false;
}
XXH3_128bits_reset (state);
if (!(*checksum_contents) (abfd, &xx_process_bytes, state))
{
XXH3_freeState (state);
return false;
}
XXH128_hash_t result = XXH3_128bits_digest (state);
XXH3_freeState (state);
/* Use canonical-endianness output. */
XXH128_canonical_t result_canon;
XXH128_canonicalFromHash (&result_canon, result);
memcpy (id_bits, &result_canon,
(size_t) size < sizeof (result) ? (size_t) size : sizeof (result));
}
else
#endif
if (streq (style, "md5"))
{ {
struct md5_ctx ctx; struct md5_ctx ctx;

View File

@@ -2278,6 +2278,15 @@ elf_static_list_options (FILE *file)
{ {
fprintf (file, _("\ fprintf (file, _("\
--build-id[=STYLE] Generate build ID note\n")); --build-id[=STYLE] Generate build ID note\n"));
/* DEFAULT_BUILD_ID_STYLE n/a here */
#ifdef WITH_XXHASH
fprintf (file, _("\
Styles: none,md5,sha1,xx,uuid,0xHEX\n"));
/* NB: testsuite/ld-elf/build-id.exp depends on this syntax */
#else
fprintf (file, _("\
Styles: none,md5,sha1,uuid,0xHEX\n"));
#endif
fprintf (file, _("\ fprintf (file, _("\
--package-metadata[=JSON] Generate package metadata note\n")); --package-metadata[=JSON] Generate package metadata note\n"));
fprintf (file, _("\ fprintf (file, _("\

View File

@@ -36,42 +36,71 @@ if { !([istarget *-*-linux*]
return return
} }
run_ld_link_tests [list \
[list \ set stylelist {"" "--build-id" "--build-id=none" "--build-id=md5"
"pr28639a.o" \ "--build-id=sha1" "--build-id=guid" "--build-id=0xdeadbeef"}
"-r --build-id=md5" \
"" \ run_ld_link_tests {
"" \ {
{start.s} \ "pr28639a.o"
{{readelf {--notes} pr28639a.rd}} \ "-r --build-id=md5"
"pr28639a.o" \ ""
] \ ""
[list \ {start.s}
"pr28639a.o" \ {{readelf {--notes} pr28639a.rd}}
"-r --build-id" \ "pr28639a.o"
"" \ }
"" \ {
{dummy.s} \ "pr28639b.o"
{{readelf {--notes} pr28639b.rd}} \ "-r --build-id"
"pr28639b.o" \ ""
] \ ""
[list \ {dummy.s}
"pr28639a" \ {{readelf {--notes} pr28639b.rd}}
"--build-id tmpdir/pr28639a.o tmpdir/pr28639b.o" \ "pr28639b.o"
"" \ }
"" \ {
{dummy.s} \ "pr28639a.o deadbeef"
{{readelf {--notes} pr28639b.rd} \ "-r --build-id=0xdeadbeef"
{readelf {--notes} pr28639c.rd}} \ ""
"pr28639a" \ ""
] \ {start.s}
[list \ {{readelf {--notes} pr28639e.rd}}
"pr28639b" \ "pr28639a.o"
"--build-id=none tmpdir/pr28639a.o tmpdir/pr28639b.o" \ }
"" \ {
"" \ "pr28639a"
{dummy.s} \ "--build-id tmpdir/pr28639a.o tmpdir/pr28639b.o"
{{readelf {--notes} pr28639d.rd}} \ ""
"pr28639b" \ ""
] \ {dummy.s}
] {{readelf {--notes} pr28639b.rd}
{readelf {--notes} pr28639c.rd}}
"pr28639a"
}
{
"pr28639b"
"--build-id=none tmpdir/pr28639a.o tmpdir/pr28639b.o"
""
""
{dummy.s}
{{readelf {--notes} pr28639d.rd}}
"pr28639b"
}
}
# See if the linker supports the xx style also.  The expected readelf
# output is shared with the md5 test: both styles yield a 16-byte build ID.
catch "exec $ld --help | grep -A2 -- --build-id | grep Styles" tmp
if {[string first ",xx," $tmp] >= 0} then {
    # NB: do not put comments inside the braced test list.  Tcl only
    # recognises "#" at the start of a command, so inside a list it and
    # the words after it become extra elements and shift the output
    # file name out of its expected position.
    run_ld_link_tests {
        {
            "pr28639a.o xx"
            "-r --build-id=xx"
            ""
            ""
            {start.s}
            {{readelf {--notes} pr28639a.rd}}
            "pr28639a.o"
        }
    }
}

View File

@@ -0,0 +1,6 @@
#...
Displaying notes found in: \.note\.gnu\.build-id
Owner Data size Description
GNU 0x00000004 NT_GNU_BUILD_ID \(unique build ID bitstring\)
Build ID: deadbeef
#pass