[GHC] #8935: Obscure linker bug leads to crash in GHCi

GHC ghc-devs at haskell.org
Wed May 14 21:05:27 UTC 2014


#8935: Obscure linker bug leads to crash in GHCi
-------------------------------------+------------------------------------
        Reporter:  simonmar          |            Owner:  trommler
            Type:  bug               |           Status:  patch
        Priority:  high              |        Milestone:  7.8.3
       Component:  Runtime System    |          Version:  7.8.1-rc2
      Resolution:                    |         Keywords:
Operating System:  Unknown/Multiple  |     Architecture:  Unknown/Multiple
 Type of failure:  GHCi crash        |       Difficulty:  Rocket Science
       Test Case:                    |       Blocked By:
        Blocking:                    |  Related Tickets:
-------------------------------------+------------------------------------

Comment (by dagit):

 I think your test depends on how you link the program and libraries.

 I'm using your setup, but I made the following tweaks.

 `test.c`, in this version `dlopen` of libbar is `RTLD_GLOBAL`:
 {{{
 #include <dlfcn.h>
 #include <stdio.h>
 #include <stdlib.h>

 extern int getbar(void);
 extern void setbar(int);

 /*
  * Resolve `name` in the library referred to by `handle`.
  *
  * Returns the symbol's address.  If dlerror() reports a failure the message
  * is written to stderr and the process exits with status 1; it also exits
  * if the address is NULL, because every caller here goes on to call through
  * the returned pointer.
  */
 static void *lookup_or_exit(void *handle, const char *name)
 {
   void *sym;
   const char *err;

   dlerror();                  /* discard any stale error before the lookup */
   sym = dlsym(handle, name);
   err = dlerror();
   if (err != NULL) {
     fprintf(stderr,"%s\n", err);
   }
   if (err != NULL || sym == NULL) {
     exit(1);
   }
   return sym;
 }

 /*
  * Test driver: dlopen()s the main program, then libbar.so (RTLD_GLOBAL) and
  * libfoo.so (RTLD_LOCAL), and after each library load looks up a
  * getter/setter pair, prints the getter's address and value, calls the
  * setter and prints the value again.
  *
  * Returns 0 on success; exits with status 1 if any dlopen() or symbol
  * lookup fails.
  */
 int main(int argc, char *argv[])
 {
   void *deflt, *hdl;
   int (*pgetbar)(void);
   int (*psetbar)(int);

   (void)argc;
   (void)argv;

   //setbar(2);

   //fprintf(stderr,"getbar() = %d\n", getbar());

   deflt = dlopen(NULL, RTLD_LAZY | RTLD_GLOBAL);
   if (deflt == NULL) {
       fprintf(stderr,"%s\n", dlerror());
       exit(1);
   }
   //pgetbar = dlsym(deflt, "getbar");
   //fprintf(stderr,"dlsym(deflt, \"getbar\") = %p, pgetbar() = %d\n",
   //        pgetbar, (*pgetbar)());

   hdl = dlopen("libbar.so", RTLD_GLOBAL | RTLD_LAZY);
   if (hdl == NULL) {
       fprintf(stderr,"%s\n", dlerror());
       exit(1);
   }
   pgetbar = (int (*)(void))lookup_or_exit(hdl, "_getbar");
   psetbar = (int (*)(int))lookup_or_exit(hdl, "_setbar");
   /* The label says "deflt" although the lookup went through hdl; the text
    * is kept unchanged so the output matches the transcripts in this
    * message.  %p requires a void *, hence the cast. */
   fprintf(stderr,"dlsym(deflt, \"_getbar\") = %p, pgetbar() = %d\n",
           (void *)pgetbar, (*pgetbar)());
   (*psetbar)(3);
   fprintf(stderr,"(*psetbar)(3); pgetbar() = %d\n", (*pgetbar)());

   /* hdl is reused here without dlclose()ing libbar.
    * NOTE(review): presumably so libbar stays loaded while libfoo is
    * opened -- confirm. */
   hdl = dlopen("libfoo.so", RTLD_LOCAL | RTLD_LAZY);
   if (hdl == NULL) {
       fprintf(stderr,"%s\n", dlerror());
       exit(1);
   }
   pgetbar = (int (*)(void))lookup_or_exit(hdl, "getbar");
   psetbar = (int (*)(int))lookup_or_exit(hdl, "setbar");
   fprintf(stderr,"dlsym(deflt, \"getbar\") = %p, pgetbar() = %d\n",
           (void *)pgetbar, (*pgetbar)());
   (*psetbar)(4);
   fprintf(stderr,"(*psetbar)(4); pgetbar() = %d\n", (*pgetbar)());

   return 0;
 }
 }}}

 Here, I don't link together the libraries at compile time, not even for
 `test.c` (I'm trying to be careful not to call this static linking,
 because as we'll see later it's not):
 {{{
 gcc -shared -fPIC bar.c -o libbar.so
 gcc -shared -fPIC foo.c -o libfoo.so
 gcc -Wall test.c -ldl -g
 LD_LIBRARY_PATH=. ./a.out
 }}}

 Now when I run the program I get:
 {{{
 dlsym(deflt, "_getbar") = 0x7fddcc3a5698, pgetbar() = 1
 (*psetbar)(3); pgetbar() = 3
 dlsym(deflt, "getbar") = 0x7fddcc1a3728, pgetbar() = 3
 (*psetbar)(4); pgetbar() = 4
 }}}

 Okay, as we would expect, libfoo uses the definition in libbar.  Now,
 switch the `dlopen` of libbar to `RTLD_LOCAL`, and run it again:

 {{{
 dlsym(deflt, "_getbar") = 0x7f056f698698, pgetbar() = 1
 (*psetbar)(3); pgetbar() = 3
 ./a.out: symbol lookup error: ./libfoo.so: undefined symbol: _getbar
 }}}

 Now if we link the final program against libbar:
 {{{
 gcc -shared -fPIC bar.c -o libbar.so
 gcc -shared -fPIC foo.c -o libfoo.so
 gcc -Wall test.c -L. -lbar -ldl -g
 LD_LIBRARY_PATH=. ./a.out
 dlsym(deflt, "_getbar") = 0x7f75b9914698, pgetbar() = 1
 (*psetbar)(3); pgetbar() = 3
 dlsym(deflt, "getbar") = 0x7f75b96f9728, pgetbar() = 3
 (*psetbar)(4); pgetbar() = 4
 }}}

 Now it finds the symbol again. The difference seems to be this:
 {{{
 readelf -Wa a.out.withbar | grep NEEDED
  0x0000000000000001 (NEEDED)             Shared library: [libbar.so]
  0x0000000000000001 (NEEDED)             Shared library: [libdl.so.2]
  0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
 readelf -Wa a.out.withoutbar | grep NEEDED
  0x0000000000000001 (NEEDED)             Shared library: [libdl.so.2]
  0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
 }}}

 In other words, mentioning libbar on the gcc line for `test.c` adds a
 `NEEDED` entry that the ELF interpreter sees, and it resolves the symbol,
 lazily, before we get into main. I would expect the same behavior if libfoo had
 been linked to libbar instead of with the main application. And indeed,
 that's what we get:
 {{{
 gcc -shared -fPIC bar.c -o libbar.so
 gcc -shared -fPIC foo.c -o libfoo.so -L. -lbar
 gcc -Wall test.c -ldl -g -o a.out.withoutbar
 LD_LIBRARY_PATH=. ./a.out.withoutbar
 dlsym(deflt, "_getbar") = 0x7fc75e77c698, pgetbar() = 1
 (*psetbar)(3); pgetbar() = 3
 dlsym(deflt, "getbar") = 0x7fc75e57a728, pgetbar() = 3
 (*psetbar)(4); pgetbar() = 4

 readelf -Wa libfoo.so | grep NEEDED
  0x0000000000000001 (NEEDED)             Shared library: [libbar.so]
  0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
 }}}

--
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/8935#comment:48>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler


More information about the ghc-tickets mailing list