[commit: ghc] master: GHCi: Properly generate jump code for ARM (#8380) (5bab1a5)

git at git.haskell.org git at git.haskell.org
Fri Nov 22 14:22:46 UTC 2013


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/5bab1a57f572e29dfdffd6d1ce8e53a2772b18fd/ghc

>---------------------------------------------------------------

commit 5bab1a57f572e29dfdffd6d1ce8e53a2772b18fd
Author: Austin Seipp <austin at well-typed.com>
Date:   Mon Nov 11 10:26:03 2013 -0600

    GHCi: Properly generate jump code for ARM (#8380)
    
    This adds code for jumping to given addresses for ARM, written by Ben
    Gamari.
    
    However, when allocating new infotables for bytecode (which is where
    this jump code occurs), we need to be sure to flush the cache on the
    execute pointer returned from allocateExec() - on systems like ARM, the
    processor won't reliably read back code or automatically cache flush,
    where x86 will.
    
    So we add a new flushExec primitive to call out to GCC's
    __builtin___clear_cache primitive, which will properly generate the
    correct code (nothing on x86, and a call to libgcc's __clear_cache on
    ARM) and make sure we use it after writing the code out.
    
    Authored-by: Ben Gamari <bgamari.foss at gmail.com>
    Authored-by: Austin Seipp <austin at well-typed.com>
    Signed-off-by: Austin Seipp <austin at well-typed.com>


>---------------------------------------------------------------

5bab1a57f572e29dfdffd6d1ce8e53a2772b18fd
 compiler/ghci/ByteCodeItbls.lhs |   21 ++++++++++++++++++++-
 includes/rts/storage/GC.h       |    1 +
 rts/Linker.c                    |    1 +
 rts/sm/Storage.c                |   27 +++++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/compiler/ghci/ByteCodeItbls.lhs b/compiler/ghci/ByteCodeItbls.lhs
index 0d07be5..2180f87 100644
--- a/compiler/ghci/ByteCodeItbls.lhs
+++ b/compiler/ghci/ByteCodeItbls.lhs
@@ -227,6 +227,20 @@ mkJumpToAddr dflags a = case platformArch (targetPlatform dflags) of
                  , fromIntegral (w64 .&. 0x0000FFFF)
                  , fromIntegral ((w64 `shiftR` 32) .&. 0x0000FFFF) ]
     
+    ArchARM { } ->
+        -- Generates Thumb sequence,
+        --      ldr r1, [pc, #0]
+        --      bx r1
+        -- 
+        -- which looks like:
+        --     00000000 <.addr-0x8>:
+        --     0:	4900        ldr    r1, [pc]      ; 8 <.addr>
+        --     4:	4708        bx     r1
+        let w32 = fromIntegral (ptrToInt a) :: Word32
+        in Left [ 0x49, 0x00
+                , 0x47, 0x08
+                , byte0 w32, byte1 w32, byte2 w32, byte3 w32]
+
     arch ->
         panic ("mkJumpToAddr not defined for " ++ show arch)
 
@@ -374,11 +388,16 @@ load = do addr <- advance
 newExecConItbl :: DynFlags -> StgConInfoTable -> IO (FunPtr ())
 newExecConItbl dflags obj
    = alloca $ \pcode -> do
-        wr_ptr <- _allocateExec (fromIntegral (sizeOfConItbl dflags obj)) pcode
+        let sz = fromIntegral (sizeOfConItbl dflags obj)
+        wr_ptr <- _allocateExec sz pcode
         ex_ptr <- peek pcode
         pokeConItbl dflags wr_ptr ex_ptr obj
+        _flushExec sz ex_ptr -- Cache flush (if needed)
         return (castPtrToFunPtr ex_ptr)
 
 foreign import ccall unsafe "allocateExec"
   _allocateExec :: CUInt -> Ptr (Ptr a) -> IO (Ptr a)
+
+foreign import ccall unsafe "flushExec"
+  _flushExec :: CUInt -> Ptr a -> IO ()
 \end{code}
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index 8133496..f8b8afe 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -161,6 +161,7 @@ typedef void* AdjustorWritable;
 typedef void* AdjustorExecutable;
 
 AdjustorWritable allocateExec(W_ len, AdjustorExecutable *exec_addr);
+void flushExec(W_ len, AdjustorExecutable exec_addr);
 #if defined(ios_HOST_OS)
 AdjustorWritable execToWritable(AdjustorExecutable exec);
 #endif
diff --git a/rts/Linker.c b/rts/Linker.c
index 77943a5..14ebac3 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -1350,6 +1350,7 @@ typedef struct _RtsSymbolVal {
       SymI_HasProto(g0)                                                 \
       SymI_HasProto(allocate)                                           \
       SymI_HasProto(allocateExec)                                       \
+      SymI_HasProto(flushExec)                                          \
       SymI_HasProto(freeExec)                                           \
       SymI_HasProto(getAllocations)                                     \
       SymI_HasProto(revertCAFs)                                         \
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 112ad83..c1a1a5a 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -1152,6 +1152,15 @@ AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
     return (ret + 1);
 }
 
+void flushExec (W_ len, AdjustorExecutable exec_addr)
+{
+  /* On ARM and other platforms, we need to flush the cache after
+     writing code into memory, so the processor reliably sees it. */
+  unsigned char* begin = (unsigned char*)exec_addr;
+  unsigned char* end   = begin + len;
+  __builtin___clear_cache(begin, end);
+}
+
 // freeExec gets passed the executable address, not the writable address.
 void freeExec (AdjustorExecutable addr)
 {
@@ -1198,6 +1207,15 @@ AdjustorWritable execToWritable(AdjustorExecutable exec)
     return writ;
 }
 
+void flushExec (W_ len, AdjustorExecutable exec_addr)
+{
+  /* On ARM and other platforms, we need to flush the cache after
+     writing code into memory, so the processor reliably sees it. */
+  unsigned char* begin = (unsigned char*)exec_addr;
+  unsigned char* end   = begin + len;
+  __builtin___clear_cache(begin, end);
+}
+
 void freeExec(AdjustorExecutable exec)
 {
     AdjustorWritable writ;
@@ -1251,6 +1269,15 @@ AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
     return ret;
 }
 
+void flushExec (W_ len, AdjustorExecutable exec_addr)
+{
+  /* On ARM and other platforms, we need to flush the cache after
+     writing code into memory, so the processor reliably sees it. */
+  unsigned char* begin = (unsigned char*)exec_addr;
+  unsigned char* end   = begin + len;
+  __builtin___clear_cache(begin, end);
+}
+
 void freeExec (void *addr)
 {
     StgPtr p = (StgPtr)addr - 1;



More information about the ghc-commits mailing list