Hi, I thought that I would try to speed up the SML code at http://stackoverflow.com/questions/32425267/how-to-improving-array-benchmark... by using the FFI, but this results in a significant slowdown.
Non-FFI code:
************************************************************************* ************************************************************************* *************************************************************************
val size:int = 50000;
val loops:int = 30;
val cap:int = 50000;

val data = IntArray.array(size,0);

fun loop () =
    let fun loopI i =
            if i = size
            then (IntArray.update(data,0,IntArray.sub(data,size-1)); ())
            else let val previous = IntArray.sub(data,i-1)
                     val use = if previous > cap then 0 else previous
                 in IntArray.update(data,i,use+1); loopI (i+1) end
    in loopI 1 end

fun benchmarkRun () =
    let fun bench i =
            if i = loops then () else (loop (); bench (i+1))
    in bench 1 end

fun sum (i,value) =
    if i = size then value else sum(i+1,value+Array.sub(data,i))

fun main () =
    (benchmarkRun(); print (Int.toString (sum (0,0))); print "\n")

(*val _ = main ()*)
************************************************************************* ************************************************************************* *************************************************************************
FFI code:
C code:
************************************************************************* ************************************************************************* *************************************************************************
// intArray.c
#include <stdlib.h>
#include <stdio.h>

typedef struct _intArray {
    int size;
    int* arr;
} intArray;

intArray* createIntArray(int size) {
    int i;
    intArray* p = (intArray*) malloc (sizeof(intArray));
    p->arr = (int*) malloc (size*sizeof(int));
    for (i = 0; i < size; i++) { p->arr[i] = 0; }
    p->size = size;
    return p;
}

void destroyIntArray(intArray* p) {
    free (p->arr);
    free (p);
}

void setIntArray(intArray* p, int elem, int val) {
    p->arr[elem] = val;
}

int getIntArray(intArray *p, int elem) {
    return p->arr[elem];
}

int getSumIntArray(intArray* p) {
    int sum = 0;
    int i;
    int size = p->size;
    for (i = 0; i < size; i++) { sum += p->arr[i]; }
    return sum;
}
************************************************************************* ************************************************************************* *************************************************************************
ML code:
************************************************************************* ************************************************************************* *************************************************************************
open CInterface;

val lib = load_lib "./intArray.so";
val get = get_sym "./intArray.so";

val PINTARR = POINTER;

val c1 = call1 (get "createIntArray") INT PINTARR
val c2 = call3 (get "setIntArray") (PINTARR,INT,INT) VOID
val c3 = call2 (get "getIntArray") (PINTARR,INT) INT
val c4 = call1 (get "getSumIntArray") (PINTARR) INT

fun c_createIntArray (size) = c1 (size);
fun c_setIntArray (p,elem,value) = c2 (p,elem,value);
fun c_getIntArray (p,elem) = c3 (p,elem);
fun c_getSumIntArray (p) = c4 (p);

val size:int = 50000;
val loops:int = 30;
val cap:int = 50000;

fun loop (pData2) =
    let fun loopI i =
            if i = size
            then (c_setIntArray(pData2,0,c_getIntArray(pData2,size-1)); ())
            else let val previous = c_getIntArray(pData2,i-1)
                     val use = if previous > cap then 0 else previous
                 in c_setIntArray(pData2,i,use+1); loopI (i+1) end
    in loopI 1 end

fun benchmarkRun (pData2) =
    let fun bench i =
            if i = loops then () else (loop (pData2); bench (i+1))
    in bench 1 end

fun main () =
    let val pData = c_createIntArray(size)
        val final = load_sym lib "destroyIntArray"
    in
        setFinal final pData;
        benchmarkRun(pData);
        print (Int.toString (c_getSumIntArray (pData)));
        print "\n"
    end
************************************************************************* ************************************************************************* *************************************************************************
The times are:
a) for non-FFI SML: 0.09s
b) for FFI SML: 11.8s
Is there any way I can improve the speed of the FFI code? Thanks
Poly/ML is using libffi to call C functions. To determine the FFI overhead, you could create an example that just makes the same number of calls to empty C functions with the same number of arguments.
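For example, something along these lines (a sketch only: "noop" is a hypothetical empty C function compiled into intArray.so, bound with the same CInterface values used in your code):

(* Hypothetical empty C function added to intArray.c:
     void noop(intArray* p, int elem, int val) {}
   bound in the same way as setIntArray above. *)
val cNoop = call3 (get "noop") (PINTARR,INT,INT) VOID

(* Make n calls that do no work on the C side; whatever this costs is
   essentially pure FFI overhead. *)
fun ffiOverhead (p, n) =
    let fun go 0 = ()
          | go i = (cNoop (p, 0, 0); go (i - 1))
    in go n end

Timing ffiOverhead over the same number of calls as the benchmark makes to setIntArray/getIntArray would show how much of the 11.8s is just call overhead.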
To see what happens when a C function is called, look at the call_sym function in foreign.cpp: https://github.com/polyml/polyml/blob/master/libpolyml/foreign.cpp#L874

For a C function to improve efficiency, it needs to save more time than the FFI overhead costs. I strongly suspect that SML functions like IntArray.update take less time than the FFI overhead, so no improvement is possible by replacing them with C functions. (I suspect such simple SML functions take much less time, so I would expect the use of C functions to be much slower.)
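One way a C function could still pay for its overhead is to do far more work per call, for example by moving the whole inner loop of the benchmark into a single C call. Purely as an illustrative sketch (runLoop is a hypothetical addition to intArray.c, not something from the code above):

(* Sketch only: "runLoop" is a hypothetical C function added to intArray.c
   that runs the whole inner loop on the C side, e.g.

     void runLoop(intArray* p, int cap) {
         int i;
         for (i = 1; i < p->size; i++) {
             int previous = p->arr[i-1];
             int use = previous > cap ? 0 : previous;
             p->arr[i] = use + 1;
         }
         p->arr[0] = p->arr[p->size - 1];
     }

   so each benchmark iteration pays the FFI overhead once rather than
   twice per element. *)
val cRunLoop = call2 (get "runLoop") (PINTARR,INT) VOID
fun loop (pData2) = cRunLoop (pData2, cap)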
Phil
P.S. I think that there is scope for efficiency improvement in Poly/ML but with some upheaval. For example, if call_sym first took parameters that indicate the types of arguments and the return value, then, for each call site, arg_values and arg_types could be created once and the call to ffi_prep_cif made once. Still, the arguments have to be filled in on each call:

PolyWord p = arg_list;
for (POLYUNSIGNED i = 0; i < num_args; i++, p = Tail(p)) {
    arg_values[i] = DEREFVOL(taskData, Head(p).AsObjPtr()->Get(1));
    arg_types[i] = ctypeToFfiType(taskData, Head(p).AsObjPtr()->Get(0));
}
Hi, thanks for the pointers on the ffi.
"I strongly suspect that SML functions like IntArray.update take less time than the FFI overhead, so no improvement is possible by using C functions."
Yes, it seems so. I was hoping that I could somehow achieve the extremely low overheads that I get when using the Haskell FFI (I think the GHC Haskell compiler also uses libffi; https://github.com/ghc/ghc/commit/39e206a7badd18792a7c8159cff732c63a9b19e7), but it is not obvious how I can do that.
It seems that "val previous = IntArray.sub(data,i-1)" and "IntArray.update(data,i,use+1);" are bottlenecks. For example the following program :
********************************************************** ********************************************************** **********************************************************
val size:int = 50000;
val loops:int = 30000;
val cap:int = 50000;

val data = IntArray.array(size,0);

fun loop () =
    let fun loopI i =
            if i = size
            then (IntArray.update(data,0,IntArray.sub(data,size-1)); ())
            else let val previous = IntArray.sub(data,i-1)
                     val use = if previous > cap then 0 else previous
                 in IntArray.update(data,i,use+1); loopI (i+1) end
    in loopI 1 end

fun benchmarkRun () =
    let fun bench i =
            if i = loops then () else (loop (); bench (i+1))
    in bench 1 end

fun sum (i,value) =
    if i = size then value else sum(i+1,value+Array.sub(data,i))

fun main () =
    (benchmarkRun(); print (Int.toString (sum (0,0))); print "\n")
********************************************************** ********************************************************** **********************************************************
takes about 52 seconds. Now if I modify "loop" (by commenting out "val previous = IntArray.sub(data,i-1)" and "IntArray.update(data,i,use+1);") to:
********************************************************** ********************************************************** **********************************************************
fun loop () =
    let fun loopI i =
            if i = size
            then (IntArray.update(data,0,IntArray.sub(data,size-1)); ())
            else let (*val previous = IntArray.sub(data,i-1)*)
                     val previous = 0
                     val use = if previous > cap then 0 else previous
                 in (*IntArray.update(data,i,use+1);*) loopI (i+1) end
    in loopI 1 end
********************************************************** ********************************************************** **********************************************************
then the program takes 8 seconds.
On 21/09/2015 06:49, Artella Coding wrote:
Hi, thanks for the pointers on the ffi.
"I strongly suspect that SML functions like IntArray.update take less time than the FFI overhead, so no improvement is possible by using C functions."
Yes it seems so. I was hoping that I could somehow achieve the extremely low overheads that I get when using the haskell ffi (because I think ghc haskell compiler also uses libffi ; https://github.com/ghc/ghc/commit/39e206a7badd18792a7c8159cff732c63a9b19e7) but it is not obvious how I can do that.
The foreign function interface is a very old design; the main documentation dates from 1994. I updated it to use libffi but didn't change the basic design. I would like to update it so that, when a foreign function is defined, libffi builds the interface once rather than on every call. It would probably be easier to start from scratch rather than adapt the current CInterface structure.
Even if it is rebuilt there will still be significant overheads calling through the FFI. It needs to switch between the ML and C calling conventions and leave its ML stack and the ML heap in a safe state if another thread causes a GC while a thread is in foreign code.
It seems that "val previous = IntArray.sub(data,i-1)" and "IntArray.update(data,i,use+1);" are bottlenecks. For example the following program :
It's most likely that the overhead has to do with bounds checking. Arrays and vectors in ML are defined to raise the Subscript exception if the index is out of range. A clever compiler could probably detect that checks are redundant in this case and optimise them away but Poly/ML doesn't do that. It's also more complex in Poly/ML because int is arbitrary precision.
I ran a check replacing IntArray.sub and IntArray.update with versions that don't do bounds checking and the time reduced from 48s to 27s. You may be able to get some of the benefits if you can recast your program to use some of the more complex functions in Array such as modifyi. These avoid bounds checking since they can only be applied to the whole array.
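As a rough, untested sketch of one way the loop might be recast with IntArray.modifyi (the previously written value is carried in a ref, since modifyi only supplies the current index and element, and data[0] still needs one checked update at the end, as in the original loop):

(* Untested sketch: process the whole array with modifyi so the inner
   accesses are not bounds-checked individually. *)
fun loop () =
    let
        val prev = ref (IntArray.sub (data, 0))
        fun step (0, x) = x   (* data[0] is filled in after the pass *)
          | step (_, _) =
                let
                    val previous = !prev
                    val use = if previous > cap then 0 else previous
                    val new = use + 1
                in
                    prev := new; new
                end
    in
        IntArray.modifyi step data;
        IntArray.update (data, 0, IntArray.sub (data, size - 1))
    end

Whether this actually beats the hand-written loop would need measuring, of course.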
David
Thanks. Someone on Stack Overflow (just now) suggested doing a search for the unsafe keyword, which led me to "unsafeSub" and "unsafeUpdate". When I add these function definitions to my program, the timings do indeed reduce from 52s to 32s.