Hi, I thought I would try to speed up the SML code at http://stackoverflow.com/questions/32425267/how-to-improving-array-benchmark... by using the FFI, but this resulted in a significant slowdown.
Non-FFI code:
************************************************************************* ************************************************************************* *************************************************************************
(* Benchmark parameters: number of array slots, the bound for the repeat
   counter, and the threshold above which a predecessor value is reset to 0. *)
val size : int = 50000
val loops : int = 30
val cap : int = 50000

(* Working buffer: `size` integers, all initially 0. *)
val data = IntArray.array (size, 0)
(* One pass over `data`.
   For every index 1 .. size-1 the slot receives its predecessor's value plus
   one, where a predecessor greater than `cap` counts as 0.  When the index
   reaches `size`, slot 0 is overwritten with the value of the last slot. *)
fun loop () =
  let
    fun step idx =
      if idx = size
      then IntArray.update (data, 0, IntArray.sub (data, size - 1))
      else
        let
          val prev = IntArray.sub (data, idx - 1)
          val base = if prev > cap then 0 else prev
        in
          IntArray.update (data, idx, base + 1);
          step (idx + 1)
        end
  in
    step 1
  end
(* Runs `loop ()` repeatedly, counting upwards from 1 and stopping as soon as
   the counter equals `loops`.
   NOTE(review): because the counter starts at 1, `loop` is invoked loops-1
   times, not `loops` times -- confirm this is intended. *)
fun benchmarkRun () =
  let
    fun bench n =
      if n = loops
      then ()
      else (loop (); bench (n + 1))
  in
    bench 1
  end
(* Tail-recursive total of data[i .. size-1], added on top of `value`.
   Call as `sum (0, 0)` to add up the whole buffer.
   `data` is an IntArray.array, so it is read with IntArray.sub (as `loop`
   does) rather than the polymorphic Array.sub: the two array types are not
   guaranteed to be the same type on every implementation. *)
fun sum (i, value) =
  if i = size
  then value
  else sum (i + 1, value + IntArray.sub (data, i))
(* Entry point of the non-FFI benchmark: run the benchmark, then print the
   total of all slots of `data` followed by a newline. *)
fun main () =
  ( benchmarkRun ()
  ; print (Int.toString (sum (0, 0)))
  ; print "\n"
  )
(*val _ = main ()*)
************************************************************************* ************************************************************************* *************************************************************************
FFI code:
C code:
************************************************************************* ************************************************************************* *************************************************************************
//intArray.c #include <stdlib.h> #include <stdio.h>
/*
 * Fixed-size, heap-allocated array of int.
 *   size - number of elements
 *   arr  - storage for `size` ints (NULL when size is 0)
 */
typedef struct _intArray {
    int size;
    int* arr;
} intArray;

/*
 * Allocate an intArray holding `size` elements, all set to 0.
 * Returns NULL if `size` is negative or if memory cannot be obtained;
 * otherwise the caller owns the result and releases it with
 * destroyIntArray().
 */
intArray* createIntArray(int size){
    intArray* p;

    if (size < 0) {
        return NULL;
    }

    p = (intArray*) malloc(sizeof(intArray));
    if (p == NULL) {
        return NULL;
    }

    p->size = size;
    p->arr = NULL;
    if (size > 0) {
        /* calloc zero-fills the storage and rejects a count*size overflow. */
        p->arr = (int*) calloc((size_t) size, sizeof(int));
        if (p->arr == NULL) {
            free(p);
            return NULL;
        }
    }
    return p;
}

/*
 * Release an array obtained from createIntArray().
 * Passing NULL is a no-op, mirroring free().
 */
void destroyIntArray(intArray* p){
    if (p == NULL) {
        return;
    }
    free(p->arr);
    free(p);
}

/*
 * Store `val` at index `elem`.
 * No bounds check is performed: `elem` must satisfy 0 <= elem < p->size.
 */
void setIntArray(intArray* p, int elem, int val){
    p->arr[elem] = val;
}

/*
 * Return the value stored at index `elem`.
 * No bounds check is performed: `elem` must satisfy 0 <= elem < p->size.
 */
int getIntArray(intArray *p, int elem){
    return p->arr[elem];
}

/*
 * Return the sum of all elements.
 * The total is accumulated in an int, so the caller must make sure it fits.
 */
int getSumIntArray(intArray* p){
    int total = 0;
    int i;
    int count = p->size;

    for (i = 0; i < count; i++) {
        total += p->arr[i];
    }
    return total;
}
************************************************************************* ************************************************************************* *************************************************************************
ML code:
************************************************************************* ************************************************************************* *************************************************************************
open CInterface;
(* Handle on the shared object (presumably built from intArray.c -- confirm),
   plus a symbol lookup function bound to the same file name. *)
val lib = load_lib "./intArray.so"
val get = get_sym "./intArray.so"

(* The C `intArray*` is passed across the boundary using the POINTER
   conversion. *)
val PINTARR = POINTER

(* Raw call stubs, one per C function looked up by name. *)
val c1 = call1 (get "createIntArray") INT PINTARR
val c2 = call3 (get "setIntArray") (PINTARR, INT, INT) VOID
val c3 = call2 (get "getIntArray") (PINTARR, INT) INT
val c4 = call1 (get "getSumIntArray") PINTARR INT
(* Named ML entry points; each one simply forwards to the matching raw stub
   c1 .. c4. *)
fun c_createIntArray n = c1 n
fun c_setIntArray (arr, idx, v) = c2 (arr, idx, v)
fun c_getIntArray (arr, idx) = c3 (arr, idx)
fun c_getSumIntArray arr = c4 arr
(* Benchmark parameters: number of array slots, the bound for the repeat
   counter, and the threshold above which a predecessor value is reset to 0. *)
val size : int = 50000
val loops : int = 30
val cap : int = 50000
(* One pass over the C-side array `pData2`.
   For every index 1 .. size-1 the slot receives its predecessor's value plus
   one, where a predecessor greater than `cap` counts as 0.  When the index
   reaches `size`, slot 0 is overwritten with the value of the last slot.

   The predecessor of slot i is always the value this loop has just written
   to slot i-1, so it is carried along in `previous` instead of being read
   back through the FFI.  Only slot 0 is actually fetched, once per pass.
   This halves the number of foreign calls per element while leaving the
   array contents exactly as before. *)
fun loop (pData2) =
  let
    (* `previous` is the current content of slot i-1. *)
    fun loopI (i, previous) =
      if i = size
      then c_setIntArray (pData2, 0, previous)
      else
        let
          val use = if previous > cap then 0 else previous
          val next = use + 1
        in
          c_setIntArray (pData2, i, next);
          loopI (i + 1, next)
        end
  in
    loopI (1, c_getIntArray (pData2, 0))
  end
(* Runs `loop pData2` repeatedly, counting upwards from 1 and stopping as soon
   as the counter equals `loops`.
   NOTE(review): because the counter starts at 1, `loop` is invoked loops-1
   times, not `loops` times -- confirm this is intended. *)
fun benchmarkRun (pData2) =
  let
    fun bench n =
      if n = loops
      then ()
      else (loop pData2; bench (n + 1))
  in
    bench 1
  end
(* Entry point of the FFI benchmark: create the C-side array, register the
   C function destroyIntArray on it via setFinal, run the benchmark, then
   print the array total (computed on the C side) followed by a newline. *)
fun main () =
  let
    val arr = c_createIntArray size
    val destroySym = load_sym lib "destroyIntArray"
  in
    setFinal destroySym arr;
    benchmarkRun arr;
    print (Int.toString (c_getSumIntArray arr));
    print "\n"
  end
************************************************************************* ************************************************************************* *************************************************************************
The times are:
a) non-FFI SML: 0.09s b) FFI SML: 11.8s
Is there any way I can improve the speed of the FFI code? Thanks.