Using Cuda Within Mathematica

Using CUDA
within
Mathematica

Kashif Rasul
and Raqibul Hassan

l a b s

Overview

• Intro to Mathematica and its API
• CUDA + Mathematica
• Some examples

Mathematica intro
• Mathematica is a modular
computational system in which the
kernel is separate from the front end
which handles the interaction with the
user.

• The most common way to work is to use
interactive documents called notebooks
which mix text input and output as well
as graphics and other material.

Structure of
Mathematica
• An important aspect of Mathematica is
that it can also interact with other
applications.

• This is achieved through MathLink,
a standardised API for two-way
communication with the kernel.

MathLink

• MathLink allows external programs both
to call Mathematica, and to be called by
Mathematica.

• We will use MathLink to let Mathematica
call CUDA functions inside an external
program.

Simple example
addtwo.tm
:Begin:
:Function: addtwo
:Pattern: AddTwo[i_Integer,j_Integer]
:Arguments: { i, j }
:ArgumentTypes: {Integer,Integer}
:ReturnType: Integer
:End:

addtwo.c

#include <mathlink.h>

/* Returns the sum of the two machine integers i and j. */
int addtwo( int i, int j)
{
    int sum = i + j;
    return sum;
}

/* Program entry point: passes the command line to MLMain and returns its status. */
int main(int argc, char* argv[])
{
    int status = MLMain(argc, argv);
    return status;
}

mprep & gcc

$ mprep addtwo.tm -o addtwotm.c

$ gcc -I${INCDIR} addtwotm.c addtwo.c
-L${LIBDIR} -lMLi3 -lstdc++ -o addtwo

In[3]:= SetDirectory[
"/Applications/Mathematica.app/SystemFiles/Links/MathLink/DeveloperKit/PrebuiltExamples"]

Out[3]= /Applications/Mathematica.app/SystemFiles/Links/MathLink/DeveloperKit/PrebuiltExamples

In[4]:= link = Install["./addtwo"]
Out[4]= LinkObject[
/Applications/Mathematica.app/SystemFiles/Links/MathLink/DeveloperKit/PrebuiltExamples/addtwo, 524, 8]

In[5]:= LinkPatterns[link]
Out[5]= {AddTwo[i_Integer, j_Integer]}

In[6]:= ? AddTwo

AddTwo[x, y] gives the sum of two machine integers x and y.

In[7]:= AddTwo[2, 3]
Out[7]= 5

In[8]:= AddTwo[2^31 - 1, 1]
Out[8]= -2 147 483 648

In[9]:= Uninstall[link]
Out[9]= /Applications/Mathematica.app/SystemFiles/Links/MathLink/DeveloperKit/PrebuiltExamples/addtwo

MathLink
Template file
• When a MathLink template file is
processed, two basic things are done:
• :Pattern: & :Arguments: specifications
are used to generate a Mathematica
definition
• :Function:, :ArgumentTypes:
& :ReturnType: specifications are used
to generate C source code

:ArgumentTypes:

Mathematica specification C specification

Integer int
Real double
IntegerList int*, long
RealList double*, long
String char*
Symbol char*
Manual void

Handling
Lists & Arrays
:Begin:
:Function: sumList
:Pattern: SumList[a_List]
:Arguments: {a}
:ArgumentTypes:{IntegerList}
:ReturnType: Integer
:End:

int sumList(int *a, long alen)
{
    int i, tot=0;

    for(i=0; i<alen; i++)
        tot += a[i];

    return tot;
}

Manual ArgumentTypes
:Begin:
:Function: sumList
:Pattern: SumList[a:{___Integer}]
:Arguments: {a}
:ArgumentTypes:{Manual}
:ReturnType: Integer
:End:

int sumList(void) {
    int n, i;
    int a[MAX];

    MLCheckFunction(stdlink, "List", &n);

    for (i=0; i<n; i++)
        MLGetInteger32(stdlink, a+i);
    ...
}

int sumList(void) {
    int n;
    int *a;

    MLGetInteger32List(stdlink, &a, &n);
    ...
    MLReleaseInteger32List(stdlink, a, n);
    ...
}

Array of arb. depth

/*
 * Reads an array of double-precision floating-point numbers of any rank
 * from the link lp and releases it again once it has been used.
 */
void f(MLINK lp)
{
    double *values;    /* array contents */
    int *extents;      /* size along each dimension */
    char **headNames;  /* head at each level of the array */
    int rank;          /* number of dimensions */

    if (MLGetRealArray(lp, &values, &extents, &headNames, &rank))
    {
        /* ... */
        MLReleaseRealArray(lp, values, extents, headNames, rank);
    }
    /* otherwise the array could not be read from lp and nothing was allocated */
}

Handling Complex
numbers
In[1]:= Head[2 + 3 I]
Out[1]= Complex

If you pass a list of complex numbers to your external program,
then MLGetReal64Array() will create a two-dimensional array
containing a sequence of pairs of real and imaginary parts. In this
case, heads[0] will be "List" while heads[1] will be "Complex".

//get an array of floating-point numbers of any depth
MLGetReal64Array(stdlink,double**a,int**dims,char***heads,int*d);

Summary of API
//get a list of integers, allocating the memory needed to store it
MLGetInteger32List(stdlink,int**a,int*n);
//get a list of floating-point numbers
MLGetReal64List(stdlink,double**a,int*n);
//release the memory associated with a list of integers
MLReleaseInteger32List(stdlink,int*a,int n);
//release the memory associated with a list of floating-point numbers
MLReleaseReal64List(stdlink,double*a,int n);

//get an array of integers of any depth
MLGetInteger32Array(stdlink,int**a,int**dims,char***heads,int*d);
//get an array of floating-point numbers of any depth
MLGetReal32Array(stdlink,float**a,int**dims,char***heads,int*d);
//release memory associated with an integer array
MLReleaseInteger32Array(stdlink,int*a,int*dims,char**heads,int d);
//release memory associated with a floating-point array
MLReleaseReal32Array(stdlink,float*a,int*dims,char**heads,int d);

Manual ReturnType
:Begin:
:Function: bits
:Pattern: ToBits[i_Integer]
:Arguments: {i}
:ArgumentTypes:{Integer}
:ReturnType: Manual
:End:

void bits(int i)
{
    int a[32], k;

    for(k=0; k<32; k++) {
        a[k] = i%2;
        i >>= 1;
        if (i==0) break;
    }

    if (k<32) k++;

    MLPutInteger32List(stdlink,
                       a, k);
    return;
}

General array
int a[8][16][100];
int dims[] = {8, 16, 100};

MLPutInteger32Array(stdlink, a, dims, NULL, 3);

or
int ***a;

MLPutFunction(stdlink, "List", n1);
for (i=0; i<n1; i++) {
MLPutFunction(stdlink, "List", n2);
for (j=0; j<n2; j++) {
MLPutInteger32List(stdlink, a[i][j], n3);
}
}

Unknown length
In[10]:= {Sequence[1, Sequence[4, Sequence[]]]}
Out[10]= {1, 4}

MLPutFunction(stdlink, "List", 1);

while( condition )
{
/* generate an element */
MLPutFunction(stdlink, "Sequence", 2);
MLPutInteger32(stdlink, i );
}

MLPutFunction(stdlink, "Sequence", 0);

Return Complex
numbers
// Complex data type
typedef float2 Complex;

Complex* h_convolved_signal;

// Return transformed signal to Mathematica as a Complex List
MLPutFunction(stdlink,"List",n);
for (long i = 0; i < n; i++) {
MLPutFunction(stdlink,"Complex",2);
MLPutFloat(stdlink,h_convolved_signal[i].x*norm);
MLPutFloat(stdlink,h_convolved_signal[i].y*norm);
}

Return Complex
numbers
In[4]:= list = Table[RandomReal[], {12}]
Out[4]= {0.389421, 0.222396, 0.434636, 0.0886136, 0.233102, 0.941771,
0.928712, 0.764119, 0.791473, 0.381426, 0.757661, 0.44273}

In[5]:= Map[Function[{x}, Apply[Complex, x]], Partition[list, 2]]

Out[5]= {0.389421 + 0.222396 I, 0.434636 + 0.0886136 I, 0.233102 + 0.941771 I,
0.928712 + 0.764119 I, 0.791473 + 0.381426 I, 0.757661 + 0.44273 I}

/*
 * Sends the expression
 *   Map[Function[x, Apply[Complex, x]], Partition[Times[norm, data], 2]]
 * where data is the 2*n floats behind h_convolved_signal.
 * Each MLPutFunction announces how many arguments follow, so every head
 * must be followed by exactly that many expressions; the indentation
 * below mirrors the nesting.
 */
MLPutFunction(stdlink, "Map", 2);
    MLPutFunction(stdlink, "Function", 2);
        MLPutSymbol(stdlink, "x");
        MLPutFunction(stdlink, "Apply", 2);
            MLPutSymbol(stdlink, "Complex");
            MLPutSymbol(stdlink, "x");   /* second argument of Apply */
    MLPutFunction(stdlink, "Partition", 2);
        MLPutFunction(stdlink, "Times", 2);
            MLPutReal(stdlink, norm);
            MLPutReal32List(stdlink, (float*)h_convolved_signal, 2*n);
        MLPutInteger(stdlink, 2);

Error & Interrupt
if(! MLPutInteger(stdlink, 10))
{
    /* check the possible errors */
    switch(MLError(stdlink))
    {
        case MLEDEAD:
            /* the link died unexpectedly */
            break;
        case MLECLOSED:
            /* the other side closed the link */
            break;
        case MLEOK:
            /* no error occurred */
            break;
        default:
            /* ... */
    }
}

if(! MLPutReal64(stdlink, 3.22))
{
    /* unable to send 3.22 to lp */
    printf("MathLink Error: %s\n",
           MLErrorMessage(stdlink));
    MLClearError(stdlink);
}

while(len--)
{
    sum += *list++;
    /* check for the abort */
    if(MLAbort) return (double)0;
}

Running on remote
computers
$ ./addtwo -linkcreate -linkprotocol TCPIP
Link created on: 63166@192.168.1.107,63167@192.168.1.107

In[5]:= Install[LinkConnect["63166@192.168.1.107,63167@192.168.1.107",
LinkProtocol -> "TCPIP"]]

Out[5]= LinkObject[63166@192.168.1.107,63167@192.168.1.107, 1110, 8]

In[6]:= AddTwo[2, 3]
Out[6]= 5

Mathematica + CUDA
#include <cutil_inline.h>
#include <mathlink.h>

/*
 * Entry point of a MathLink program that uses CUDA: choose the CUDA device
 * first, then hand control to MLMain, as the plain addtwo.c example does.
 */
int main(int argc, char **argv)
{
    // use command-line specified CUDA device,
    // otherwise use device with highest Gflops/s
    if(cutCheckCmdLineFlag(argc, (const char**)argv, "device"))
        cutilDeviceInit(argc, argv);
    else
        cudaSetDevice( cutGetMaxGflopsDeviceId() );

    // without this call the program would exit right after selecting a device
    return MLMain(argc, argv);
}

mathematica_cuda
# Add source files here
EXECUTABLE := cuFourier
# CUDA source files (compiled with cudacc)
CUFILES := cuFourier.cu
# CUDA dependency files
# CU_DEPS :=
# C/C++ source files (compiled with gcc / c++)
# CCFILES :=
# Additional libraries needed by the project
USECUFFT := 1
# MathLink Template files
TMFILES := cuFourier.tm

###################################################
# Rules and targets

include ../../common/common.mk

FindCUDA +
FindMathLink via CMake

• CMake http://www.cmake.org/
• FindCUDA https://gforge.sci.utah.edu/gf/project/findcuda/

• FindMathLink http://github.com/kashif/FindMathLink/tree

CMakeLists.txt
set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
set(source_files test_bin.cu)
CUDA_COMPILE(CUDA_FILES test_bin.cu)

MathLink_ADD_TM(test.tm)

INCLUDE_DIRECTORIES(
${MathLink_INCLUDE_DIR}
)
LINK_DIRECTORIES(
${MathLink_LIBRARY_DIR}
)

ADD_EXECUTABLE(cuda_compile_example
${CUDA_FILES}
${source_files}
test.tm.c
main.cc
external_dependency.h
)
TARGET_LINK_LIBRARIES(cuda_compile_example
${MathLink_LIBRARIES}
${CUDA_LIBRARIES}
)

double to float
conversion
// General check for CUDA GPU SM Capabilities
//inline bool cutilDrvCudaCapabilities(int major_version, int minor_version);

char **heads;
int *dims;
int rank;
float *h_float;
double *h_double;

if (cutilDrvCudaCapabilities( 1,3 ))
{
MLGetReal64Array(stdlink, &h_double, &dims, &heads, &rank);
}
else
{
MLGetReal32Array(stdlink, &h_float, &dims, &heads, &rank);
}

CUBLAS & CUFFT

• Follow the usual routine of sending data
to the MathLink app

• Use CUBLAS or CUFFT
• Return result back to Mathematica

cuFourier
In[1]:= ListLinePlot Abs Fourier RandomReal 1, 200 ^2

0.30

0.25

0.20

Out[1]= 0.15

0.10

0.05

50 100 150 200

Clone mathematica_cuda

$ git clone
git://github.com/kashif/mathematica_cuda.git

$ cd mathematica_cuda/src

$ mkdir cuFourier

$ mate cuFourier

cuFourier.tm

:Begin:
:Function: cuFourier1D
:Pattern: CUFourier1D[ a:{__?NumericQ} ]
:Arguments: { a }
:ArgumentTypes:{ RealList }
:ReturnType: Manual
:End:

cuFourier.cu
// includes system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes cuda
#include <cufft.h>
#include <cutil_inline.h>

// includes mathlink
#include <mathlink.h>
// Complex data type
typedef float2 Complex;

///////////////////////////////////////////////////////////////
// Uses CUFFT to compute the 1D Fourier transform of a list of
// reals sent from Mathematica.
///////////////////////////////////////////////////////////////
extern "C" void cuFourier1D(double*, long);

////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////
/*
 * Program entry point: selects the CUDA device, then enters MathLink's
 * main routine so that Mathematica can call cuFourier1D.
 */
int main(int argc, char *argv[])
{
    // use command-line specified CUDA device, otherwise use device
    // with highest Gflops/s
    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
        cutilDeviceInit(argc, argv);
    else
        cudaSetDevice( cutGetMaxGflopsDeviceId() );

    // without this call the program would exit right after selecting a device
    return MLMain(argc, argv);
}

/*
 * MathLink entry point for CUFourier1D[a] (see cuFourier.tm).
 *
 * Converts the n real samples in h_A to single-precision complex values
 * with zero imaginary part, runs an in-place 1D complex-to-complex CUFFT
 * transform in the CUFFT_INVERSE direction on the device, and writes the
 * result to stdlink as a list of Complex numbers scaled by 1/sqrt(n).
 *
 * h_A  host array holding the n input samples
 * n    number of samples
 *
 * The template declares the return type as Manual, so the result is put on
 * stdlink here; if the host buffer cannot be allocated, the symbol $Failed
 * is put instead.
 */
void cuFourier1D (double *h_A, long n)
{
    double norm = 1.0/sqrt((double) n);
    size_t mem_size = sizeof(Complex) * (size_t) n;

    // Allocate host memory for the signal
    Complex* h_signal = (Complex*)malloc(mem_size);
    if (h_signal == NULL) {
        // A Manual return type still has to send something back
        MLPutSymbol(stdlink, "$Failed");
        return;
    }

    // Initialize the memory for the signal (real part only)
    for (long i = 0; i < n; ++i) {
        h_signal[i].x = (float)h_A[i];
        h_signal[i].y = 0.0f;
    }

    // Allocate device memory for signal
    Complex* d_signal;
    cutilSafeCall(cudaMalloc((void**)&d_signal, mem_size));
    // Copy host memory to device
    cutilSafeCall(cudaMemcpy(d_signal, h_signal, mem_size,
                             cudaMemcpyHostToDevice));

    // CUFFT plan (cufftPlan1d takes the transform size as int)
    cufftHandle plan;
    cufftSafeCall(cufftPlan1d(&plan, (int) n, CUFFT_C2C, 1));

    // Transform signal in place
    cufftSafeCall(cufftExecC2C(plan, (cufftComplex *)d_signal,
                               (cufftComplex *)d_signal,
                               CUFFT_INVERSE));

    // Copy device memory to host, reusing the input buffer for the result
    Complex* h_convolved_signal = h_signal;
    cutilSafeCall(cudaMemcpy(h_convolved_signal, d_signal,
                             mem_size, cudaMemcpyDeviceToHost));

    // Release d_signal
    cutilSafeCall(cudaFree(d_signal));

    // Destroy CUFFT context
    cufftSafeCall(cufftDestroy(plan));

    // Send Map[Function[x, Apply[Complex, x]],
    //          Partition[Times[norm, data], 2]]
    // where data is the result viewed as 2*n interleaved floats. Each head
    // must be followed by exactly the number of arguments it announces.
    MLPutFunction(stdlink, "Map", 2);
        MLPutFunction(stdlink, "Function", 2);
            MLPutSymbol(stdlink, "x");
            MLPutFunction(stdlink, "Apply", 2);
                MLPutSymbol(stdlink, "Complex");
                MLPutSymbol(stdlink, "x");
        MLPutFunction(stdlink, "Partition", 2);
            MLPutFunction(stdlink, "Times", 2);
                MLPutReal(stdlink, norm);
                MLPutReal32List(stdlink, (float*)h_convolved_signal, (int)(2*n));
            MLPutInteger(stdlink, 2);

    // Cleanup memory
    free(h_signal);

    cudaThreadExit();
}

Makefile
##################################################################
#
# Build script for project
#
##################################################################

# Add source files here
EXECUTABLE := cuFourier
# CUDA source files (compiled with cudacc)
CUFILES := cuFourier.cu
# Additional libraries needed by the project
USECUFFT := 1

# MathLink Template files
TMFILES := cuFourier.tm

##################################################################
# Rules and targets
include ../../common/common.mk

In[35]:= link
Install
" Users kashif Dropbox 20090630_NDVI_CUDA mathematica_cuda bin darwin
release cuFourier"
Out[35]= LinkObject
Users kashif Dropbox 20090630_NDVI_CUDA mathematica_cuda bin darwin
release cuFourier, 605, 9

Out[36]= CUFourier1D a : __ ?NumericQ

In[37]:= ListLinePlot Abs CUFourier1D RandomReal 1, 200 ^2

0.4

0.3

Out[37]=
0.2

0.1

50 100 150 200

In[38]:= Uninstall link
Out[38]= Users kashif Dropbox 20090630_NDVI_CUDA mathematica_cuda bin darwin
release cuFourier

Image Deconvolution
for Life Sciences

• Confocal and Widefield microscopy
3D or 4D images

• Multichannel (3 or more channels)
• Comes in a wide variety of formats

Bio-Formats Java lib.

• Standalone Java library for reading and
writing life science image formats

• Get both the pixels and metadata
• Licensed under GPL
• http://www.loci.wisc.edu/ome/
formats.html

Java + Mathematica:
J/Link
Needs "JLink`"

InstallJava
LinkObject
' usr local Wolfram Mathematica 7.0 SystemFiles Java Linux x86 64 bin java' classpath
" usr local Wolfram Mathematica 7.0 SystemFiles Links JLink JLink.jar"
Xmx256m Djava.system.class.loader com.wolfram.jlink.JLinkSystemClassLoader
Djava.util.prefs.PreferencesFactory com.wolfram.jlink.DisabledPreferencesFactory
com.wolfram.jlink.Install init " tmp m000001207601", 4, 4

ReinstallJava ClassPath " home kashif Dropbox BioFormats Java loci_tools.jar"
LinkObject
' usr local Wolfram Mathematica 7.0 SystemFiles Java Linux x86 64 bin java' classpath
" usr local Wolfram Mathematica 7.0 SystemFiles Links JLink JLink.jar"
Xmx256m Djava.system.class.loader com.wolfram.jlink.JLinkSystemClassLoader
Djava.util.prefs.PreferencesFactory com.wolfram.jlink.DisabledPreferencesFactory
com.wolfram.jlink.Install init " tmp m000002207601", 8, 4

Reading LIF images
reader JavaNew "loci.formats.ImageReader" LoadJavaClass "loci.formats.FormatTools"

« JavaObject loci.formats.ImageReader » JavaClass loci.formats.FormatTools,

bpp FormatTools`getBytesPerPixel pixelType
reader setId " media cdrom xyz 1ch by2 MT1.lif"
1
reader getSeriesCount
reader getSizeX
7
512
reader setSeries 0
reader getSizeY
sizeC reader getSizeC 512
1
reader getSizeZ
pixelType reader getPixelType 90

1

num reader getImageCount
90

Reading pixel volume
LoadJavaClass "loci.common.DataTools"
JavaClass loci.common.DataTools,

volume
Flatten
N
Table DataTools`makeDataArray
reader openBytes z, 0, 0, reader getSizeX , reader getSizeY , bpp, False, True ,
z, 0, reader getSizeZ 1 ;

unflatten e_, d__ ? IntegerQ && Positive & :
Fold Partition, e, Take d , 1, 2, 1 ; Length e Times d

array unflatten volume, reader getSizeX , reader getSizeY ,
reader getSizeZ ;

View a slice
Image array 165, All, All 255

Image deconvled
Result
165, All, All

Wiener Deconv.
:Begin:
:Function: wienerDeconvolve
:Pattern: WienerDeconvolve[nx_Integer, ny_Integer, nz_Integer,
epsilon_Real, sigma_Real, inImage:{___Real}]
:Arguments: { nx, ny, nz, epsilon, sigma, inImage }
:ArgumentTypes: { Integer, Integer, Integer, Real, Real, Manual }
:ReturnType: Manual
:End:

void wienerDeconvolve(int nx, int ny, int nz, double epsilon, double sigma)
{
float *inImage;
int length;

if(! MLGetReal32List(stdlink, &inImage, &length))
{
return;
}

amira Projection view
®

http://www.amiravis.com

Export " home kashif Amira522 data deconv alphalobe MaxLike.raw",
result, "Real32" ;

Reading Landsat Images
In[4]:= reader JavaNew "loci.formats.ImageReader"
Out[4]= « JavaObject loci.formats.ImageReader »

In[5]:= reader JavaNew "loci.formats.ChannelSeparator", reader
Out[5]= « JavaObject loci.formats.ChannelSeparator »

In[35]:= reader setId " Users sabman satellite_images multispectral bhtmref.tif"

In[7]:= reader getSeriesCount
Out[7]= 1

In[8]:= sizeC reader getSizeC
Out[8]= 6

In[9]:= pixelType reader getPixelType
Out[9]= 1

In[11]:= num reader getImageCount
Out[11]= 6

In[12]:= pixelType reader getPixelType

Loading Landsat data
in Mathematica
In[14]:= LoadJavaClass "loci.formats.FormatTools"
Out[14]= JavaClass loci.formats.FormatTools,

In[15]:= bpp FormatTools`getBytesPerPixel pixelType
Out[15]= 1

In[16]:= reader getSizeX
Out[16]= 512

In[17]:= isLittle reader isLittleEndian
Out[17]= True

In[18]:= reader getSizeY
Out[18]= 512

In[19]:= LoadJavaClass "loci.common.DataTools"
Out[19]= JavaClass loci.common.DataTools,

In[31]:= red DataTools`makeDataArray
reader openBytes 2, 0, 0, reader getSizeX , reader getSizeY , bpp, False, True ;

In[53]:= Image Partition 100 Normalize red , reader getSizeX

In[56]:= NIR DataTools`makeDataArray
reader openBytes 3, 0, 0, reader getSizeX , reader getSizeY , bpp, False, True ;

In[57]:= Image Partition 100 Normalize NIR , reader getSizeX

In[39]:= link Install " Users sabman mathematica_cuda bin darwin emurelease ndvi"
Out[39]= LinkObject Users sabman mathematica_cuda bin darwin emurelease ndvi, 41, 10

Out[40]= ndvi a_List, b_List

In[41]:= NDVI ndvi Partition NIR, reader getSizeX , Partition red, reader getSizeX ;

In[42]:= Image Partition NDVI, reader getSizeX

ndvi.tm

:Begin:
:Function: ndvi
:Pattern: ndvi[ a_List, b_List ]
:Arguments: { a, b }
:ArgumentTypes: { Manual }
:ReturnType: Manual
:End:

ndvi.cu
/*
 * MathLink entry point for ndvi[a, b] (see ndvi.tm; arguments and return
 * value are Manual).
 *
 * Reads two 16-bit integer arrays from stdlink, runs ndviGPU on them and
 * writes the per-element result back to stdlink as a flat list of
 * dims_A[0]*dims_A[1] single-precision reals.
 *
 * Failure handling:
 *  - if an array cannot be read, the function returns without putting a
 *    result (as the original did), after releasing anything already read;
 *  - if the arrays have fewer than two dimensions, differ in their first
 *    two extents, or the host buffer cannot be allocated, $Failed is put.
 */
void ndvi(void)
{
    short int *h_A, *h_B;
    float *h_C_GPU;
    short int *d_A, *d_B;
    float *d_C;

    char **heads_A, **heads_B;
    int *dims_A, *dims_B;
    int rank_A, rank_B;

    if(! MLGetInteger16Array(stdlink, &h_A, &dims_A, &heads_A, &rank_A))
    {
        return;
    }

    if(! MLGetInteger16Array(stdlink, &h_B, &dims_B, &heads_B, &rank_B))
    {
        // h_A was read successfully; release it before giving up
        MLReleaseInteger16Array(stdlink, h_A, dims_A, heads_A, rank_A);
        return;
    }

    // dims_X[1] is read below, and h_B is copied using A's extents, so both
    // arrays need at least two dimensions and matching leading extents.
    if (rank_A < 2 || rank_B < 2 ||
        dims_A[0] != dims_B[0] || dims_A[1] != dims_B[1])
    {
        MLReleaseInteger16Array(stdlink, h_A, dims_A, heads_A, rank_A);
        MLReleaseInteger16Array(stdlink, h_B, dims_B, heads_B, rank_B);
        MLPutSymbol(stdlink, "$Failed");
        return;
    }

    const size_t count = (size_t)dims_A[0] * (size_t)dims_A[1];
    const size_t in_bytes = count * sizeof(short int);
    const size_t out_bytes = count * sizeof(float);

    // Nothing to compute: answer with an empty list instead of launching
    // a kernel with a zero-sized grid
    if (count == 0)
    {
        MLReleaseInteger16Array(stdlink, h_A, dims_A, heads_A, rank_A);
        MLReleaseInteger16Array(stdlink, h_B, dims_B, heads_B, rank_B);
        MLPutFunction(stdlink, "List", 0);
        return;
    }

    //Initializing data
    h_C_GPU = (float *)malloc(out_bytes);
    if (h_C_GPU == NULL)
    {
        MLReleaseInteger16Array(stdlink, h_A, dims_A, heads_A, rank_A);
        MLReleaseInteger16Array(stdlink, h_B, dims_B, heads_B, rank_B);
        MLPutSymbol(stdlink, "$Failed");
        return;
    }

    //Allocating GPU memory
    cutilSafeCall( cudaMalloc((void **)&d_A, in_bytes) );
    cutilSafeCall( cudaMalloc((void **)&d_B, in_bytes) );
    cutilSafeCall( cudaMalloc((void **)&d_C, out_bytes) );

    //Copy data to GPU memory for further processing
    cutilSafeCall( cudaMemcpy(d_A, h_A, in_bytes, cudaMemcpyHostToDevice) );
    cutilSafeCall( cudaMemcpy(d_B, h_B, in_bytes, cudaMemcpyHostToDevice) );

    cutilSafeCall( cudaThreadSynchronize() );

    // 16 x 32 threads per block; the grid is rounded up and ndviGPU
    // bounds-checks, so extents need not be multiples of the block size.
    // As in the original, dims_A[0] is passed as the kernel's width and
    // dims_A[1] as its height.
    dim3 threads(16, 32, 1);
    dim3 grid((dims_A[0] + threads.x - 1) / threads.x,
              (dims_A[1] + threads.y - 1) / threads.y, 1);

    ndviGPU<<<grid, threads>>>(d_C, d_A, d_B, dims_A[0], dims_A[1]);
    cutilCheckMsg("ndviGPU() execution failed\n");
    cutilSafeCall( cudaThreadSynchronize() );

    //Release d_A and d_B
    cutilSafeCall( cudaFree(d_B) );
    cutilSafeCall( cudaFree(d_A) );

    //Read back GPU results into h_C_GPU
    cutilSafeCall( cudaMemcpy(h_C_GPU, d_C, out_bytes, cudaMemcpyDeviceToHost) );

    //Release d_C
    cutilSafeCall( cudaFree(d_C) );

    //Return result
    MLPutReal32List(stdlink, h_C_GPU, (int)count);

    //Release h_A and h_B
    MLReleaseInteger16Array(stdlink, h_A, dims_A, heads_A, rank_A);
    MLReleaseInteger16Array(stdlink, h_B, dims_B, heads_B, rank_B);

    //Release the host result buffer
    free(h_C_GPU);

    cudaThreadExit();
}

NDVI Kernel
///////////////////////////////////////////////////////////////////////////////
// ndviGPU: for every element i of a width x height array, stores
//   d_C[i] = (d_A[i] - d_B[i]) / (d_A[i] + d_B[i])
// computed in single precision with the __fdividef fast-division intrinsic.
// Expects a 2D launch: x indexes within a row of `width` elements, y selects
// the row. Threads outside the array do nothing.
///////////////////////////////////////////////////////////////////////////////

__global__ void ndviGPU(
float *d_C,
short int *d_A,
short int *d_B,
int width,
int height
){
    const unsigned int col = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid may overhang the array; surplus threads leave early
    if (col >= width || row >= height)
        return;

    const unsigned int idx = row * (width) + col;
    const short int a = d_A[idx];
    const short int b = d_B[idx];

    d_C[idx] = __fdividef( (float)(a - b), (float)(a + b) );
}

NDVI output

0 1

In[64]:= ArrayPlot Partition NDVI, reader getSizeX , ColorFunction "Rainbow"

Questions?

http://hpc.nomad-labs.com
kashif@nomad-labs.com
twitter krasul

Using Cuda Within Mathematica

Recomendados

Recomendados

Mais conteúdo relacionado

Mais procurados

Mais procurados (20)

Semelhante a Using Cuda Within Mathematica

Semelhante a Using Cuda Within Mathematica (20)

Mais de Shoaib Burq

Mais de Shoaib Burq (11)

Último

Último (20)

Using Cuda Within Mathematica