This post contains the following topics:
You can create either dense n-dimensional arrays or distributed dense n-dimensional arrays using the Microsoft codename “Cloud Numerics” lab.
You can create dense arrays with Numerics.Local. For example:
using local = Microsoft.Numerics.Local;

// Build a local dense array from a 2-by-2 managed array of doubles
var a = local.NumericDenseArrayFactory.CreateFromSystemArray<double>(
    new double[,]
    {
        { -0.5, 1.0 },
        { 0.5, 1.0 }
    });
You can create distributed dense arrays with Numerics.Distributed. For example:
using dist = Microsoft.Numerics.Distributed;

// Explicit distributed data creation
var c = new dist.NumericDenseArray<double>(a);
You can cast from a distributed array to a local array. For example:
var d = c.ToLocalArray(); // Recast distributed data as a local array (explicit method call)
You can also assign local data to distributed data. For example:
// Create a local 2-by-2 dense array
var a = local.NumericDenseArrayFactory.CreateFromSystemArray<double>(
    new double[,] { { -0.5, 1.0 }, { 0.5, 1.0 } });

// Assigning the local array to a distributed-array variable needs no cast
dist.NumericDenseArray<double> c = a; // Assignment with backend distributed data
The “Cloud Numerics” lab provides an interface you can implement for loading data from a file.
The steps for loading distributed data from a file are:
1. Create a class that returns an object that conforms to the Numerics.Distributed.IO.IParallelReader interface or else use or modify the Distributed.IO.CSVLoader class provided in the Cloud Numerics lab distribution.
2. Use the Distributed.IO.Loader.LoadData() method to load your data into a distributed dense array.
For more details, see the blog post titled Using the IParallelReader Interface.
For more information on Windows Azure Blob storage, navigate to the following Getting Started page: http://www.microsoft.com/windowsazure/learn/get-started/.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.WindowsAzure.StorageClient;
using msnl = Microsoft.Numerics.Local;
using msnd = Microsoft.Numerics.Distributed;

namespace ExampleWithSerialIO
{
    /// <summary>
    /// Sample that downloads one matrix blob, converts it to a local dense array,
    /// assigns it to a distributed array and prints the mean of the data.
    /// </summary>
    class Program
    {
        // Sample blobs that hold matrices of random numbers as binary data
        static string accountName = @"https://cloudnumericslab.blob.core.windows.net/";

        // 1000-by-1000 matrix
        static string blobAddress = @"https://cloudnumericslab.blob.core.windows.net/arraycollection/mediummatrix";

        /// <summary>
        /// Reads the blob at <c>blobAddress</c> and converts its binary content
        /// into a local NumericDenseArray of doubles.
        /// </summary>
        /// <returns>
        /// A rows-by-columns local array, where both sizes come from the blob's
        /// "dimension0" and "dimension1" metadata entries.
        /// </returns>
        /// <exception cref="OverflowException">
        /// Thrown when an element's byte offset does not fit in an <see cref="int"/>.
        /// </exception>
        public static msnl.NumericDenseArray<double> ReadBlob()
        {
            // Get reference to blob
            var blobClient = new CloudBlobClient(accountName);
            var blob = blobClient.GetBlobReference(blobAddress);

            // Read number of rows and columns from blob metadata
            blob.FetchAttributes();
            long rows = Convert.ToInt64(blob.Metadata["dimension0"]);
            long columns = Convert.ToInt64(blob.Metadata["dimension1"]);

            // Convert blob binary data to local NumericDenseArray
            var outArray = msnl.NumericDenseArrayFactory.Create<double>(new long[] { rows, columns });
            var blobData = blob.DownloadByteArray();
            for (long i = 0; i < rows; i++)
            {
                for (long j = 0; j < columns; j++)
                {
                    // Element (i, j) is read from position i * columns + j of the blob.
                    // The offset is computed in 64-bit arithmetic and narrowed with a
                    // checked cast so an oversized index throws instead of wrapping.
                    int offset = checked((int)((i * columns + j) * sizeof(double)));
                    outArray[i, j] = BitConverter.ToDouble(blobData, offset);
                }
            }

            return outArray;
        }

        /// <summary>
        /// Entry point: initializes the runtime, loads the data, computes and prints
        /// its mean, then shuts the runtime down.
        /// </summary>
        static void Main()
        {
            // Initialize runtime
            Microsoft.Numerics.NumericsRuntime.Initialize();

            // Read data and implicitly cast to distributed array
            msnd.NumericDenseArray<double> data = ReadBlob();

            // Compute mean of dataset
            var mean = Microsoft.Numerics.Statistics.Descriptive.Mean(data);

            // Write result. When running on Windows Azure cluster,
            // the output is available in job output
            Console.WriteLine("Mean of data: {0}", mean);

            // Shut down runtime
            Microsoft.Numerics.NumericsRuntime.Shutdown();
        }
    }
}
using System;
using System.Linq;
using msnl = Microsoft.Numerics.Local;
using msnd = Microsoft.Numerics.Distributed;
using Microsoft.Numerics;
using Microsoft.WindowsAzure;
using Microsoft.WindowsAzure.StorageClient;

// An example method for reading an array from blob storage
// Each blob contains a piece of 2-D array
namespace AzureArrayReader
{
    /// <summary>
    /// Parallel reader that loads blobs named "slab0", "slab1", ... from one
    /// container and concatenates them along the column dimension.
    /// </summary>
    [Serializable()]
    public class AzureArrayReader : msnd.IO.IParallelReader<double>
    {
        private string accountName;
        private string containerName;

        /// <summary>Stores the storage address and container name used by the reader.</summary>
        /// <param name="accountName">Value passed to the <c>CloudBlobClient</c> constructor.</param>
        /// <param name="containerName">Name of the container that holds the slab blobs.</param>
        public AzureArrayReader(string accountName, string containerName)
        {
            this.accountName = accountName;
            this.containerName = containerName;
        }

        /// <summary>Assign blobs to MPI ranks.</summary>
        /// <param name="nranks">Number of ranks to split the blobs between.</param>
        /// <returns>
        /// One entry per rank; each entry is an <c>int[]</c> holding
        /// { index of first blob, number of blobs } for that rank.
        /// </returns>
        public object[] ComputeAssignment(int nranks)
        {
            Object[] blobs = new Object[nranks];
            var blobClient = new CloudBlobClient(accountName);
            var matrixContainer = blobClient.GetContainerReference(containerName);
            var blobCount = matrixContainer.ListBlobs().Count();
            int maxBlobsPerRank = (int)Math.Ceiling((double)blobCount / (double)nranks);
            int currentBlob = 0;
            for (int i = 0; i < nranks; i++)
            {
                // Ranks past the end of the blob list receive a step of zero
                int step = Math.Max(0,
                    Math.Min(maxBlobsPerRank,
                    blobCount - currentBlob)
                    );
                blobs[i] = new int[] { currentBlob, step };
                currentBlob = currentBlob + step;
            }
            return (object[])blobs;
        }

        // Assume pieces are concatenated along column dimension
        public int DistributedDimension
        {
            get { return 1; }
            set { }
        }

        /// <summary>Read data from blobs.</summary>
        /// <param name="assignment">
        /// An <c>int[]</c> of { index of first blob, number of blobs }, as produced
        /// by <see cref="ComputeAssignment"/>.
        /// </param>
        /// <returns>
        /// A local array with the row count of "slab0" and the summed column count
        /// of the assigned slabs; zero columns when no blobs were assigned.
        /// </returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// Thrown when an assigned slab's row count differs from that of "slab0".
        /// </exception>
        /// <exception cref="OverflowException">
        /// Thrown when an element's byte offset does not fit in an <see cref="int"/>.
        /// </exception>
        public msnl.NumericDenseArray<double> ReadWorker(Object assignment)
        {
            var blobClient = new CloudBlobClient(accountName);
            var matrixContainer = blobClient.GetContainerReference(containerName);
            int[] blobs = (int[])assignment;
            long i, j, k;
            msnl.NumericDenseArray<double> outArray;

            // The row count of "slab0" is the reference every other slab must match
            var firstBlob = matrixContainer.GetBlockBlobReference("slab0");
            firstBlob.FetchAttributes();
            long rows = Convert.ToInt64(firstBlob.Metadata["dimension0"]);
            long[] columnsPerSlab = new long[blobs[1]];

            if (blobs[1] > 0)
            {
                // Get blob metadata, validate that each piece has equal number of rows
                for (i = 0; i < blobs[1]; i++)
                {
                    var matrixBlob = matrixContainer.GetBlockBlobReference(
                        "slab" + (blobs[0] + i).ToString());
                    matrixBlob.FetchAttributes();
                    if (Convert.ToInt64(matrixBlob.Metadata["dimension0"]) != rows)
                    {
                        throw new System.IO.InvalidDataException("Invalid slab shape");
                    }
                    columnsPerSlab[i] = Convert.ToInt64(matrixBlob.Metadata["dimension1"]);
                }

                // Construct output array
                outArray = msnl.NumericDenseArrayFactory.Create<double>(
                    new long[] { rows, columnsPerSlab.Sum() }
                    );

                // Read data
                long columnCounter = 0;
                for (i = 0; i < blobs[1]; i++)
                {
                    var matrixBlob = matrixContainer.GetBlobReference("slab" + (blobs[0] + i).ToString());
                    var blobData = matrixBlob.DownloadByteArray();
                    for (j = 0; j < columnsPerSlab[i]; j++)
                    {
                        for (k = 0; k < rows; k++)
                        {
                            // Element (k, j) of the slab is read from position j * rows + k.
                            // The offset is computed in 64-bit arithmetic and narrowed with
                            // a checked cast so an oversized index throws instead of wrapping.
                            int offset = checked((int)((j * rows + k) * sizeof(double)));
                            outArray[k, columnCounter] = BitConverter.ToDouble(blobData, offset);
                        }
                        columnCounter = columnCounter + 1;
                    }
                }
            }
            else
            {
                // If a rank was assigned zero blobs, return empty array
                outArray = msnl.NumericDenseArrayFactory.Create<double>(
                    new long[] { rows, 0 });
            }

            return outArray;
        }
    }
}
This section provides the following examples of how to use the C# LINQ extensions to access array data.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.Numerics;
using Microsoft.Numerics.Local;

namespace HowToRecipes
{
    /// <summary>
    /// Shows how to extract a contiguous range of elements from a dense array
    /// with the LINQ <c>Where</c> overload that also receives the element index.
    /// </summary>
    class LINQtoNDAExtractExample
    {
        /// <summary>
        /// Builds a six-element array, keeps the elements whose index lies between
        /// the start and end index (both inclusive) and prints the result.
        /// </summary>
        public static void Run()
        {
            // Create Numeric Dense Array
            var numbers = NumericDenseArrayFactory.CreateFromSystemArray<int>(
                new int[] { 1, 2, 3, 4, 5, 6 });

            // Set indexes of start and end of the part to be extracted
            int idxStart = 1;
            int idxEnd = 4;

            Console.WriteLine("All numbers: {0}", numbers);
            Console.WriteLine("Start index: {0}, End index {1}", idxStart, idxEnd);

            // Extract
            NumericDenseArray<int> outArray = NumericDenseArrayFactory.CreateFromSystemArray<int>(
                numbers
                    .Where((x, i) => (i >= idxStart && i <= idxEnd))
                    .ToArray());

            Console.WriteLine("Extracted array: {0}", outArray);
        }
    }
}
using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;
using Microsoft.Numerics;
using Microsoft.Numerics.Local;

namespace HowToRecipes
{
    /// <summary>
    /// Shows how to drop NaN entries from a dense array with a LINQ <c>Where</c> filter.
    /// </summary>
    class LINQtoNDATrimNaNsExample
    {
        /// <summary>
        /// Builds an array that contains NaN values, filters them out and prints
        /// the array before and after trimming.
        /// </summary>
        public static void Run()
        {
            // Create Numeric dense array with NaNs
            var sampleNan = NumericDenseArrayFactory.CreateFromSystemArray<double>(
                new double[] { double.NaN, 1.0, 2.0, 3.0, double.NaN, 4.0, 5.0, 6.0 }
                );

            Console.WriteLine("Array with NaNs: {0}", sampleNan);

            // Trim NaN
            var cleanedNDA = NumericDenseArrayFactory.CreateFromSystemArray<double>(
                sampleNan
                    .Where(x => (!double.IsNaN(x)))
                    .ToArray());

            Console.WriteLine("Trimmed array: {0}", cleanedNDA);
        }
    }
}