Past Meetup

Deep Learning for MLlib with Sparkling Water

This Meetup is past

186 people went

Location image of event venue

Details

Please join us for pizza at 6:30; the presentation will start at 7. Looking forward to seeing you there!

http://0xdata.com/blog/2014/09/Sparkling-Water/

http://databricks.com/blog/2014/06/30/sparkling-water-h20-spark.html

Best-of-breed open source comes to machine learning users through Sparkling Water.

Get Deep Learning for Spark & integrate your big data machine learning applications with Machine Learning from MLlib and H2O.

// Checks that Deep Learning can learn the relationship
// Y = logistic(offset + slope * X) from generated samples.
test("deep learning log regression") {
  val sampleCount = 10000
  val offset = 2.0
  val slope = -1.5

  // Generate the training samples.
  // NOTE(review): the trailing 42 is presumably an RNG seed - confirm in DeepLearningSuite.
  val trainingPoints = DeepLearningSuite.generateLogisticInput(offset, slope, sampleCount, 42)

  // Distribute the training samples as an RDD and mark it for caching
  val trainingRDD = sc.parallelize(trainingPoints, 2)
  trainingRDD.cache()

  import H2OContext._

  // Turn the training RDD into an H2O data frame
  val trainingFrame = toDataFrame(sc, trainingRDD)

  // Deep Learning setup: train on the frame, use its last vector as the
  // response, and run in classification mode
  val params = new DeepLearningParameters()
  params.source = trainingFrame
  params.response = trainingFrame.lastVec()
  params.classification = true

  // Create the model builder, start training and fetch the resulting model
  val builder = new DeepLearning(params)
  val trainingJob = builder.train()
  val model = trainingJob.get()

  // Generate a separate validation set from the same offset/slope
  // (last argument is 17 here instead of 42) and convert it the same way
  val validationData = DeepLearningSuite.generateLogisticInput(offset, slope, sampleCount, 17)
  val validationPoints = sc.parallelize(validationData, 2)
  val validationFrame = toDataFrame(sc, validationPoints)

  // Score the validation frame and select the 'predict column.
  // The result is wrapped in DataFrame explicitly (missing implicit conversion).
  val scored = model.score(validationFrame)
  val predictedFrame = new DataFrame(scored)('predict)
  val predictedRDD = toRDD[DoubleHolder](sc, predictedFrame)

  // Compare the collected predictions with the validation data;
  // a missing prediction is passed on as NaN
  validatePrediction(
    predictedRDD.collect().map(holder => holder.predict.getOrElse(Double.NaN)),
    validationData)
}