Differences

This shows you the differences between two versions of the page.

--- en:site:recherche:logiciels:sparqlwithspark:datasetwatdiv [15/09/2016 10:06] – [Load VP's] hubert
+++ en:site:recherche:logiciels:sparqlwithspark:datasetwatdiv [16/09/2016 23:01] (current) – [Load VP's] hubert
@@ Line 1: / Line 1: @@
+{{indexmenu_n>1}}
 ====== Loading WatDiv Dataset ======
-===== Load and encode data =====
+===== Data preparation: encode raw data =====
-<code>
+<code scala>
 import org.apache.spark.sql.DataFrame
@@ Line 86: / Line 88: @@
 Create one dataset per property.
-<code>
+<code scala>
 /*
 val df = num.
@@ Line 117: / Line 119: @@
 ===== Load VP's =====
-<code>
+<code scala>
 // S2RDF VP
@@ Line 128: / Line 130: @@
 val dir = "/user/hubert/watdiv"
-// 1 billion triple
+// 1 billion triples
 val scale = "1G"
@@ Line 145: / Line 147: @@
 //val dictSO = sqlContext.read.parquet(dictSOFile).repartition(NB_FRAGMENTS, col("so"))
 dictSO.persist().count
-//dictSO.unpersist()
 // VP Dataset
 // -------
-//val encodedFile = dir + "/frame" + scale
 val vpDir = dir + "/vp" + scale
-// CHRONO
+// TIMER
 def queryTimeDFIter(q: DataFrame, nbIter: Int): Unit = {
   var l = new scala.collection.mutable.ArrayBuffer[Double](nbIter)
@@ Line 170: / Line 170: @@
-// define VPs to be loaded
+// Define the VPs to be loaded
 //-------------------------
 val nbP = dictP.count.toInt