site:recherche:logiciels:rdfdist
Différences
Ci-dessous, les différences entre deux révisions de la page.
| Les deux révisions précédentesRévision précédenteProchaine révision | Révision précédente | ||
| site:recherche:logiciels:rdfdist [11/05/2015 12:29] – [RDFdist] amine | site:recherche:logiciels:rdfdist [26/04/2017 13:06] (Version actuelle) – amann | ||
|---|---|---|---|
| Ligne 1: | Ligne 1: | ||
| - | ===== RDFdist ===== | + | {{indexmenu_n> |
| + | |||
| + | ===== RDFdist | ||
| This wiki page provides information about the experiments on RDF distribution approaches using [[http:// | This wiki page provides information about the experiments on RDF distribution approaches using [[http:// | ||
| - | This information consists of i) the source code for both data preparation and query evaluation, | + | This information consists of i) the query workload, ii) the source code for both data preparation and query evaluation, and iii) the description of two datasets used in the experiments. |
| - | For the sake of reproducibility, | + | For the sake of reproducibility, |
| - | =====Data preparation and Query evaluation Source Code===== | + | |
| - | ====Hash-based approaches==== | + | =====Query workload===== | ||
| + | We picked three queries from the [[http:// | ||
| + | which are referred to as Query 2, Query 3 and Query 4, respectively. | ||
| + | We have created an additional one, referred to as Query 1. | ||
| + | We also created two queries for the [[https:// | ||
| + | ===Query 1 (synthetic, LUBM)=== | ||
| + | < | ||
| + | SELECT ?x ?y ?z | ||
| + | WHERE | ||
| + | {?x lubm: | ||
| + | ?y lubm: | ||
| + | ?z lubm: | ||
| + | } | ||
| + | </ | ||
| - | < | ||
| + | |||
| + | |||
| + | ===Query 2 (#2 of LUBM)=== | ||
| + | < | ||
| + | PREFIX rdf: < | ||
| + | PREFIX ub: < | ||
| + | SELECT ?X, ?Y, ?Z | ||
| + | WHERE | ||
| + | {?X rdf:type ub: | ||
| + | ?Y rdf:type ub: | ||
| + | ?Z rdf:type ub: | ||
| + | ?X ub:memberOf ?Z . | ||
| + | ?Z ub: | ||
| + | ?X ub: | ||
| + | </ | ||
| + | |||
| + | ===Query 3 (#9 of LUBM)=== | ||
| + | < | ||
| + | PREFIX rdf: < | ||
| + | PREFIX ub: < | ||
| + | SELECT ?X, ?Y, ?Z | ||
| + | WHERE | ||
| + | {?X rdf:type ub:Student . | ||
| + | ?Y rdf:type ub:Faculty . | ||
| + | ?Z rdf:type ub:Course . | ||
| + | ?X ub:advisor ?Y . | ||
| + | ?Y ub: | ||
| + | ?X ub: | ||
| + | </ | ||
| + | |||
| + | ===Query 4 (#12 of LUBM)=== | ||
| + | < | ||
| + | PREFIX rdf: < | ||
| + | PREFIX ub: < | ||
| + | SELECT ?X, ?Y | ||
| + | WHERE | ||
| + | {?X rdf:type ub:Chair . | ||
| + | ?Y rdf:type ub: | ||
| + | ?X ub:worksFor ?Y . | ||
| + | ?Y ub: | ||
| + | |||
| + | </ | ||
| + | |||
| + | ===Query 5 (synthetic, Wikidata)=== | ||
| + | < | ||
| + | SELECT ?x ?y ?z | ||
| + | WHERE | ||
| + | {?x entity: | ||
| + | ?y entity: | ||
| + | ?z entity: | ||
| + | } | ||
| + | </ | ||
| + | |||
| + | ===Query 6 (synthetic, Wikidata) === | ||
| + | < | ||
| + | SELECT ?x ?y ?z | ||
| + | {?x entity:P39v ?y. | ||
| + | ?x entity: | ||
| + | ?x rdf:type ?w | ||
| + | } | ||
| + | </ | ||
| + | |||
| + | =====Source Code===== | ||
| + | |||
| + | |||
| + | |||
| + | ====Hash-based approaches==== | ||
| + | The following code corresponds to the triple hashing and subject hashing approaches for the LUBM datasets. | ||
| + | It consists of a data preparation part followed by a query evaluation part. | ||
| Ligne 28: | Ligne 110: | ||
| val t1 = java.lang.System.currentTimeMillis(); | val t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | |||
| + | /** | ||
| + | * set inputData with the path to the data encoded as quadruples (see Datasets excerpts) | ||
| + | */ | ||
| // loading and transforming the dataset | // loading and transforming the dataset | ||
| Ligne 36: | Ligne 123: | ||
| println(" | println(" | ||
| - | // Partitioning the dataset | + | // Partitioning the dataset |
| - | val triples = triples0.map{case(f, | + | /** |
| + | * Uncomment one of the following lines depending on whether hashing is applied | ||
| + | on the entire triple or only the subject | ||
| + | */ | ||
| + | // val triples = triples0.partitionBy(new HashPartitioner(part)) | ||
| + | // val triples = triples0.map{case(f, | ||
| // | // | ||
| Ligne 63: | Ligne 156: | ||
| val takco : Long = 1115684864 | val takco : Long = 1115684864 | ||
| - | def ajout(a : ListBuffer[(Long, | ||
| - | a += e | ||
| - | return a | ||
| - | } | ||
| // ----------------------------------------------------------- | // ----------------------------------------------------------- | ||
| Ligne 144: | Ligne 233: | ||
| println(" | println(" | ||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // highly selective on wikidata :: 8 results | ||
| + | // QR5: ?x < | ||
| + | ?y < | ||
| + | ?z < | ||
| + | // ----------------------------------------------------------- | ||
| + | val qr5 = triples.filter({case(s, | ||
| + | join(triples.filter({case(s, | ||
| + | map({case(k, | ||
| + | join(triples.filter({case(s, | ||
| + | qr5.persist | ||
| + | qr5.collect | ||
| + | |||
| + | // mid selectivity star shaped query : 10418 results | ||
| + | // QR6: ?x < | ||
| + | |||
| + | val qr6 = triples.filter({case(s, | ||
| + | join(triples.filter({case(s, | ||
| + | join(triples.filter({case(s, | ||
| + | |||
| + | qr6.persist | ||
| + | qr6.collect | ||
| + | |||
| </ | </ | ||
| - | ==Hashing applied on the subject == | ||
| - | ====Partitioning-based approaches==== | ||
| - | ==nHop== | + | ====Graph partitioning-based approaches==== |
| - | ==WARP== | + | ===Huang Approach |
| + | < | ||
| + | import org.apache.spark.HashPartitioner | ||
| + | import scala.collection.mutable.ListBuffer | ||
| - | ==Hybrid== | + | val folder= " |
| + | val dataset= " | ||
| + | val scale=" | ||
| + | val part=20 //10, 20 | ||
| + | val folderName = folder +scale | ||
| + | val fileName = dataset+scale+" | ||
| + | |||
| + | val t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | val quads_I_SPO = sc.textFile(s"/ | ||
| + | |||
| + | val quadsDist = quads_I_SPO.partitionBy(new HashPartitioner(part)).persist | ||
| + | |||
| + | |||
| + | |||
| + | val t2 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | print(" | ||
| + | |||
| + | val advisor : Long = 1233125376 | ||
| + | val worksFor : Long = 1136656384 | ||
| + | val suborg : Long = 1224736768 | ||
| + | val memof : Long = 1132462080 | ||
| + | val undeg : Long = 1101004800 | ||
| + | val teaof : Long = 1199570944 | ||
| + | val takco : Long = 1115684864 | ||
| + | |||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // Query 1 : (not part of the benchmark) | ||
| + | // Pattern: (x advisor y) (y worksFor z) (z subOrganisation t) | ||
| + | // ----------------------------------------------------------- | ||
| + | |||
| + | var t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | var pataws = quadsDist.filter({case(i, | ||
| + | join(quadsDist.filter({case(i, | ||
| + | map({case (y,(x,z)) => (z, | ||
| + | join(quadsDist.filter({case(i, | ||
| + | map({case (z, | ||
| + | |||
| + | pataws.count | ||
| + | |||
| + | var pataws2 = pataws.flatMap(x=> | ||
| + | |||
| + | var t2= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | println(" | ||
| + | |||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // LUBM 2 : MSU | ||
| + | // Pattern: (x memberOf y) (y subOrg z) (x UndergraduateDegreeFrom z) | ||
| + | // ----------------------------------------------------------- | ||
| + | |||
| + | var t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | //var pmemof = quadsDist.filter({case(i, | ||
| + | |||
| + | var patmsu = quadsDist.filter({case(i, | ||
| + | | ||
| + | | ||
| + | | ||
| + | |||
| + | patmsu.count | ||
| + | |||
| + | var patmsu2 = patmsu.flatMap(identity).distinct | ||
| + | |||
| + | var t2= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | println(" | ||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // LUBM 9 : ATT | ||
| + | // Pattern: (x advisor y) (y teacherOf z) (x takesCourse z) | ||
| + | // ----------------------------------------------------------- | ||
| + | |||
| + | var t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | var patatt = quadsDist.filter({case(i, | ||
| + | join(quadsDist.filter({case(i, | ||
| + | map({case (y,(x,z)) => (x+"" | ||
| + | join(quadsDist.filter({case(i, | ||
| + | |||
| + | patatt.distinct.count | ||
| + | |||
| + | var t2= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | println(" | ||
| + | |||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // LUBM 12 : WS | ||
| + | // Pattern: (y worksFor z) (z subOrganisation t) | ||
| + | // ----------------------------------------------------------- | ||
| + | |||
| + | val t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | val patws = quadsDist.filter({case(i, | ||
| + | |||
| + | val ans_patws = patws.distinct.count | ||
| + | |||
| + | val t2= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | println(" | ||
| + | </ | ||
| + | |||
| + | ===Warp=== | ||
| + | < | ||
| + | // Spark implementation of WARP replication | ||
| + | // usage: run this code into the spark-shell | ||
| + | |||
| + | import scala.collection.mutable.ListBuffer | ||
| + | import org.apache.spark.rdd.RDD | ||
| + | import scala.reflect.ClassTag | ||
| + | import org.apache.spark.SparkContext | ||
| + | import org.apache.spark.SparkContext._ | ||
| + | |||
| + | import org.apache.spark.HashPartitioner | ||
| + | import org.apache.spark.Partitioner | ||
| + | |||
| + | import java.io.Serializable | ||
| + | |||
| + | |||
| + | val folder= " | ||
| + | val dataset= " | ||
| + | val scale=" | ||
| + | |||
| + | /* We have 15 cores per machine. | ||
| + | Each core is accessing a separate part in parallel. | ||
| + | The default parallelism = #machines * 15 cores | ||
| + | < | ||
| + | */ | ||
| + | val machine = sc.defaultParallelism /15 | ||
| + | val part = sc.defaultParallelism | ||
| + | val folderName = folder + scale | ||
| + | val fileName = dataset + scale | ||
| + | /** | ||
| + | * set inputData with the path to the data encoded as quadruples (see Datasets excerpts) | ||
| + | */ | ||
| + | val inputData = s"/ | ||
| + | |||
| + | // Initial state, delete the storage | ||
| + | sc.getPersistentRDDs.values.foreach(x => x.unpersist()) | ||
| + | sc.getPersistentRDDs.values.size | ||
| + | |||
| + | var t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | def affiche(s: String): Unit={ | ||
| + | println("#### | ||
| + | println(s) | ||
| + | println("#### | ||
| + | } | ||
| + | |||
| + | |||
| + | /* | ||
| + | ----------------------- | ||
| + | STEP 1: read triples | ||
| + | ----------------------- | ||
| + | */ | ||
| + | |||
| + | /** | ||
| + | Function | ||
| + | returns a quad (subject, property, object, partID) | ||
| + | with partID = hash(subject) | ||
| + | **/ | ||
| + | def lireTriples(input: | ||
| + | | ||
| + | map(line => line.substring(1, | ||
| + | map(tab => (tab(0).toLong, | ||
| + | } | ||
| + | |||
| + | val triples = lireTriples(inputData) | ||
| + | triples.setName(" | ||
| + | triples.persist | ||
| + | |||
| + | //nombre total de nuplets | ||
| + | // | ||
| + | |||
| + | // stat: nb triplets par partition | ||
| + | // | ||
| + | |||
| + | |||
| + | |||
| + | /* | ||
| + | --------------------------------------------------------------------------- | ||
| + | STEP 2: COMPUTE REPLICATION TRIPLES | ||
| + | |||
| + | Computes the triple to replicate in every partition, for each query | ||
| + | --------------------------------------------------------------------------- | ||
| + | */ | ||
| + | |||
| + | /* | ||
| + | | ||
| + | */ | ||
| + | def filterProp(input: | ||
| + | x // on projette sur S et O car P est fixe | ||
| + | return input.filter(x => x._2 == prop).map{ case (s,p,o,f) => (s,o,f)} | ||
| + | } | ||
| + | |||
| + | def getOutsideTriple(seedF: | ||
| + | var a = t.toArray | ||
| + | var res: ListBuffer[(Long, | ||
| + | |||
| + | for(i <-0 to (a.length - 1)) { | ||
| + | if( a(i)._3 != seedF) { | ||
| + | res.append( (a(i)._1, | ||
| + | } | ||
| + | } | ||
| + | return res | ||
| + | } | ||
| + | |||
| + | def getReplicaForQuery(query: | ||
| + | var replica = sc.parallelize(List((0L, | ||
| + | var min_nb = -1L | ||
| + | |||
| + | for (i <- 0 to (nbCandidateSeed-1)) { | ||
| + | var t1 = query.map(x => (x(i)._3, x)) | ||
| + | // | ||
| + | |||
| + | // lister les triplets a repliquer = ceux qui ne sont pas dans la seed partition | ||
| + | var t2 = t1.flatMap{ case(seedF, tripleList) => getOutsideTriple(seedF, | ||
| + | |||
| + | // count the triples to replicate | ||
| + | var nb = t2.count | ||
| + | if(min_nb == -1 || nb < min_nb){ | ||
| + | min_nb=nb | ||
| + | replica=t2 | ||
| + | println(" | ||
| + | println(s" | ||
| + | } | ||
| + | else { | ||
| + | println(s" | ||
| + | } | ||
| + | } | ||
| + | return replica | ||
| + | } | ||
| + | |||
| + | // ------------------------------------------------------- | ||
| + | // Compute replication for the following QUERIES | ||
| + | // ------------------------------------------------------- | ||
| + | |||
| + | // based on dictionary encoding | ||
| + | val advisor : Long = 1233125376 | ||
| + | val worksFor : Long = 1136656384 | ||
| + | val suborg : Long = 1224736768 | ||
| + | val memof : Long = 1132462080 | ||
| + | val undeg : Long = 1101004800 | ||
| + | val teaof : Long = 1199570944 | ||
| + | val takco : Long = 1115684864 | ||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // Query 1 : (not part of the benchmark) | ||
| + | // Pattern: (x advisor y) (y worksFor z) (z subOrganisation t) | ||
| + | // ----------------------------------------------------------- | ||
| + | val query1 = filterProp(triples, | ||
| + | join(filterProp(triples, | ||
| + | map{ case ( y, ((x,f1), (z, f2))) => (z, (x, f1, y, f2)) }. | ||
| + | join(filterProp(triples, | ||
| + | map{ case ( z, ( (x, | ||
| + | |||
| + | query1.setName(" | ||
| + | // | ||
| + | // | ||
| + | // resultat: 4108791 | ||
| + | |||
| + | var t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | val replica1 = getReplicaForQuery(query1, | ||
| + | |||
| + | var t2= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | println(" | ||
| + | |||
| + | // stat: nb triples a repliquer dans chaque partition | ||
| + | // | ||
| + | // (0,15551) (1,15776) (2,17639) (3,201275) (4,46068) | ||
| + | |||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // LUBM 2 : MSU | ||
| + | // Pattern: (x memberOf y) (y subOrg z) (x UndergraduateDegreeFrom z) | ||
| + | // ----------------------------------------------------------- | ||
| + | val query2 = filterProp(triples, | ||
| + | join(filterProp(triples, | ||
| + | map{ case ( y, ((x,f1), (z,f2) )) => ((x, z), (y, f1, f2)) }. | ||
| + | join(filterProp(triples, | ||
| + | map{ case ((x, z), ((y, f1, f2), f3)) => ListBuffer((x, | ||
| + | |||
| + | query2.setName(" | ||
| + | // | ||
| + | // | ||
| + | |||
| + | val replica2 = getReplicaForQuery(query2, | ||
| + | |||
| + | //stat: nb triples a repliquer dans chaque partition | ||
| + | // | ||
| + | //(0,1) (1,3) (3,4) (4,896) | ||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // LUBM 9 : ATT | ||
| + | // Pattern: (x advisor y) (y teacherOf z) (x takesCourse z) | ||
| + | // ----------------------------------------------------------- | ||
| + | val query9 = filterProp(triples, | ||
| + | join(filterProp(triples, | ||
| + | map{ case ( y, ((x, f1), (z, f2) )) => ((x, z), (y, f1, f2)) }. | ||
| + | join(filterProp(triples, | ||
| + | map{ case ((x, z), ((y, f1, f2), f3)) => ListBuffer((x, | ||
| + | |||
| + | query9.setName(" | ||
| + | // | ||
| + | query9.count | ||
| + | // 272982 | ||
| + | |||
| + | val replica9 = getReplicaForQuery(query9, | ||
| + | |||
| + | // stat : nb triples à repliquer dans chaque partition | ||
| + | // | ||
| + | //(0,3131) (1,3213) (2,3504) (3,44706) (4,2014) | ||
| + | |||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // LUBM 12 : WS | ||
| + | // Pattern: (y worksFor z) (z subOrganisation t) | ||
| + | // ----------------------------------------------------------- | ||
| + | val query12 = filterProp(triples, | ||
| + | join(filterProp(triples, | ||
| + | map{ case ( z, ((y, f1), (t, f2) )) => ListBuffer((y, | ||
| + | |||
| + | query12.setName(" | ||
| + | // | ||
| + | query12.count | ||
| + | //720628 | ||
| + | |||
| + | val replica12 = getReplicaForQuery(query12, | ||
| + | |||
| + | //stat : nb triples à repliquer dans chaque partition | ||
| + | // | ||
| + | |||
| + | |||
| + | /* | ||
| + | --------------------------------------------------------------------------- | ||
| + | STEP 3: UNION DES REPLICAS CALCULES POUR CHAQUE REQUETE | ||
| + | --------------------------------------------------------------------------- | ||
| + | */ | ||
| + | |||
| + | val allreplica = replica1.union(replica2).union(replica9).union(replica12).distinct | ||
| + | |||
| + | val nbAjout = allreplica.count | ||
| + | // 357 161 pour 5 part | ||
| + | // 734 295 pour 10 part | ||
| + | // 779 983 pour 20 part | ||
| + | |||
| + | // replication rate | ||
| + | affiche(" | ||
| + | affiche(" | ||
| + | |||
| + | var t2= java.lang.System.currentTimeMillis(); | ||
| + | affiche(" | ||
| + | |||
| + | val oDist = triples.map{ case(s, | ||
| + | val oMean = oDist.map{case(f, | ||
| + | val odevs = oDist.map{case(f, | ||
| + | val ostddev = Math.sqrt(odevs.sum / machine) | ||
| + | |||
| + | val nDist = oDist.join(allreplica.map{ case(s, | ||
| + | val nMean = nDist.sum / machine | ||
| + | val ndevs = nDist.map(score => (score - nMean) * (score - nMean)) | ||
| + | val nstddev = Math.sqrt(ndevs.sum / machine) | ||
| + | |||
| + | |||
| + | |||
| + | /* | ||
| + | --------------------------------------------------------------------------- | ||
| + | STEP 4: LOCAL QUERY PROCESSING | ||
| + | On the WARP replicated data | ||
| + | --------------------------------------------------------------------------- | ||
| + | */ | ||
| + | |||
| + | // Extends each partition with the WARP replicated triples. | ||
| + | // Each machine stores one partition | ||
| + | val triplesWARP = triples.union(allreplica). | ||
| + | map{ case (s,p,o,f) => (f, (s,p,o))}. | ||
| + | partitionBy(new HashPartitioner(machine)).map{ case (f, (s,p,o)) => (s,p,o,f)} | ||
| + | |||
| + | triplesWARP.setName(" | ||
| + | triplesWARP.persist() | ||
| + | triplesWARP.count | ||
| + | |||
| + | |||
| + | var t2= java.lang.System.currentTimeMillis(); | ||
| + | affiche(" | ||
| + | |||
| + | |||
| + | // ------------------- | ||
| + | // Q1 LOCAL | ||
| + | // ------------------- | ||
| + | t1= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | // Query1 locale avec projection sur les variables (x,y,z,t) | ||
| + | val localQuery1 = filterProp(triplesWARP, | ||
| + | join(filterProp(triplesWARP, | ||
| + | map{ case ( (y, f), (x, z)) => ((z, f), (x, y)) }. | ||
| + | join(filterProp(triplesWARP, | ||
| + | map{ case ( (z, f), ( (x,y), t)) => (x, y, z, t)} | ||
| + | |||
| + | localQuery1.count | ||
| + | // | ||
| + | |||
| + | t2= java.lang.System.currentTimeMillis(); | ||
| + | affiche(" | ||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // Q2 LOCAL: | ||
| + | // Pattern: (x memberOf y) (y subOrg z) (x UndergraduateDegreeFrom z) | ||
| + | // ----------------------------------------------------------- | ||
| + | t1= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | val localQuery2 = filterProp(triplesWARP, | ||
| + | join(filterProp(triplesWARP, | ||
| + | map{ case ( (y, f), (x, z)) => ((x, z, f), y) }. | ||
| + | join(filterProp(triplesWARP, | ||
| + | map{ case ((x, z, f), (y, 1)) => (x, y, z) } | ||
| + | |||
| + | localQuery2.count | ||
| + | //2528 | ||
| + | t2= java.lang.System.currentTimeMillis(); | ||
| + | affiche(" | ||
| + | |||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // Q9 LOCAL | ||
| + | // Pattern: (x advisor y) (y teacherOf z) (x takesCourse z) | ||
| + | // ----------------------------------------------------------- | ||
| + | t1= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | val localQuery9 = filterProp(triplesWARP, | ||
| + | join(filterProp(triplesWARP, | ||
| + | map{ case ( (y, f), (x, z)) => ((x, z, f), y) }. | ||
| + | join(filterProp(triplesWARP, | ||
| + | map{ case ((x, z, f), (y, 1)) => (x, y, z) } | ||
| + | |||
| + | localQuery9.count | ||
| + | //272982 | ||
| + | |||
| + | t2= java.lang.System.currentTimeMillis(); | ||
| + | affiche(" | ||
| + | |||
| + | |||
| + | // ----------------------------------------------------------- | ||
| + | // Q12 LOCAL | ||
| + | // Pattern: (y worksFor z) (z subOrganisation t) | ||
| + | // ----------------------------------------------------------- | ||
| + | t1= java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | val localQuery12 = filterProp(triplesWARP, | ||
| + | join(filterProp(triplesWARP, | ||
| + | map{ case ( (z, f), (y, t)) => (y, z, t) } | ||
| + | |||
| + | localQuery12.count | ||
| + | //938356 | ||
| + | |||
| + | t2= java.lang.System.currentTimeMillis(); | ||
| + | affiche(" | ||
| + | |||
| + | |||
| + | </ | ||
| + | ===2-hop based approach=== | ||
| + | < | ||
| + | val folder= | ||
| + | val dataset= " | ||
| + | val scale=" | ||
| + | |||
| + | val folderName = folder +scale | ||
| + | val part = Array(5, | ||
| + | |||
| + | for (p <- part) | ||
| + | { | ||
| + | val fileName = dataset+scale+" | ||
| + | val fileNamewatdiv2k_encoded_unique_quads.partNew.5 | ||
| + | val t1 = java.lang.System.currentTimeMillis(); | ||
| + | |||
| + | val quads = sc.textFile(s"/ | ||
| + | |||
| + | var addOneHop = quads.map({case(s, | ||
| + | |||
| + | val newQuads = quads.union(addOneHop).distinct | ||
| + | val newQuadsSize = newQuads.count | ||
| + | |||
| + | val t2 = java.lang.System.currentTimeMillis(); | ||
| + | val hopSize = addOneHop.count | ||
| + | println(s" | ||
| + | println(s" | ||
| + | newQuads.saveAsTextFile(s"/ | ||
| + | } | ||
| + | </ | ||
| ====Datasets excerpts==== | ====Datasets excerpts==== | ||
| + | ===Encoding of LUBM concepts and properties=== | ||
| + | |||
| + | < | ||
| + | Properties: | ||
| + | 0 < | ||
| + | 603979776 < | ||
| + | 671088640 < | ||
| + | 738197504 < | ||
| + | 805306368 < | ||
| + | 872415232 < | ||
| + | 939524096 < | ||
| + | 1006632960 < | ||
| + | 1082130432 < | ||
| + | 1090519040 < | ||
| + | 1098907648 < | ||
| + | 1101004800 < | ||
| + | 1103101952 < | ||
| + | 1105199104 < | ||
| + | 1107296256 < | ||
| + | 1115684864 < | ||
| + | 1124073472 < | ||
| + | 1132462080 < | ||
| + | 1136656384 < | ||
| + | 1138753536 < | ||
| + | 1140850688 < | ||
| + | 1149239296 < | ||
| + | 1157627904 < | ||
| + | 1166016512 < | ||
| + | 1174405120 < | ||
| + | 1182793728 < | ||
| + | 1191182336 < | ||
| + | 1199570944 < | ||
| + | 1207959552 < | ||
| + | 1216348160 < | ||
| + | 1224736768 < | ||
| + | 1233125376 < | ||
| + | 1241513984 < | ||
| + | |||
| + | Concepts: | ||
| + | 0 < | ||
| + | 268435456 < | ||
| + | 301989888 < | ||
| + | 335544320 < | ||
| + | 369098752 < | ||
| + | 402653184 < | ||
| + | 436207616 < | ||
| + | 469762048 < | ||
| + | 536870912 < | ||
| + | 570425344 < | ||
| + | 603979776 < | ||
| + | 637534208 < | ||
| + | 671088640 < | ||
| + | 704643072 < | ||
| + | 713031680 < | ||
| + | 721420288 < | ||
| + | 729808896 < | ||
| + | 738197504 < | ||
| + | 805306368 < | ||
| + | 872415232 < | ||
| + | 939524096 < | ||
| + | 956301312 < | ||
| + | 973078528 < | ||
| + | 1006632960 < | ||
| + | 1015021568 < | ||
| + | 1023410176 < | ||
| + | 1031798784 < | ||
| + | 1033895936 < | ||
| + | 1035993088 < | ||
| + | 1040187392 < | ||
| + | 1042284544 < | ||
| + | 1044381696 < | ||
| + | 1044643840 < | ||
| + | 1044905984 < | ||
| + | 1045168128 < | ||
| + | 1045430272 < | ||
| + | 1045692416 < | ||
| + | 1045954560 < | ||
| + | 1046478848 < | ||
| + | 1073741824 < | ||
| + | 1140850688 < | ||
| + | 1174405120 < | ||
| + | 1207959552 < | ||
| + | </ | ||
| + | |||
| + | ===LUBM Univ1 === | ||
| + | -[[http:// | ||
| + | |||
| + | -[[http:// | ||
| + | |||
| + | -[[http:// | ||
| + | |||
| + | |||
| + | -[[http:// | ||
site/recherche/logiciels/rdfdist.1431340141.txt.gz · Dernière modification : de amine
