@@ -28,7 +28,7 @@
 >>> result = graph.export_to_orientdb("OrientDbDocTest",5)
 <progress>
 >>> result
-{u'db_uri': u'remote:hostname:2424/OrientDbTest',
+{u'db_uri': u'remote:hostname:2424/OrientDbDocTest',
 u'exported_edges': {u'edges': {u'exported_count': 5, u'failure_count': 0}},
 u'exported_vertices': {u'source': {u'exported_count': 5,u'failure_count': 0}}}
 
@@ -26,7 +26,8 @@ case class DbConfiguration(@ArgDoc("""OrientDB database URI.""") dbUri: String,
 @ArgDoc("""The database password.""") dbPassword: String,
 @ArgDoc("""Port number.""") portNumber: String,
 @ArgDoc("""The database host.""") dbHost: String,
-@ArgDoc("""The root password.""") rootPassword: String) extends Serializable {
+@ArgDoc("""The root password.""") rootPassword: String,
+@ArgDoc("""Additional database properties.""") dbProperties: Option[Map[String, Any]] = None) extends Serializable {
 
 require(dbUri != null, "database URI is required")
 require(dbUserName != null, "the user name is required")
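
Note: the new dbProperties parameter is optional and defaults to None, so existing call sites are unaffected. A minimal construction sketch (the URI, host, and credentials are placeholder values; the buffer-size key is the one exercised by the TestingOrientDb trait below):

    val config = DbConfiguration(
      dbUri = "remote:hostname:2424/OrientDbTest", // placeholder URI
      dbUserName = "admin",                        // placeholder credentials
      dbPassword = "admin",
      portNumber = "2424",
      dbHost = "hostname",
      rootPassword = "root",
      dbProperties = Some(Map("storage.diskCache.bufferSize" -> 256)))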
@@ -38,6 +38,10 @@ object GraphDbFactory extends EventLogging {
 */
 def graphDbConnector(dbConfigurations: DbConfiguration): OrientGraphNoTx = {
 val orientDb: ODatabaseDocumentTx = new ODatabaseDocumentTx(dbConfigurations.dbUri)
+dbConfigurations.dbProperties.foreach(propertyMap => {
+propertyMap.foreach { case (key, value) => orientDb.setProperty(key, value) }
+})
+
 val orientGraphDb = if (dbConfigurations.dbUri.startsWith("remote:")) {
 if (!new OServerAdmin(dbConfigurations.dbUri).connect(rootUserName, dbConfigurations.rootPassword).existsDatabase()) {
 new OServerAdmin(dbConfigurations.dbUri).connect(rootUserName, dbConfigurations.rootPassword).createDatabase("graph", "plocal")
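
Note: the properties are applied to the ODatabaseDocumentTx handle before the remote or plocal connection is opened, and the outer Option.foreach makes the block a no-op when dbProperties is None. The same idiom in isolation (a sketch, assuming a dbConfig built as in the example above):

    import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx

    val orientDb = new ODatabaseDocumentTx(dbConfig.dbUri)
    dbConfig.dbProperties.foreach(_.foreach {
      case (key, value) => orientDb.setProperty(key, value) // e.g. "storage.diskCache.bufferSize" -> 256
    })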
@@ -37,7 +37,8 @@ class OrientDbEdgeRdd(sc: SparkContext, dbConfigurations: DbConfiguration) exten
 val edgeBuffer = new ArrayBuffer[Edge]()
 val schemaReader = new SchemaReader(graph)
 val edgeSchema = schemaReader.importEdgeSchema(partition.className)
-val edges: OrientDynaElementIterable = graph.command(new OCommandSQL(s"select from ${partition.className}")).execute()
+val edges: OrientDynaElementIterable = graph.command(
+new OCommandSQL(s"select from cluster:${partition.clusterId} where @class='${partition.className}'")).execute()
 val edgeIterator = edges.iterator().asInstanceOf[java.util.Iterator[BlueprintsEdge]]
 while (edgeIterator.hasNext) {
 val edgeReader = new EdgeReader(graph, edgeSchema)
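
Note: the old query scanned the entire class from every partition; the new one scans only the partition's own cluster, so each Spark partition (apparently one per OrientDB cluster) reads a disjoint slice of the edges, and the @class filter guards against records of other classes sharing the cluster. For a hypothetical partition the generated SQL looks like:

    // Illustrative values for a single partition.
    val clusterId = 9
    val className = "edge_label"
    val sql = s"select from cluster:$clusterId where @class='$className'"
    // => select from cluster:9 where @class='edge_label'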
@@ -13,10 +13,14 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.trustedanalytics.atk.testutils
+package org.trustedanalytics.atk.plugins
 
 import java.io.File
-import com.tinkerpop.blueprints.impls.orient.{ OrientGraphNoTx, OrientGraphFactory }
+
+import com.orientechnologies.orient.core.intent.OIntentMassiveInsert
+import com.tinkerpop.blueprints.impls.orient.{ OrientGraphFactory, OrientGraphNoTx }
+import org.trustedanalytics.atk.plugins.orientdb.DbConfiguration
+import org.trustedanalytics.atk.testutils.DirectoryUtils
 
 /**
 * setup for testing export to OrientDB plugin functions
@@ -25,12 +29,14 @@ trait TestingOrientDb {
 
 var tmpDir: File = null
 var dbUri: String = null
-var dbName: String = "OrientDbTest"
+var dbName: String = "OrientDbTest1"
 var dbUserName = "admin"
 var dbPassword = "admin"
 var rootPassword = "root"
+var dbConfig: DbConfiguration = null
 var orientMemoryGraph: OrientGraphNoTx = null
 var orientFileGraph: OrientGraphNoTx = null
+val dbProperties: Map[String, Any] = Map(("storage.diskCache.bufferSize", 256))
 
 /**
 * create in memory Orient graph database
@@ -44,11 +50,13 @@
 * create plocal Orient graph database
 */
 def setupOrientDb(): Unit = {
-
-tmpDir = DirectoryUtils.createTempDirectory("orient-graph-for-unit-testing")
-dbUri = "plocal:/" + tmpDir.getAbsolutePath + "/" + dbName
+val uuid = java.util.UUID.randomUUID.toString
+tmpDir = DirectoryUtils.createTempDirectory("orientgraphtests")
+dbUri = "plocal:" + tmpDir.getAbsolutePath + "/" + dbName + uuid
+dbConfig = new DbConfiguration(dbUri, dbUserName, dbUserName, "port", "host", rootPassword, Some(dbProperties))
 val factory = new OrientGraphFactory(dbUri, dbUserName, dbPassword)
 orientFileGraph = factory.getNoTx
+orientFileGraph.declareIntent(new OIntentMassiveInsert())
 }
 
 /**
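
Note: several fixes land together here. The UUID suffix gives every run a fresh plocal database; dbConfig is now built once in the shared setup, which is why the per-test DbConfiguration constructions are deleted in the test diffs below (dbUserName is passed for both the user-name and password arguments, which works only because both are "admin"); and declareIntent(new OIntentMassiveInsert()) hints OrientDB to optimize for bulk writes. The switch from "plocal:/" to "plocal:" also matters: getAbsolutePath already begins with "/" on POSIX systems, so the old concatenation produced a doubled slash. A small sketch:

    // Illustrative path; shows why the leading slash was dropped from the prefix.
    val tmp = new java.io.File("/tmp/orientgraphtests")
    val oldUri = "plocal:/" + tmp.getAbsolutePath + "/db" // "plocal://tmp/...": doubled slash
    val fixedUri = "plocal:" + tmp.getAbsolutePath + "/db" // "plocal:/tmp/...": well-formed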
@@ -20,7 +20,8 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec }
 import org.trustedanalytics.atk.domain.schema._
-import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec }
+import org.trustedanalytics.atk.plugins.TestingOrientDb
+import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec
 
 class EdgeFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach {
 
@@ -35,7 +36,6 @@ class EdgeFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with
 "Edge frame writer" should {
 "Export edge frame" in {
 // exporting a vertex frame:
-val dbConfig = new DbConfiguration(dbUri, dbUserName, dbUserName, "port", "host", rootPassword)
 val vColumns = List(Column(GraphSchema.vidProperty, DataTypes.int64), Column(GraphSchema.labelProperty, DataTypes.string), Column("name", DataTypes.string), Column("from", DataTypes.string), Column("to", DataTypes.string), Column("fair", DataTypes.int32))
 val vSchema = new VertexSchema(vColumns, GraphSchema.labelProperty, null)
 
@@ -56,14 +56,14 @@ class EdgeFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with
 
 //exporting the edge frame:
 val eColumns = List(Column(GraphSchema.edgeProperty, DataTypes.int64), Column(GraphSchema.srcVidProperty, DataTypes.int64), Column(GraphSchema.destVidProperty, DataTypes.int64), Column(GraphSchema.labelProperty, DataTypes.string), Column("distance", DataTypes.int32))
-val eSchema = new EdgeSchema(eColumns, "label", GraphSchema.labelProperty, GraphSchema.labelProperty)
+val eSchema = new EdgeSchema(eColumns, "edge_label", GraphSchema.labelProperty, GraphSchema.labelProperty)
 val edges: List[Row] = List(
 new GenericRow(Array(1L, 1L, 2L, "distance1", 100)),
 new GenericRow(Array(2L, 2L, 3L, "distance2", 200)),
 new GenericRow(Array(3L, 3L, 4L, "distance3", 400)))
 val eRowRdd = sparkContext.parallelize(edges)
 val edgeFrameRdd = new EdgeFrameRdd(eSchema, eRowRdd)
-val batchSize = 3
+val batchSize = 4
 if (orientFileGraph.getEdgeType(eSchema.label) == null) {
 val schemaWriter = new SchemaWriter(orientFileGraph)
 val oEdgeType = schemaWriter.createEdgeSchema(eSchema)
@@ -19,7 +19,8 @@ import org.apache.spark.atk.graph.{ Edge, Vertex }
 import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec }
 import org.trustedanalytics.atk.domain.schema._
-import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec }
+import org.trustedanalytics.atk.plugins.TestingOrientDb
+import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec
 
 class EdgeWriterTest extends WordSpec with Matchers with TestingSparkContextWordSpec with TestingOrientDb with BeforeAndAfterEach {
 override def beforeEach() {
@@ -20,7 +20,8 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec }
 import org.trustedanalytics.atk.domain.schema.{ VertexSchema, DataTypes, GraphSchema, Column }
-import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec }
+import org.trustedanalytics.atk.plugins.TestingOrientDb
+import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec
 
 class VertexFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with Matchers with TestingOrientDb with BeforeAndAfterEach {
 override def beforeEach() {
@@ -33,7 +34,6 @@ class VertexFrameWriterTest extends WordSpec with TestingSparkContextWordSpec wi
 
 "vertex frame writer" should {
 "export vertex frame to OrientDB" in {
-val dbConfig = new DbConfiguration(dbUri, dbUserName, dbPassword, "port", "host", rootPassword)
 val columns = List(Column(GraphSchema.vidProperty, DataTypes.int64), Column(GraphSchema.labelProperty, DataTypes.string), Column("name", DataTypes.string), Column("from", DataTypes.string), Column("to", DataTypes.string), Column("fair", DataTypes.int32))
 val schema = new VertexSchema(columns, GraphSchema.labelProperty, null)
 val vertices: List[Row] = List(
@@ -20,7 +20,8 @@ import org.apache.spark.atk.graph.Vertex
 import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec }
 import org.trustedanalytics.atk.domain.schema.{ VertexSchema, DataTypes, GraphSchema, Column }
-import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec }
+import org.trustedanalytics.atk.plugins.TestingOrientDb
+import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec
 
 class VertexWriterTest extends WordSpec with Matchers with TestingSparkContextWordSpec with TestingOrientDb with BeforeAndAfterEach {
 
@@ -20,8 +20,8 @@ import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.scalatest.{ Matchers, BeforeAndAfterEach, WordSpec }
 import org.trustedanalytics.atk.domain.schema._
 import org.trustedanalytics.atk.engine.frame.RowWrapper
+import org.trustedanalytics.atk.plugins.TestingOrientDb
 import org.trustedanalytics.atk.plugins.orientdb.{ SchemaWriter, VertexWriter, EdgeWriter }
-import org.trustedanalytics.atk.testutils.TestingOrientDb
 
 class EdgeReaderTest extends WordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach {
 
@@ -20,8 +20,8 @@ import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.scalatest.{ Matchers, BeforeAndAfterEach, WordSpec }
 import org.trustedanalytics.atk.domain.schema.DataTypes.string
 import org.trustedanalytics.atk.domain.schema._
+import org.trustedanalytics.atk.plugins.TestingOrientDb
 import org.trustedanalytics.atk.plugins.orientdb.{ SchemaWriter, EdgeWriter, VertexWriter }
-import org.trustedanalytics.atk.testutils.TestingOrientDb
 
 class SchemaReaderTest extends WordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach {
 
@@ -20,8 +20,8 @@ import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.scalatest.{ Matchers, BeforeAndAfterEach, WordSpec }
 import org.trustedanalytics.atk.domain.schema.{ VertexSchema, DataTypes, GraphSchema, Column }
 import org.trustedanalytics.atk.engine.frame.RowWrapper
+import org.trustedanalytics.atk.plugins.TestingOrientDb
 import org.trustedanalytics.atk.plugins.orientdb.VertexWriter
-import org.trustedanalytics.atk.testutils.TestingOrientDb
 
 class VertexReaderTest extends WordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach {
 
@@ -65,8 +65,7 @@ time the model is trained, allows LDA to generate the same topic distribution
 if the corpus and LDA parameters are unchanged.""") randomSeed: Option[Long] = None,
 @ArgDoc("""Period (in iterations) between checkpoints (default = 10). Checkpointing helps with recovery
 * (when nodes fail). It also helps with eliminating temporary shuffle files on disk, which can be
-* important when LDA is run for many iterations. If the checkpoint directory is not set, this setting is ignored.""")
-checkPointInterval: Int = 10) {
+* important when LDA is run for many iterations. If the checkpoint directory is not set, this setting is ignored.""") checkPointInterval: Int = 10) {
 
 require(model != null, "model is required")
 require(frame != null, "frame is required")
4 changes: 2 additions & 2 deletions pom.xml
@@ -140,7 +140,7 @@ export MAVEN_OPTS="-Xmx512m -XX:PermSize=256m"
 <dep.hive.version>1.1.0-${dep.cdh.version}</dep.hive.version>
 <dev.spark.hive.version>1.6.0-${dep.cdh.version}</dev.spark.hive.version>
 <dep.daal.version>2016.2.181</dep.daal.version>
-<dep.orientdb.version>2.1.16</dep.orientdb.version>
+<dep.orientdb.version>2.2.6</dep.orientdb.version>
 
 <!--START GAO MAVEN -->
 
@@ -307,7 +307,7 @@ export MAVEN_OPTS="-Xmx512m -XX:PermSize=256m"
 <filereports>WDF TestSuite.txt</filereports>
 <parallel>false</parallel>
 <stdout>FTD</stdout>
-<argLine>-Xmx512m -XX:PermSize=256m</argLine>
+<argLine>-Xmx1024m -XX:PermSize=256m</argLine>
 </configuration>
 <executions>
 <execution>