diff --git a/doc-api-examples/src/main/resources/python/graph/export_to_orientdb.rst b/doc-api-examples/src/main/resources/python/graph/export_to_orientdb.rst index 904d7c3e14..7db4fd1d70 100644 --- a/doc-api-examples/src/main/resources/python/graph/export_to_orientdb.rst +++ b/doc-api-examples/src/main/resources/python/graph/export_to_orientdb.rst @@ -28,7 +28,7 @@ >>> result = graph.export_to_orientdb("OrientDbDocTest",5) >>> result - {u'db_uri': u'remote:hostname:2424/OrientDbTest', + {u'db_uri': u'remote:hostname:2424/OrientDbDocTest', u'exported_edges': {u'edges': {u'exported_count': 5, u'failure_count': 0}}, u'exported_vertices': {u'source': {u'exported_count': 5,u'failure_count': 0}}} diff --git a/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/DbConfiguration.scala b/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/DbConfiguration.scala index f02e49f695..d5352ac70f 100644 --- a/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/DbConfiguration.scala +++ b/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/DbConfiguration.scala @@ -26,7 +26,8 @@ case class DbConfiguration(@ArgDoc("""OrientDB database URI.""") dbUri: String, @ArgDoc("""The database password.""") dbPassword: String, @ArgDoc("""Port number.""") portNumber: String, @ArgDoc("""The database host.""") dbHost: String, - @ArgDoc("""The root password.""") rootPassword: String) extends Serializable { + @ArgDoc("""The root password.""") rootPassword: String, + @ArgDoc("""Additional database properties.""") dbProperties: Option[Map[String, Any]] = None) extends Serializable { require(dbUri != null, "database URI is required") require(dbUserName != null, "the user name is required") diff --git a/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/GraphDbFactory.scala b/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/GraphDbFactory.scala index 5cc02e606b..7830108afd 100644 --- a/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/GraphDbFactory.scala +++ b/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdb/GraphDbFactory.scala @@ -38,6 +38,10 @@ object GraphDbFactory extends EventLogging { */ def graphDbConnector(dbConfigurations: DbConfiguration): OrientGraphNoTx = { val orientDb: ODatabaseDocumentTx = new ODatabaseDocumentTx(dbConfigurations.dbUri) + dbConfigurations.dbProperties.foreach(propertyMap => { + propertyMap.foreach { case (key, value) => orientDb.setProperty(key, value) } + }) + val orientGraphDb = if (dbConfigurations.dbUri.startsWith("remote:")) { if (!new OServerAdmin(dbConfigurations.dbUri).connect(rootUserName, dbConfigurations.rootPassword).existsDatabase()) { new OServerAdmin(dbConfigurations.dbUri).connect(rootUserName, dbConfigurations.rootPassword).createDatabase("graph", "plocal") diff --git a/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdbimport/OrientDbEdgeRdd.scala b/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdbimport/OrientDbEdgeRdd.scala index 85cc94298a..a89d3bbec6 100644 --- a/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdbimport/OrientDbEdgeRdd.scala +++ b/engine-plugins/graph-plugins/src/main/scala/org/trustedanalytics/atk/plugins/orientdbimport/OrientDbEdgeRdd.scala @@ -37,7 +37,8 @@ class OrientDbEdgeRdd(sc: SparkContext, dbConfigurations: DbConfiguration) exten val edgeBuffer = new ArrayBuffer[Edge]() val schemaReader = new SchemaReader(graph) val edgeSchema = schemaReader.importEdgeSchema(partition.className) - val edges: OrientDynaElementIterable = graph.command(new OCommandSQL(s"select from ${partition.className}")).execute() + val edges: OrientDynaElementIterable = graph.command( + new OCommandSQL(s"select from cluster:${partition.clusterId} where @class='${partition.className}'")).execute() val edgeIterator = edges.iterator().asInstanceOf[java.util.Iterator[BlueprintsEdge]] while (edgeIterator.hasNext) { val edgeReader = new EdgeReader(graph, edgeSchema) diff --git a/testutils/src/main/scala/org/trustedanalytics/atk/testutils/TestingOrientDb.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/TestingOrientDb.scala similarity index 70% rename from testutils/src/main/scala/org/trustedanalytics/atk/testutils/TestingOrientDb.scala rename to engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/TestingOrientDb.scala index 2be7e3f8b1..d3993a0434 100644 --- a/testutils/src/main/scala/org/trustedanalytics/atk/testutils/TestingOrientDb.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/TestingOrientDb.scala @@ -13,10 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.trustedanalytics.atk.testutils +package org.trustedanalytics.atk.plugins import java.io.File -import com.tinkerpop.blueprints.impls.orient.{ OrientGraphNoTx, OrientGraphFactory } + +import com.orientechnologies.orient.core.intent.OIntentMassiveInsert +import com.tinkerpop.blueprints.impls.orient.{ OrientGraphFactory, OrientGraphNoTx } +import org.trustedanalytics.atk.plugins.orientdb.DbConfiguration +import org.trustedanalytics.atk.testutils.DirectoryUtils /** * setup for testing export to OrientDB plugin functions @@ -25,12 +29,14 @@ trait TestingOrientDb { var tmpDir: File = null var dbUri: String = null - var dbName: String = "OrientDbTest" + var dbName: String = "OrientDbTest1" var dbUserName = "admin" var dbPassword = "admin" var rootPassword = "root" + var dbConfig: DbConfiguration = null var orientMemoryGraph: OrientGraphNoTx = null var orientFileGraph: OrientGraphNoTx = null + val dbProperties: Map[String, Any] = Map(("storage.diskCache.bufferSize", 256)) /** * create in memory Orient graph database @@ -44,11 +50,13 @@ trait TestingOrientDb { * create plocal Orient graph database */ def setupOrientDb(): Unit = { - - tmpDir = DirectoryUtils.createTempDirectory("orient-graph-for-unit-testing") - dbUri = "plocal:/" + tmpDir.getAbsolutePath + "/" + dbName + val uuid = java.util.UUID.randomUUID.toString + tmpDir = DirectoryUtils.createTempDirectory("orientgraphtests") + dbUri = "plocal:" + tmpDir.getAbsolutePath + "/" + dbName + uuid + dbConfig = new DbConfiguration(dbUri, dbUserName, dbUserName, "port", "host", rootPassword, Some(dbProperties)) val factory = new OrientGraphFactory(dbUri, dbUserName, dbPassword) orientFileGraph = factory.getNoTx + orientFileGraph.declareIntent(new OIntentMassiveInsert()) } /** diff --git a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeFrameWriterTest.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeFrameWriterTest.scala index df39c8276c..acef70c58b 100644 --- a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeFrameWriterTest.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeFrameWriterTest.scala @@ -20,7 +20,8 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec } import org.trustedanalytics.atk.domain.schema._ -import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec } +import org.trustedanalytics.atk.plugins.TestingOrientDb +import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec class EdgeFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach { @@ -35,7 +36,6 @@ class EdgeFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with "Edge frame writer" should { "Export edge frame" in { // exporting a vertex frame: - val dbConfig = new DbConfiguration(dbUri, dbUserName, dbUserName, "port", "host", rootPassword) val vColumns = List(Column(GraphSchema.vidProperty, DataTypes.int64), Column(GraphSchema.labelProperty, DataTypes.string), Column("name", DataTypes.string), Column("from", DataTypes.string), Column("to", DataTypes.string), Column("fair", DataTypes.int32)) val vSchema = new VertexSchema(vColumns, GraphSchema.labelProperty, null) @@ -56,14 +56,14 @@ class EdgeFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with //exporting the edge frame: val eColumns = List(Column(GraphSchema.edgeProperty, DataTypes.int64), Column(GraphSchema.srcVidProperty, DataTypes.int64), Column(GraphSchema.destVidProperty, DataTypes.int64), Column(GraphSchema.labelProperty, DataTypes.string), Column("distance", DataTypes.int32)) - val eSchema = new EdgeSchema(eColumns, "label", GraphSchema.labelProperty, GraphSchema.labelProperty) + val eSchema = new EdgeSchema(eColumns, "edge_label", GraphSchema.labelProperty, GraphSchema.labelProperty) val edges: List[Row] = List( new GenericRow(Array(1L, 1L, 2L, "distance1", 100)), new GenericRow(Array(2L, 2L, 3L, "distance2", 200)), new GenericRow(Array(3L, 3L, 4L, "distance3", 400))) val eRowRdd = sparkContext.parallelize(edges) val edgeFrameRdd = new EdgeFrameRdd(eSchema, eRowRdd) - val batchSize = 3 + val batchSize = 4 if (orientFileGraph.getEdgeType(eSchema.label) == null) { val schemaWriter = new SchemaWriter(orientFileGraph) val oEdgeType = schemaWriter.createEdgeSchema(eSchema) diff --git a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeWriterTest.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeWriterTest.scala index 995ae8eb05..9a9367f045 100644 --- a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeWriterTest.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/EdgeWriterTest.scala @@ -19,7 +19,8 @@ import org.apache.spark.atk.graph.{ Edge, Vertex } import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec } import org.trustedanalytics.atk.domain.schema._ -import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec } +import org.trustedanalytics.atk.plugins.TestingOrientDb +import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec class EdgeWriterTest extends WordSpec with Matchers with TestingSparkContextWordSpec with TestingOrientDb with BeforeAndAfterEach { override def beforeEach() { diff --git a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexFrameWriterTest.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexFrameWriterTest.scala index ec777c5cb6..2ce47f0271 100644 --- a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexFrameWriterTest.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexFrameWriterTest.scala @@ -20,7 +20,8 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec } import org.trustedanalytics.atk.domain.schema.{ VertexSchema, DataTypes, GraphSchema, Column } -import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec } +import org.trustedanalytics.atk.plugins.TestingOrientDb +import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec class VertexFrameWriterTest extends WordSpec with TestingSparkContextWordSpec with Matchers with TestingOrientDb with BeforeAndAfterEach { override def beforeEach() { @@ -33,7 +34,6 @@ class VertexFrameWriterTest extends WordSpec with TestingSparkContextWordSpec wi "vertex frame writer" should { "export vertex frame to OrientDB" in { - val dbConfig = new DbConfiguration(dbUri, dbUserName, dbPassword, "port", "host", rootPassword) val columns = List(Column(GraphSchema.vidProperty, DataTypes.int64), Column(GraphSchema.labelProperty, DataTypes.string), Column("name", DataTypes.string), Column("from", DataTypes.string), Column("to", DataTypes.string), Column("fair", DataTypes.int32)) val schema = new VertexSchema(columns, GraphSchema.labelProperty, null) val vertices: List[Row] = List( diff --git a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexWriterTest.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexWriterTest.scala index 2a88d5c9a3..cec9703408 100644 --- a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexWriterTest.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdb/VertexWriterTest.scala @@ -20,7 +20,8 @@ import org.apache.spark.atk.graph.Vertex import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{ BeforeAndAfterEach, Matchers, WordSpec } import org.trustedanalytics.atk.domain.schema.{ VertexSchema, DataTypes, GraphSchema, Column } -import org.trustedanalytics.atk.testutils.{ TestingOrientDb, TestingSparkContextWordSpec } +import org.trustedanalytics.atk.plugins.TestingOrientDb +import org.trustedanalytics.atk.testutils.TestingSparkContextWordSpec class VertexWriterTest extends WordSpec with Matchers with TestingSparkContextWordSpec with TestingOrientDb with BeforeAndAfterEach { diff --git a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/EdgeReaderTest.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/EdgeReaderTest.scala index 8dd868835e..e4768873de 100644 --- a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/EdgeReaderTest.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/EdgeReaderTest.scala @@ -20,8 +20,8 @@ import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{ Matchers, BeforeAndAfterEach, WordSpec } import org.trustedanalytics.atk.domain.schema._ import org.trustedanalytics.atk.engine.frame.RowWrapper +import org.trustedanalytics.atk.plugins.TestingOrientDb import org.trustedanalytics.atk.plugins.orientdb.{ SchemaWriter, VertexWriter, EdgeWriter } -import org.trustedanalytics.atk.testutils.TestingOrientDb class EdgeReaderTest extends WordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach { diff --git a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/SchemaReaderTest.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/SchemaReaderTest.scala index 9302b6677b..3c8c3554b5 100644 --- a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/SchemaReaderTest.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/SchemaReaderTest.scala @@ -20,8 +20,8 @@ import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{ Matchers, BeforeAndAfterEach, WordSpec } import org.trustedanalytics.atk.domain.schema.DataTypes.string import org.trustedanalytics.atk.domain.schema._ +import org.trustedanalytics.atk.plugins.TestingOrientDb import org.trustedanalytics.atk.plugins.orientdb.{ SchemaWriter, EdgeWriter, VertexWriter } -import org.trustedanalytics.atk.testutils.TestingOrientDb class SchemaReaderTest extends WordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach { diff --git a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/VertexReaderTest.scala b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/VertexReaderTest.scala index 8614b5a0b7..782eba929b 100644 --- a/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/VertexReaderTest.scala +++ b/engine-plugins/graph-plugins/src/test/scala/org/trustedanalytics/atk/plugins/orientdbimport/VertexReaderTest.scala @@ -20,8 +20,8 @@ import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{ Matchers, BeforeAndAfterEach, WordSpec } import org.trustedanalytics.atk.domain.schema.{ VertexSchema, DataTypes, GraphSchema, Column } import org.trustedanalytics.atk.engine.frame.RowWrapper +import org.trustedanalytics.atk.plugins.TestingOrientDb import org.trustedanalytics.atk.plugins.orientdb.VertexWriter -import org.trustedanalytics.atk.testutils.TestingOrientDb class VertexReaderTest extends WordSpec with TestingOrientDb with Matchers with BeforeAndAfterEach { diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPluginArgs.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPluginArgs.scala index e53a80906a..b07d450082 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPluginArgs.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPluginArgs.scala @@ -65,8 +65,7 @@ time the model is trained, allows LDA to generate the same topic distribution if the corpus and LDA parameters are unchanged.""") randomSeed: Option[Long] = None, @ArgDoc("""Period (in iterations) between checkpoints (default = 10). Checkpointing helps with recovery * (when nodes fail). It also helps with eliminating temporary shuffle files on disk, which can be -* important when LDA is run for many iterations. If the checkpoint directory is not set, this setting is ignored.""") - checkPointInterval: Int = 10) { +* important when LDA is run for many iterations. If the checkpoint directory is not set, this setting is ignored.""") checkPointInterval: Int = 10) { require(model != null, "model is required") require(frame != null, "frame is required") diff --git a/pom.xml b/pom.xml index c9f6561770..dc10005bce 100644 --- a/pom.xml +++ b/pom.xml @@ -140,7 +140,7 @@ export MAVEN_OPTS="-Xmx512m -XX:PermSize=256m" 1.1.0-${dep.cdh.version} 1.6.0-${dep.cdh.version} 2016.2.181 - 2.1.16 + 2.2.6 @@ -307,7 +307,7 @@ export MAVEN_OPTS="-Xmx512m -XX:PermSize=256m" WDF TestSuite.txt false FTD - -Xmx512m -XX:PermSize=256m + -Xmx1024m -XX:PermSize=256m