diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPredictPlugin.scala index 65d2c2e656..1d6b2751ca 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPredictPlugin.scala @@ -62,14 +62,6 @@ class NaiveBayesPredictPlugin extends SparkCommandPlugin[NaiveBayesPredictArgs, override def name: String = "model:naive_bayes/predict" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: NaiveBayesPredictArgs)(implicit invocation: Invocation) = 9 - /** * Get the predictions for observations in a test frame * @@ -84,6 +76,7 @@ class NaiveBayesPredictPlugin extends SparkCommandPlugin[NaiveBayesPredictArgs, val model: Model = arguments.model // Loading model + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") val naiveBayesJsObject = model.dataOption.getOrElse( throw new RuntimeException("This model has not been trained yet. Please train before trying to predict") ) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPublishPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPublishPlugin.scala index c4063a49f4..ff6b2dc937 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPublishPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesPublishPlugin.scala @@ -51,20 +51,6 @@ class NaiveBayesPublishPlugin extends CommandPlugin[ModelPublishArgs, ExportMeta override def name: String = "model:naive_bayes/publish" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ModelPublishArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTestPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTestPlugin.scala index 35b2e4e6d9..be31b012dc 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTestPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTestPlugin.scala @@ -75,13 +75,6 @@ class NaiveBayesTestPlugin extends SparkCommandPlugin[NaiveBayesTestArgs, Classi override def name: String = "model:naive_bayes/test" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: NaiveBayesTestArgs)(implicit invocation: Invocation) = 9 /** * Get the predictions for observations in a test frame * @@ -95,6 +88,7 @@ class NaiveBayesTestPlugin extends SparkCommandPlugin[NaiveBayesTestArgs, Classi val model: Model = arguments.model val frame: SparkFrame = arguments.frame + require(!frame.rdd.isEmpty(), "Test Frame is empty. Please test on a non-empty Frame.") //Extracting the model and data to run on val naiveBayesData = model.data.convertTo[NaiveBayesData] val naiveBayesModel = naiveBayesData.naiveBayesModel diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTrainPlugin.scala index 0a8e5d3f51..45dc082280 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/NaiveBayesTrainPlugin.scala @@ -64,12 +64,6 @@ class NaiveBayesTrainPlugin extends SparkCommandPlugin[NaiveBayesTrainArgs, Unit override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: NaiveBayesTrainArgs)(implicit invocation: Invocation) = 109 - /** * Run MLLib's NaiveBayes() on the training frame and create a Model for it. * @@ -84,6 +78,7 @@ class NaiveBayesTrainPlugin extends SparkCommandPlugin[NaiveBayesTrainArgs, Unit val model: Model = arguments.model //create RDD from the frame + require(!frame.rdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") val labeledTrainRdd: RDD[LabeledPoint] = frame.rdd.toLabeledPointRDD(arguments.labelColumn, arguments.observationColumns) //Running MLLib diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPredictPlugin.scala index e6feda5a43..c2fca3b837 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPredictPlugin.scala @@ -61,13 +61,6 @@ class RandomForestClassifierPredictPlugin extends SparkCommandPlugin[RandomFores override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: RandomForestClassifierPredictArgs)(implicit invocation: Invocation) = 9 - /** * Get the predictions for observations in a test frame * @@ -82,6 +75,7 @@ class RandomForestClassifierPredictPlugin extends SparkCommandPlugin[RandomFores val frame: SparkFrame = arguments.frame //Running MLLib + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") val rfData = model.readFromStorage().convertTo[RandomForestClassifierData] val rfModel = rfData.randomForestModel if (arguments.observationColumns.isDefined) { diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPublishPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPublishPlugin.scala index d3652013bb..88950ecc6b 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPublishPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierPublishPlugin.scala @@ -51,20 +51,6 @@ class RandomForestClassifierPublishPlugin extends CommandPlugin[ModelPublishArgs override def name: String = "model:random_forest_classifier/publish" override def apiMaturityTag = Some(ApiMaturityTag.Beta) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ModelPublishArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTestPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTestPlugin.scala index 3014424d78..ebf9d44df9 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTestPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTestPlugin.scala @@ -77,12 +77,6 @@ class RandomForestClassifierTestPlugin extends SparkCommandPlugin[RandomForestCl override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: RandomForestClassifierTestArgs)(implicit invocation: Invocation) = 9 /** * Get the predictions for observations in a test frame * @@ -97,6 +91,7 @@ class RandomForestClassifierTestPlugin extends SparkCommandPlugin[RandomForestCl val frame: SparkFrame = arguments.frame //Extracting the model and data to run on + require(!frame.rdd.isEmpty(), "Test Frame is empty. Please test on a non-empty Frame.") val rfData = model.readFromStorage().convertTo[RandomForestClassifierData] val rfModel = rfData.randomForestModel if (arguments.observationColumns.isDefined) { diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTrainPlugin.scala index 282b1c7ee4..839b2cc900 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/RandomForestClassifierTrainPlugin.scala @@ -63,12 +63,6 @@ class RandomForestClassifierTrainPlugin extends SparkCommandPlugin[RandomForestC override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: RandomForestClassifierTrainArgs)(implicit invocation: Invocation) = 109 - /** * Run MLLib's RandomForest classifier on the training frame and create a Model for it. * @@ -83,6 +77,7 @@ class RandomForestClassifierTrainPlugin extends SparkCommandPlugin[RandomForestC val model: Model = arguments.model //create RDD from the frame + require(!frame.rdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") val labeledTrainRdd: RDD[LabeledPoint] = frame.rdd.toLabeledPointRDD(arguments.labelColumn, arguments.observationColumns) val randomForestModel = RandomForest.trainClassifier(labeledTrainRdd, arguments.numClasses, arguments.getCategoricalFeaturesInfo, arguments.numTrees, arguments.getFeatureSubsetCategory, arguments.impurity, arguments.maxDepth, arguments.maxBins, arguments.seed) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPredictPlugin.scala index 15fbbf73ac..fe35641728 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPredictPlugin.scala @@ -47,13 +47,6 @@ class SVMWithSGDPredictPlugin extends SparkCommandPlugin[ClassificationWithSGDPr override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ClassificationWithSGDPredictArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPublishPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPublishPlugin.scala index 7e65eaa9d3..635a128aa1 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPublishPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDPublishPlugin.scala @@ -52,20 +52,6 @@ class SVMWithSGDPublishPlugin extends CommandPlugin[ModelPublishArgs, ExportMeta override def name: String = "model:svm/publish" override def apiMaturityTag = Some(ApiMaturityTag.Beta) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ModelPublishArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTestPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTestPlugin.scala index 758c59b538..4c8d7f4d05 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTestPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTestPlugin.scala @@ -58,13 +58,6 @@ class SVMWithSGDTestPlugin extends SparkCommandPlugin[ClassificationWithSGDTestA override def name: String = "model:svm/test" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ClassificationWithSGDTestArgs)(implicit invocation: Invocation) = 1 /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTrainPlugin.scala index 288aee5c7d..fad118af16 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/SVMWithSGDTrainPlugin.scala @@ -48,13 +48,6 @@ class SVMWithSGDTrainPlugin extends SparkCommandPlugin[ClassificationWithSGDTrai override def name: String = "model:svm/train" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: ClassificationWithSGDTrainArgs)(implicit invocation: Invocation) = 1 - /** * Run MLLib's SVMWithSGD() on the training frame and create a Model for it. * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionPredictPlugin.scala index 281547bf74..7df27cf4a4 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionPredictPlugin.scala @@ -46,13 +46,6 @@ class LogisticRegressionPredictPlugin extends SparkCommandPlugin[ClassificationW override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ClassificationWithSGDPredictArgs)(implicit invocation: Invocation) = 9 - /** * Get the predictions for observations in a test frame * @@ -66,6 +59,7 @@ class LogisticRegressionPredictPlugin extends SparkCommandPlugin[ClassificationW val frame: SparkFrame = arguments.frame val model: Model = arguments.model + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") //Running MLLib val logRegData = model.data.convertTo[LogisticRegressionData] val logRegModel = logRegData.logRegModel diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTestPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTestPlugin.scala index 4304f6c619..c24c2924e9 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTestPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTestPlugin.scala @@ -58,12 +58,6 @@ class LogisticRegressionTestPlugin extends SparkCommandPlugin[ClassificationWith override def name: String = "model:logistic_regression/test" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ClassificationWithSGDTestArgs)(implicit invocation: Invocation) = 9 /** * Get the predictions for observations in a test frame * @@ -77,6 +71,7 @@ class LogisticRegressionTestPlugin extends SparkCommandPlugin[ClassificationWith val frame: SparkFrame = arguments.frame val model: Model = arguments.model + require(!frame.rdd.isEmpty(), "Test Frame is empty. Please test on a non-empty Frame.") val logRegData = model.data.convertTo[LogisticRegressionData] val logRegModel = logRegData.logRegModel if (arguments.observationColumns.isDefined) { diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTrainPlugin.scala index c0b8378128..99fd1325b1 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/classification/glm/LogisticRegressionTrainPlugin.scala @@ -57,12 +57,6 @@ class LogisticRegressionTrainPlugin extends SparkCommandPlugin[LogisticRegressio override def name: String = "model:logistic_regression/train" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: LogisticRegressionTrainArgs)(implicit invocation: Invocation) = arguments.numIterations + 5 /** * Run MLLib's LogisticRegressionWithSGD() on the training frame and create a Model for it. * @@ -77,6 +71,7 @@ class LogisticRegressionTrainPlugin extends SparkCommandPlugin[LogisticRegressio val model: Model = arguments.model //create RDD from the frame + require(!frame.rdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") val labeledTrainRdd = frame.rdd.toLabeledPointRDDWithFrequency(arguments.labelColumn, arguments.observationColumns, arguments.frequencyColumn) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMPredictPlugin.scala index d5fbfc9a6f..4cff99a7c2 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMPredictPlugin.scala @@ -44,7 +44,6 @@ import MLLibJsonProtocol._ predicted_cluster : int Integer containing the cluster assignment.""") class GMMPredictPlugin extends SparkCommandPlugin[GMMPredictArgs, FrameReference] { - /** * The name of the command. * @@ -55,19 +54,6 @@ class GMMPredictPlugin extends SparkCommandPlugin[GMMPredictArgs, FrameReference override def apiMaturityTag = Some(ApiMaturityTag.Beta) - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: GMMPredictArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMTrainPlugin.scala index c558545f85..adb1039ecb 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/GMMTrainPlugin.scala @@ -53,20 +53,6 @@ class GMMTrainPlugin extends SparkCommandPlugin[GMMTrainArgs, GMMTrainReturn] { override def name: String = "model:gmm/train" override def apiMaturityTag = Some(ApiMaturityTag.Beta) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: GMMTrainArgs)(implicit invocation: Invocation) = 1 - /** * Run MLLib's GaussianMixtureModel() on the training frame and create a Model for it. * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPredictPlugin.scala index eba6c2910a..cf65fe557e 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPredictPlugin.scala @@ -45,7 +45,6 @@ import scala.collection.mutable.ListBuffer 'k' columns : Each of the 'k' columns containing squared distance of that observation to the 'k'th cluster center predicted_cluster column: The cluster assignment for the observation""") class KMeansPredictPlugin extends SparkCommandPlugin[KMeansPredictArgs, FrameReference] { - /** * The name of the command. * @@ -55,20 +54,6 @@ class KMeansPredictPlugin extends SparkCommandPlugin[KMeansPredictArgs, FrameRef override def name: String = "model:k_means/predict" override def apiMaturityTag = Some(ApiMaturityTag.Beta) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: KMeansPredictArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * @@ -82,6 +67,7 @@ class KMeansPredictPlugin extends SparkCommandPlugin[KMeansPredictArgs, FrameRef val frame: SparkFrame = arguments.frame val model: Model = arguments.model + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") //Extracting the KMeansModel from the stored JsObject val kmeansData = model.data.convertTo[KMeansData] val kmeansModel = kmeansData.kMeansModel diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPublishPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPublishPlugin.scala index 6c194f51e7..777696c025 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPublishPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansPublishPlugin.scala @@ -54,20 +54,6 @@ class KMeansPublishPlugin extends CommandPlugin[ModelPublishArgs, ExportMetadata override def name: String = "model:k_means/publish" override def apiMaturityTag = Some(ApiMaturityTag.Beta) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ModelPublishArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansTrainPlugin.scala index aa294d1d45..8cf956242e 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/KMeansTrainPlugin.scala @@ -49,19 +49,6 @@ class KMeansTrainPlugin extends SparkCommandPlugin[KMeansTrainArgs, KMeansTrainR override def name: String = "model:k_means/train" override def apiMaturityTag = Some(ApiMaturityTag.Beta) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: KMeansTrainArgs)(implicit invocation: Invocation) = 15 /** * Run MLLib's KMeans() on the training frame and create a Model for it. * @@ -77,6 +64,7 @@ class KMeansTrainPlugin extends SparkCommandPlugin[KMeansTrainArgs, KMeansTrainR val kMeans = KMeansTrainPlugin.initializeKmeans(arguments) val trainFrameRdd = frame.rdd + require(!trainFrameRdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") trainFrameRdd.cache() val vectorRDD = trainFrameRdd.toDenseVectorRDDWithWeights(arguments.observationColumns, arguments.columnScalings) val kmeansModel = kMeans.run(vectorRDD) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/PowerIterationClusteringPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/PowerIterationClusteringPlugin.scala index 34b3a7ec50..87d530faab 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/PowerIterationClusteringPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/PowerIterationClusteringPlugin.scala @@ -77,13 +77,6 @@ class PowerIterationClusteringPlugin extends SparkCommandPlugin[PowerIterationCl * e.g Python client via code generation. */ override def name: String = "model:power_iteration_clustering/predict" - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: PowerIterationClusteringArgs)(implicit invocation: Invocation) = 1 /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPredictPlugin.scala index 3b4b94e3aa..2657b590d9 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaPredictPlugin.scala @@ -50,12 +50,6 @@ class LdaPredictPlugin extends CommandPlugin[LdaModelPredictArgs, LdaModelPredic override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: LdaModelPredictArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaTrainPlugin.scala index fe55860d7c..99807a8827 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/clustering/lda/LdaTrainPlugin.scala @@ -54,12 +54,6 @@ class LdaTrainPlugin override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: LdaTrainArgs)(implicit invocation: Invocation) = arguments.maxIterations + 14 - override def execute(arguments: LdaTrainArgs)(implicit invocation: Invocation): LdaTrainResult = { // validate arguments @@ -67,6 +61,8 @@ class LdaTrainPlugin edgeFrame.schema.requireColumnIsType(arguments.documentColumnName, DataTypes.string) edgeFrame.schema.requireColumnIsType(arguments.wordColumnName, DataTypes.string) edgeFrame.schema.requireColumnIsType(arguments.wordCountColumnName, DataTypes.isIntegerDataType) + + require(!edgeFrame.rdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") require(edgeFrame.isParquet, "frame must be stored as parquet file, or support for new input format is needed") val ldaModel = LdaTrainFunctions.trainLdaModel(edgeFrame.rdd, arguments) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringPredictPlugin.scala index d58a202b59..635f2fce44 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringPredictPlugin.scala @@ -55,6 +55,7 @@ class CollaborativeFilteringPredictPlugin override def execute(arguments: CollaborativeFilteringPredictArgs)(implicit invocation: Invocation): FrameReference = { val frames = engine.frames val edgeFrame: SparkFrame = arguments.frame + require(!edgeFrame.rdd.isEmpty(), "Edge Frame is empty. Please predict on a non-empty Frame.") val schema = edgeFrame.schema val model: Model = arguments.model val data = model.data.convertTo[CollaborativeFilteringData] diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringRecommendPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringRecommendPlugin.scala index 132ba184e8..597186cead 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringRecommendPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringRecommendPlugin.scala @@ -51,7 +51,9 @@ class CollaborativeFilteringRecommendPlugin val frames = engine.frames val userFrame = frames.loadFrameData(sc, data.userFrame) + require(!userFrame.isEmpty(), "User Frame is empty. Please use a non-empty User Frame.") val productFrame = frames.loadFrameData(sc, data.productFrame) + require(!productFrame.isEmpty(), "Product Frame is empty. Please use a non-empty Product Frame.") val alsModel = new MatrixFactorizationModel(data.rank, CollaborativeFilteringHelper.toAlsRdd(userFrame, data), CollaborativeFilteringHelper.toAlsRdd(productFrame, data)) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringScorePlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringScorePlugin.scala index 43367b561f..28c4b6c3c8 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringScorePlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringScorePlugin.scala @@ -54,7 +54,9 @@ class CollaborativeFilteringScorePlugin val frames = engine.frames val userFrame = frames.loadFrameData(sc, data.userFrame) + require(!userFrame.isEmpty(), "User Frame is empty. Please use a non-empty User Frame.") val productFrame = frames.loadFrameData(sc, data.productFrame) + require(!productFrame.isEmpty(), "Product Frame is empty. Please use a non-empty Product Frame.") val alsModel = new MatrixFactorizationModel(data.rank, CollaborativeFilteringHelper.toAlsRdd(userFrame, data), CollaborativeFilteringHelper.toAlsRdd(productFrame, data)) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringTrainPlugin.scala index c2a6754e71..1bcbbf40bb 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/collaborativefiltering/CollaborativeFilteringTrainPlugin.scala @@ -48,8 +48,9 @@ class CollaborativeFilteringTrainPlugin override def execute(arguments: CollaborativeFilteringTrainArgs)(implicit invocation: Invocation): UnitReturn = { val frames = engine.frames val edgeFrame: SparkFrame = arguments.frame - val schema = edgeFrame.schema + require(!edgeFrame.rdd.isEmpty(), "Edge Frame is empty. Please train on a non-empty Edge Frame.") + val schema = edgeFrame.schema schema.requireColumnIsType(arguments.sourceColumnName, DataTypes.int) schema.requireColumnIsType(arguments.destColumnName, DataTypes.int) require(edgeFrame.isParquet, "frame must be stored as parquet file, or support for new input format is needed") diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsPredictPlugin.scala index c32aaa2f6c..acf794671f 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsPredictPlugin.scala @@ -58,13 +58,6 @@ class PrincipalComponentsPredictPlugin extends SparkCommandPlugin[PrincipalCompo override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: PrincipalComponentsPredictArgs)(implicit invocation: Invocation) = 9 - /** * Get the predictions for observations in a test frame * @@ -79,6 +72,7 @@ class PrincipalComponentsPredictPlugin extends SparkCommandPlugin[PrincipalCompo val model: Model = arguments.model //Running MLLib + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") val principalComponentJsObject = model.dataOption.getOrElse(throw new RuntimeException("This model has not be trained yet. Please train before trying to predict")) val principalComponentData = principalComponentJsObject.convertTo[PrincipalComponentsData] diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsTrainPlugin.scala index e1671c73c9..9039492024 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/dimensionalityreduction/PrincipalComponentsTrainPlugin.scala @@ -53,12 +53,6 @@ class PrincipalComponentsTrainPlugin extends SparkCommandPlugin[PrincipalCompone */ override def name: String = "model:principal_components/train" - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: PrincipalComponentsTrainArgs)(implicit invocation: Invocation) = 7 - /** * Calculate principal components for the specified columns * @@ -70,6 +64,7 @@ class PrincipalComponentsTrainPlugin extends SparkCommandPlugin[PrincipalCompone override def execute(arguments: PrincipalComponentsTrainArgs)(implicit invocation: Invocation): PrincipalComponentsTrainReturn = { val model: Model = arguments.model val frame: SparkFrame = arguments.frame + require(!frame.rdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") validatePrincipalComponentsArgs(frame.schema, arguments) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPredictPlugin.scala index f95302c281..1143e728f2 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPredictPlugin.scala @@ -45,13 +45,6 @@ class LibSvmPredictPlugin extends SparkCommandPlugin[LibSvmPredictArgs, FrameRef override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: LibSvmPredictArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPublishPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPublishPlugin.scala index b19f6eb70c..c1819947a5 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPublishPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmPublishPlugin.scala @@ -60,19 +60,6 @@ class LibSvmPublishPlugin extends CommandPlugin[ModelPublishArgs, ExportMetadata override def apiMaturityTag = Some(ApiMaturityTag.Beta) - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ModelPublishArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmScorePlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmScorePlugin.scala index 1732dab6b0..aa73afec7a 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmScorePlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmScorePlugin.scala @@ -44,13 +44,6 @@ class LibSvmScorePlugin extends CommandPlugin[LibSvmScoreArgs, DoubleValue] { override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: LibSvmScoreArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a vector * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTestPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTestPlugin.scala index e9e01cd00a..07a5ce894b 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTestPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTestPlugin.scala @@ -56,13 +56,6 @@ class LibSvmTestPlugin extends SparkCommandPlugin[LibSvmTestArgs, Classification override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: LibSvmTestArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTrainPlugin.scala index 8fc306c7eb..b0ad709d3c 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/libsvm/LibSvmTrainPlugin.scala @@ -49,12 +49,6 @@ class LibSvmTrainPlugin extends SparkCommandPlugin[LibSvmTrainArgs, UnitReturn] override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: LibSvmTrainArgs)(implicit invocation: Invocation) = 1 - /** * Run LibSvm on the training frame and create a Model for it. * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionPredictPlugin.scala index 262032c304..060cb6d652 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionPredictPlugin.scala @@ -61,6 +61,7 @@ class LinearRegressionPredictPlugin extends SparkCommandPlugin[LinearRegressionP val predictFrameRdd = frame.rdd val linRegJsObject = model.dataOption.getOrElse(throw new RuntimeException("This model has not be trained yet. Please train before trying to predict")) + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") val linRegData = linRegJsObject.convertTo[LinearRegressionData] val linRegModel = linRegData.model val observationColumns = arguments.observationColumns.getOrElse(linRegData.observationColumns) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTestPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTestPlugin.scala index 133a2ee767..566a940110 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTestPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTestPlugin.scala @@ -54,6 +54,7 @@ class LinearRegressionTestPlugin extends SparkCommandPlugin[LinearRegressionTest val model: Model = arguments.model val frame: SparkFrame = arguments.frame val testFrameRdd = frame.rdd + require(!testFrameRdd.isEmpty(), "Test Frame is empty. Please test on a non-empty Frame.") val linRegJsObject = model.dataOption.getOrElse(throw new RuntimeException("This model has not be trained yet. Please train before trying to predict")) val linRegData = linRegJsObject.convertTo[LinearRegressionData] diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTrainPlugin.scala index 8405210fb3..b5768cdcc2 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/LinearRegressionTrainPlugin.scala @@ -56,6 +56,7 @@ class LinearRegressionTrainPlugin extends SparkCommandPlugin[LinearRegressionTra val frame: SparkFrame = arguments.frame val trainFrameRdd = frame.rdd + require(!trainFrameRdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") val dataFrame = trainFrameRdd.toLabeledDataFrame(arguments.valueColumn, arguments.observationColumns) val linReg = LinearRegressionTrainPlugin.initializeLinearRegressionModel(arguments) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPredictPlugin.scala index aaeb459b60..b4f3f42854 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPredictPlugin.scala @@ -60,13 +60,6 @@ class RandomForestRegressorPredictPlugin extends SparkCommandPlugin[RandomForest override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: RandomForestRegressorPredictArgs)(implicit invocation: Invocation) = 9 - /** * Get the predictions for observations in a test frame * @@ -81,6 +74,7 @@ class RandomForestRegressorPredictPlugin extends SparkCommandPlugin[RandomForest val frame: SparkFrame = arguments.frame //Running MLLib + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") val rfData = model.readFromStorage().convertTo[RandomForestRegressorData] val rfModel = rfData.randomForestModel if (arguments.observationColumns.isDefined) { diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPublishPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPublishPlugin.scala index 1c5d20c660..f13e4f084d 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPublishPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorPublishPlugin.scala @@ -52,19 +52,6 @@ class RandomForestRegressorPublishPlugin extends CommandPlugin[ModelPublishArgs, override def apiMaturityTag = Some(ApiMaturityTag.Beta) - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ModelPublishArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorTrainPlugin.scala index 8affafe07e..9030709744 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/regression/RandomForestRegressorTrainPlugin.scala @@ -98,12 +98,6 @@ class RandomForestRegressorTrainPlugin extends SparkCommandPlugin[RandomForestRe override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: RandomForestRegressorTrainArgs)(implicit invocation: Invocation) = 109 - /** * Run MLLib's RandomForest trainRegressor on the training frame and create a Model for it. * @@ -118,6 +112,7 @@ class RandomForestRegressorTrainPlugin extends SparkCommandPlugin[RandomForestRe val model: Model = arguments.model //create RDD from the frame + require(!frame.rdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") val labeledTrainRdd: RDD[LabeledPoint] = frame.rdd.toLabeledPointRDD(arguments.valueColumn, arguments.observationColumns) val randomForestModel = RandomForest.trainRegressor(labeledTrainRdd, arguments.getCategoricalFeaturesInfo, arguments.numTrees, arguments.getFeatureSubsetCategory, arguments.impurity, arguments.maxDepth, arguments.maxBins, arguments.seed) diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPredictPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPredictPlugin.scala index b8d154b48b..d2b52b18a4 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPredictPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPredictPlugin.scala @@ -54,14 +54,6 @@ class ARXPredictPlugin extends SparkCommandPlugin[ARXPredictArgs, FrameReference override def name: String = "model:arx/predict" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ARXPredictArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * @@ -74,7 +66,7 @@ class ARXPredictPlugin extends SparkCommandPlugin[ARXPredictArgs, FrameReference override def execute(arguments: ARXPredictArgs)(implicit invocation: Invocation): FrameReference = { val frame: SparkFrame = arguments.frame val model: Model = arguments.model - + require(!frame.rdd.isEmpty(), "Predict Frame is empty. Please predict on a non-empty Frame.") //Extracting the ARXModel from the stored JsObject val arxData = model.data.convertTo[ARXData] val arxModel = arxData.arxModel diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPublishPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPublishPlugin.scala index 617a8c0895..9f9256f12c 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPublishPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXPublishPlugin.scala @@ -54,20 +54,6 @@ class ARXPublishPlugin extends CommandPlugin[ModelPublishArgs, ExportMetadata] { override def name: String = "model:arx/publish" override def apiMaturityTag = Some(ApiMaturityTag.Beta) - - /** - * User documentation exposed in Python. - * - * [[http://docutils.sourceforge.net/rst.html ReStructuredText]] - */ - - /** - * Number of Spark jobs that get created by running this command - * (this configuration is used to prevent multiple progress bars in Python client) - */ - - override def numberOfJobs(arguments: ModelPublishArgs)(implicit invocation: Invocation) = 1 - /** * Get the predictions for observations in a test frame * diff --git a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXTrainPlugin.scala b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXTrainPlugin.scala index fd804a424c..5bc09c20b0 100644 --- a/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXTrainPlugin.scala +++ b/engine-plugins/model-plugins/src/main/scala/org/trustedanalytics/atk/engine/model/plugins/timeseries/ARXTrainPlugin.scala @@ -57,14 +57,6 @@ class ARXTrainPlugin extends SparkCommandPlugin[ARXTrainArgs, ARXTrainReturn] { override def name: String = "model:arx/train" override def apiMaturityTag = Some(ApiMaturityTag.Alpha) - - /** - * Number of Spark jobs that get created by running this command - * - * (this configuration is used to prevent multiple progress bars in Python client) - */ - override def numberOfJobs(arguments: ARXTrainArgs)(implicit invocation: Invocation) = 15 - /** * Run the spark time series ARX fitmodel() on the training frame and create a Model for it. * @@ -78,6 +70,7 @@ class ARXTrainPlugin extends SparkCommandPlugin[ARXTrainArgs, ARXTrainReturn] { val frame: SparkFrame = arguments.frame val model = arguments.model val trainFrameRdd = frame.rdd + require(!trainFrameRdd.isEmpty(), "Train Frame is empty. Please train on a non-empty Frame.") trainFrameRdd.cache()