zzn2 · dwt0317 · Mar 17, 2020 · Mar 17, 2020 · Mar 26, 2020 · Mar 26, 2020
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+.git.zip
+.DS_Store
diff --git a/0.1_official_module_with_invoker.zip b/0.1_official_module_with_invoker.zip
diff --git a/0.1_official_module_with_invoker/clean_missing_data.yaml b/0.1_official_module_with_invoker/clean_missing_data.yaml
@@ -0,0 +1,133 @@
+moduleIdentifier:  # Identity of this module spec: namespace, display name and version
+  namespace: zhizhu.com/test
+  moduleName: New Clean Missing Data
+  moduleVersion: 0.0.116.1  # four dot-separated parts, so YAML loads it as a string; the pip package pinned under `implementation` is 0.0.116
+isDeterministic: true
+category: Data Transformation
+description: Specifies how to handle the values missing from a dataset.
+releaseState: Release
+inputs:  # Input port and parameters of the module, in declaration order
+- name: Dataset
+  type: DataFrameDirectory
+  port: true  # only this input sets `port`; presumably marks a connectable data port - confirm against the spec schema
+  description: Dataset to be cleaned
+- name: Columns to be cleaned
+  type: ColumnPicker
+  description: Columns for missing values clean operation
+  columnPickerFor: Dataset  # names the `Dataset` input declared above
+- name: Minimum missing value ratio
+  type: Float
+  description: Clean only column with missing value ratio above specified value, out
+    of set of all selected columns
+  default: 0.0
+  min: 0.0  # lower bound of the accepted range
+  max: 1.0  # upper bound of the accepted range
+- name: Maximum missing value ratio
+  type: Float
+  default: 1.0
+  description: Clean only columns with missing value ratio below specified value,
+    out of set of all selected columns
+  min: 0.0  # lower bound of the accepted range
+  max: 1.0  # upper bound of the accepted range
+- name: Cleaning mode
+  type: Mode
+  default: Custom substitution value  # same text as the first option key below
+  description: Algorithm to clean missing values
+  options:  # mapping entries carry nested parameters; plain string entries carry none
+  - Custom substitution value:
+    - name: Replacement value
+      type: String
+      default: '0'  # quoted so the default is the string "0", not an integer
+      optional: true
+      description: Type the value that takes the place of missing values
+    - name: Generate missing value indicator column
+      type: Boolean  # no default is declared for this parameter
+      description: Generate a column that indicates which rows were cleaned
+  - Replace with mean:  # this and the next two options declare the same two nested parameters
+    - name: Cols with all missing values
+      type: Mode
+      default: Remove
+      description: Cols with all missing values
+      options:
+      - Propagate
+      - Remove
+    - name: Generate missing value indicator column
+      type: Boolean
+      description: Generate a column that indicates which rows were cleaned
+  - Replace with median:
+    - name: Cols with all missing values
+      type: Mode
+      default: Remove
+      description: Cols with all missing values
+      options:
+      - Propagate
+      - Remove
+    - name: Generate missing value indicator column
+      type: Boolean
+      description: Generate a column that indicates which rows were cleaned
+  - Replace with mode:
+    - name: Cols with all missing values
+      type: Mode
+      default: Remove
+      description: Cols with all missing values
+      options:
+      - Propagate
+      - Remove
+    - name: Generate missing value indicator column
+      type: Boolean
+      description: Generate a column that indicates which rows were cleaned
+  - Remove entire row  # no nested parameters
+  - Remove entire column  # no nested parameters
+outputs:  # Two outputs: the cleaned data and a transformation artifact
+- name: Cleaned dataset
+  type: DataFrameDirectory  # same type as the `Dataset` input
+  description: Cleaned dataset
+- name: Cleaning transformation
+  type: TransformationDirectory
+  description: Transformation to be passed to Apply Transformation module to clean
+    new data
+implementation:  # How the module runs: container image, conda environment, command line and entry point
+  container:
+    runConfig:
+      baseDockerImage: mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04
+      gpuSupport: false
+    conda:
+      name: project_environment
+      channels:
+      - defaults
+      dependencies:
+      - python=3.6.8
+      - pip:
+        - azureml-designer-classic-modules==0.0.116  # exact version pin
+    command:  # presumably the entries under `args` are appended to this list - confirm against the spec schema
+    - python
+    - invoker.py  # wrapper script shipped in this directory; it runs its own arguments as a child process
+    - python  # from here on: the command line handed to invoker.py
+    - -m
+    - azureml.studio.modulehost.module_invoker
+    - --module-name=azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
+    args:  # each flag is followed by a reference, by name, to an entry declared under `inputs` or `outputs`
+    - --dataset
+    - inputPath: Dataset
+    - --columns-to-be-cleaned
+    - inputValue: Columns to be cleaned
+    - --minimum-missing-value-ratio
+    - inputValue: Minimum missing value ratio
+    - --maximum-missing-value-ratio
+    - inputValue: Maximum missing value ratio
+    - --cleaning-mode
+    - inputValue: Cleaning mode
+    - - --replacement-value  # nested list: flag plus value for a parameter declared only under some Cleaning mode options; presumably omitted when unset - confirm
+      - inputValue: Replacement value
+    - - --cols-with-all-missing-values
+      - inputValue: Cols with all missing values
+    - - --generate-missing-value-indicator-column
+      - inputValue: Generate missing value indicator column
+    - --cleaned-dataset
+    - outputPath: Cleaned dataset
+    - --cleaning-transformation
+    - outputPath: Cleaning transformation
+  invoking:  # NOTE(review): presumably the Python entry point (module, class, function) used by the module host - confirm
+    module: azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
+    class: CleanMissingDataModule
+    func: run
diff --git a/0.1_official_module_with_invoker/invoker.py b/0.1_official_module_with_invoker/invoker.py
@@ -0,0 +1,51 @@
+"""Thin launcher that re-executes its command-line arguments as a child process."""
+import subprocess
+import sys
+
+
+def run(command: list, timeout=60000):
+    """Run ``command`` as a child process and return its exit code.
+
+    :param command: Program and arguments to execute; an empty value is a no-op.
+    :param timeout: Seconds to wait for the child to finish.
+    :return: The child's return code, or ``None`` when ``command`` is empty.
+    :raises subprocess.TimeoutExpired: If the child outlives ``timeout``.
+    """
+    if not command:
+        return None
+
+    # subprocess.call kills and reaps the child when the wait times out; a bare
+    # Popen(...).wait(timeout=...) would leave it running after the exception.
+    return subprocess.call(command, stdout=sys.stdout, stderr=sys.stderr, timeout=timeout)
+
+
+INVOKER_VERSION = '0.0.6'
+
+
+def is_invoking_official_module(args):
+    """Return True when ``args`` has the form ``python -m azureml.studio.<...>``."""
+    return len(args) >= 3 and args[0] == 'python' and args[1] == '-m' and args[2].startswith('azureml.studio.')
+
+
+def generate_run_command(args):
+    """Return a new list holding the command to run; currently ``args`` unchanged."""
+    return list(args)
+
+
+def execute(args):
+    """Log which kind of module is invoked, run it, and exit with its return code."""
+    is_custom_module = not is_invoking_official_module(args)
+    module_type = 'custom module' if is_custom_module else 'official module'
+    # Flush so the banner precedes the child's output when stdout is buffered.
+    print('Invoking {} by invoker {}.'.format(module_type, INVOKER_VERSION), flush=True)
+
+    ret = run(generate_run_command(args))
+
+    # Propagate the child's result as this process's exit status. sys.exit is
+    # used because the bare exit() helper is only injected by the site module.
+    sys.exit(ret)
+
+
+if __name__ == '__main__':
+    args = sys.argv[1:]
+    execute(args)
diff --git a/0_official_module.zip b/0_official_module.zip
diff --git a/0_official_module/clean_missing_data.yaml b/0_official_module/clean_missing_data.yaml
@@ -0,0 +1,131 @@
+moduleIdentifier:  # Identity of this module spec: namespace, display name and version
+  namespace: zhizhu.com/test
+  moduleName: New Clean Missing Data
+  moduleVersion: 0.0.116  # same number as the azureml-designer-classic-modules pin under `implementation`; loads as a string, not a float
+isDeterministic: true
+category: Data Transformation
+description: Specifies how to handle the values missing from a dataset.
+releaseState: Release
+inputs:  # Input port and parameters of the module, in declaration order
+- name: Dataset
+  type: DataFrameDirectory
+  port: true  # only this input sets `port`; presumably marks a connectable data port - confirm against the spec schema
+  description: Dataset to be cleaned
+- name: Columns to be cleaned
+  type: ColumnPicker
+  description: Columns for missing values clean operation
+  columnPickerFor: Dataset  # names the `Dataset` input declared above
+- name: Minimum missing value ratio
+  type: Float
+  description: Clean only column with missing value ratio above specified value, out
+    of set of all selected columns
+  default: 0.0
+  min: 0.0  # lower bound of the accepted range
+  max: 1.0  # upper bound of the accepted range
+- name: Maximum missing value ratio
+  type: Float
+  default: 1.0
+  description: Clean only columns with missing value ratio below specified value,
+    out of set of all selected columns
+  min: 0.0  # lower bound of the accepted range
+  max: 1.0  # upper bound of the accepted range
+- name: Cleaning mode
+  type: Mode
+  default: Custom substitution value  # same text as the first option key below
+  description: Algorithm to clean missing values
+  options:  # mapping entries carry nested parameters; plain string entries carry none
+  - Custom substitution value:
+    - name: Replacement value
+      type: String
+      default: '0'  # quoted so the default is the string "0", not an integer
+      optional: true
+      description: Type the value that takes the place of missing values
+    - name: Generate missing value indicator column
+      type: Boolean  # no default is declared for this parameter
+      description: Generate a column that indicates which rows were cleaned
+  - Replace with mean:  # this and the next two options declare the same two nested parameters
+    - name: Cols with all missing values
+      type: Mode
+      default: Remove
+      description: Cols with all missing values
+      options:
+      - Propagate
+      - Remove
+    - name: Generate missing value indicator column
+      type: Boolean
+      description: Generate a column that indicates which rows were cleaned
+  - Replace with median:
+    - name: Cols with all missing values
+      type: Mode
+      default: Remove
+      description: Cols with all missing values
+      options:
+      - Propagate
+      - Remove
+    - name: Generate missing value indicator column
+      type: Boolean
+      description: Generate a column that indicates which rows were cleaned
+  - Replace with mode:
+    - name: Cols with all missing values
+      type: Mode
+      default: Remove
+      description: Cols with all missing values
+      options:
+      - Propagate
+      - Remove
+    - name: Generate missing value indicator column
+      type: Boolean
+      description: Generate a column that indicates which rows were cleaned
+  - Remove entire row  # no nested parameters
+  - Remove entire column  # no nested parameters
+outputs:  # Two outputs: the cleaned data and a transformation artifact
+- name: Cleaned dataset
+  type: DataFrameDirectory  # same type as the `Dataset` input
+  description: Cleaned dataset
+- name: Cleaning transformation
+  type: TransformationDirectory
+  description: Transformation to be passed to Apply Transformation module to clean
+    new data
+implementation:  # How the module runs: container image, conda environment, command line and entry point
+  container:
+    runConfig:
+      baseDockerImage: mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04
+      gpuSupport: false
+    conda:
+      name: project_environment
+      channels:
+      - defaults
+      dependencies:
+      - python=3.6.8
+      - pip:
+        - azureml-designer-classic-modules==0.0.116  # exact version pin; matches moduleVersion above
+    command:  # runs the module host directly; the invoker.py that sits in this directory is not referenced here
+    - python
+    - -m
+    - azureml.studio.modulehost.module_invoker
+    - --module-name=azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
+    args:  # each flag is followed by a reference, by name, to an entry declared under `inputs` or `outputs`
+    - --dataset
+    - inputPath: Dataset
+    - --columns-to-be-cleaned
+    - inputValue: Columns to be cleaned
+    - --minimum-missing-value-ratio
+    - inputValue: Minimum missing value ratio
+    - --maximum-missing-value-ratio
+    - inputValue: Maximum missing value ratio
+    - --cleaning-mode
+    - inputValue: Cleaning mode
+    - - --replacement-value  # nested list: flag plus value for a parameter declared only under some Cleaning mode options; presumably omitted when unset - confirm
+      - inputValue: Replacement value
+    - - --cols-with-all-missing-values
+      - inputValue: Cols with all missing values
+    - - --generate-missing-value-indicator-column
+      - inputValue: Generate missing value indicator column
+    - --cleaned-dataset
+    - outputPath: Cleaned dataset
+    - --cleaning-transformation
+    - outputPath: Cleaning transformation
+  invoking:  # NOTE(review): presumably the Python entry point (module, class, function) used by the module host - confirm
+    module: azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
+    class: CleanMissingDataModule
+    func: run
diff --git a/0_official_module/invoker.py b/0_official_module/invoker.py
@@ -0,0 +1,51 @@
+"""Thin launcher that re-executes its command-line arguments as a child process."""
+import subprocess
+import sys
+
+
+def run(command: list, timeout=60000):
+    """Run ``command`` as a child process and return its exit code.
+
+    :param command: Program and arguments to execute; an empty value is a no-op.
+    :param timeout: Seconds to wait for the child to finish.
+    :return: The child's return code, or ``None`` when ``command`` is empty.
+    :raises subprocess.TimeoutExpired: If the child outlives ``timeout``.
+    """
+    if not command:
+        return None
+
+    # subprocess.call kills and reaps the child when the wait times out; a bare
+    # Popen(...).wait(timeout=...) would leave it running after the exception.
+    return subprocess.call(command, stdout=sys.stdout, stderr=sys.stderr, timeout=timeout)
+
+
+INVOKER_VERSION = '0.0.6'
+
+
+def is_invoking_official_module(args):
+    """Return True when ``args`` has the form ``python -m azureml.studio.<...>``."""
+    return len(args) >= 3 and args[0] == 'python' and args[1] == '-m' and args[2].startswith('azureml.studio.')
+
+
+def generate_run_command(args):
+    """Return a new list holding the command to run; currently ``args`` unchanged."""
+    return list(args)
+
+
+def execute(args):
+    """Log which kind of module is invoked, run it, and exit with its return code."""
+    is_custom_module = not is_invoking_official_module(args)
+    module_type = 'custom module' if is_custom_module else 'official module'
+    # Flush so the banner precedes the child's output when stdout is buffered.
+    print('Invoking {} by invoker {}.'.format(module_type, INVOKER_VERSION), flush=True)
+
+    ret = run(generate_run_command(args))
+
+    # Propagate the child's result as this process's exit status. sys.exit is
+    # used because the bare exit() helper is only injected by the site module.
+    sys.exit(ret)
+
+
+if __name__ == '__main__':
+    args = sys.argv[1:]
+    execute(args)
diff --git a/10_invalid_yaml.zip b/10_invalid_yaml.zip
diff --git a/10_invalid_yaml/invalid.yaml b/10_invalid_yaml/invalid.yaml
@@ -0,0 +1 @@
+hello:
diff --git a/10_invalid_yaml/invalid2.yaml b/10_invalid_yaml/invalid2.yaml
@@ -0,0 +1 @@
+incomplete_list: [
diff --git a/10_invalid_yaml/invalid3.yaml b/10_invalid_yaml/invalid3.yaml
@@ -0,0 +1 @@
+666
diff --git a/10_invalid_yaml/invalid4.yaml b/10_invalid_yaml/invalid4.yaml
@@ -0,0 +1,6 @@
+import sys
+
+
+if __name__ == '__main__':
+    print(sys.argv)
+
diff --git a/10_invalid_yaml/merge.py b/10_invalid_yaml/merge.py
@@ -0,0 +1,6 @@
+import sys
+
+
+if __name__ == '__main__':  # runs only when executed as a script, not on import
+    print(sys.argv)  # print the full argument list, script path included
+