Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.git.zip
.DS_Store
Binary file added 0.1_official_module_with_invoker.zip
Binary file not shown.
133 changes: 133 additions & 0 deletions 0.1_official_module_with_invoker/clean_missing_data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
moduleIdentifier:
namespace: zhizhu.com/test
moduleName: New Clean Missing Data
moduleVersion: 0.0.116.1
isDeterministic: true
category: Data Transformation
description: Specifies how to handle the values missing from a dataset.
releaseState: Release
inputs:
- name: Dataset
type: DataFrameDirectory
port: true
description: Dataset to be cleaned
- name: Columns to be cleaned
type: ColumnPicker
description: Columns for missing values clean operation
columnPickerFor: Dataset
- name: Minimum missing value ratio
type: Float
description: Clean only columns with missing value ratio above specified value, out
of set of all selected columns
default: 0.0
min: 0.0
max: 1.0
- name: Maximum missing value ratio
type: Float
default: 1.0
description: Clean only columns with missing value ratio below specified value,
out of set of all selected columns
min: 0.0
max: 1.0
- name: Cleaning mode
type: Mode
default: Custom substitution value
description: Algorithm to clean missing values
options:
- Custom substitution value:
- name: Replacement value
type: String
default: '0'
optional: true
description: Type the value that takes the place of missing values
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Replace with mean:
- name: Cols with all missing values
type: Mode
default: Remove
description: Cols with all missing values
options:
- Propagate
- Remove
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Replace with median:
- name: Cols with all missing values
type: Mode
default: Remove
description: Cols with all missing values
options:
- Propagate
- Remove
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Replace with mode:
- name: Cols with all missing values
type: Mode
default: Remove
description: Cols with all missing values
options:
- Propagate
- Remove
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Remove entire row
- Remove entire column
outputs:
- name: Cleaned dataset
type: DataFrameDirectory
description: Cleaned dataset
- name: Cleaning transformation
type: TransformationDirectory
description: Transformation to be passed to Apply Transformation module to clean
new data
implementation:
container:
runConfig:
baseDockerImage: mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04
gpuSupport: false
conda:
name: project_environment
channels:
- defaults
dependencies:
- python=3.6.8
- pip:
- azureml-designer-classic-modules==0.0.116
command:
- python
- invoker.py
- python
- -m
- azureml.studio.modulehost.module_invoker
- --module-name=azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
args:
- --dataset
- inputPath: Dataset
- --columns-to-be-cleaned
- inputValue: Columns to be cleaned
- --minimum-missing-value-ratio
- inputValue: Minimum missing value ratio
- --maximum-missing-value-ratio
- inputValue: Maximum missing value ratio
- --cleaning-mode
- inputValue: Cleaning mode
- - --replacement-value
- inputValue: Replacement value
- - --cols-with-all-missing-values
- inputValue: Cols with all missing values
- - --generate-missing-value-indicator-column
- inputValue: Generate missing value indicator column
- --cleaned-dataset
- outputPath: Cleaned dataset
- --cleaning-transformation
- outputPath: Cleaning transformation
invoking:
module: azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
class: CleanMissingDataModule
func: run
36 changes: 36 additions & 0 deletions 0.1_official_module_with_invoker/invoker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import subprocess
import sys


def run(command: list, timeout=60000):
    """Run *command* as a child process and return its exit code.

    The child is given this process's ``sys.stdout`` and ``sys.stderr``, so
    its output is streamed straight through rather than captured.

    Args:
        command: Program and arguments to execute. An empty (falsy) value
            is treated as a no-op.
        timeout: Maximum number of seconds to wait for the child to finish.

    Returns:
        The child's exit code, or ``None`` when *command* is empty.

    Raises:
        subprocess.TimeoutExpired: If the child is still running after
            *timeout* seconds. The child is killed before this is raised.
    """
    if not command:
        return None

    # subprocess.run() kills and reaps the child when the timeout expires
    # before re-raising; a bare Popen(...).wait(timeout=...) would leave the
    # child running in the background.
    completed = subprocess.run(
        command,
        stdout=sys.stdout,
        stderr=sys.stderr,
        timeout=timeout,
    )
    return completed.returncode


# Version string of this invoker script; execute() includes it in the
# "Invoking ... by invoker ..." banner it prints.
INVOKER_VERSION = '0.0.6'


def is_invoking_official_module(args):
    """Tell whether *args* is a ``python -m azureml.studio.<...>`` command line.

    Returns ``True`` only when there are at least three arguments, the first
    two are exactly ``'python'`` and ``'-m'``, and the third starts with
    ``'azureml.studio.'``; otherwise returns ``False``.
    """
    if len(args) < 3:
        return False

    interpreter, flag, module = args[:3]
    if interpreter != 'python' or flag != '-m':
        return False
    return module.startswith('azureml.studio.')


def generate_run_command(args):
    """Build the command line to execute from the invoker's own arguments.

    The arguments are passed through unchanged; a new list is returned so
    the caller's sequence is not aliased.

    Args:
        args: Iterable of command-line tokens.

    Returns:
        A new ``list`` containing the items of *args* in order.
    """
    # list() is the idiomatic shallow copy; an identity comprehension adds
    # nothing.
    return list(args)


def execute(args):
    """Announce what is being invoked, run it, and exit with its status.

    Prints a one-line banner saying whether *args* looks like an official
    module invocation (per ``is_invoking_official_module``) or a custom one,
    runs the command via ``run``, then terminates the interpreter with the
    value ``run`` returned.

    Args:
        args: Command line to run (program followed by its arguments).

    Raises:
        SystemExit: Always, carrying the value returned by ``run`` (``None``,
            returned for an empty command, means exit status 0).
    """
    is_custom_module = not is_invoking_official_module(args)
    module_type = 'custom module' if is_custom_module else 'official module'
    # Flush explicitly: the child writes to the same stdout, and when stdout
    # is a pipe or file the buffered banner would otherwise show up after the
    # child's output.
    print('Invoking {} by invoker {}.'.format(module_type, INVOKER_VERSION), flush=True)

    ret = run(generate_run_command(args))

    # Propagate the subprocess result as this process's exit status. Use
    # sys.exit(): the bare exit() builtin is injected by the site module for
    # interactive use and is not guaranteed to exist in every runtime.
    sys.exit(ret)


if __name__ == '__main__':
    # Everything after the script name is the command line to invoke.
    execute(sys.argv[1:])
Binary file added 0_official_module.zip
Binary file not shown.
131 changes: 131 additions & 0 deletions 0_official_module/clean_missing_data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
moduleIdentifier:
namespace: zhizhu.com/test
moduleName: New Clean Missing Data
moduleVersion: 0.0.116
isDeterministic: true
category: Data Transformation
description: Specifies how to handle the values missing from a dataset.
releaseState: Release
inputs:
- name: Dataset
type: DataFrameDirectory
port: true
description: Dataset to be cleaned
- name: Columns to be cleaned
type: ColumnPicker
description: Columns for missing values clean operation
columnPickerFor: Dataset
- name: Minimum missing value ratio
type: Float
description: Clean only columns with missing value ratio above specified value, out
of set of all selected columns
default: 0.0
min: 0.0
max: 1.0
- name: Maximum missing value ratio
type: Float
default: 1.0
description: Clean only columns with missing value ratio below specified value,
out of set of all selected columns
min: 0.0
max: 1.0
- name: Cleaning mode
type: Mode
default: Custom substitution value
description: Algorithm to clean missing values
options:
- Custom substitution value:
- name: Replacement value
type: String
default: '0'
optional: true
description: Type the value that takes the place of missing values
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Replace with mean:
- name: Cols with all missing values
type: Mode
default: Remove
description: Cols with all missing values
options:
- Propagate
- Remove
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Replace with median:
- name: Cols with all missing values
type: Mode
default: Remove
description: Cols with all missing values
options:
- Propagate
- Remove
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Replace with mode:
- name: Cols with all missing values
type: Mode
default: Remove
description: Cols with all missing values
options:
- Propagate
- Remove
- name: Generate missing value indicator column
type: Boolean
description: Generate a column that indicates which rows were cleaned
- Remove entire row
- Remove entire column
outputs:
- name: Cleaned dataset
type: DataFrameDirectory
description: Cleaned dataset
- name: Cleaning transformation
type: TransformationDirectory
description: Transformation to be passed to Apply Transformation module to clean
new data
implementation:
container:
runConfig:
baseDockerImage: mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04
gpuSupport: false
conda:
name: project_environment
channels:
- defaults
dependencies:
- python=3.6.8
- pip:
- azureml-designer-classic-modules==0.0.116
command:
- python
- -m
- azureml.studio.modulehost.module_invoker
- --module-name=azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
args:
- --dataset
- inputPath: Dataset
- --columns-to-be-cleaned
- inputValue: Columns to be cleaned
- --minimum-missing-value-ratio
- inputValue: Minimum missing value ratio
- --maximum-missing-value-ratio
- inputValue: Maximum missing value ratio
- --cleaning-mode
- inputValue: Cleaning mode
- - --replacement-value
- inputValue: Replacement value
- - --cols-with-all-missing-values
- inputValue: Cols with all missing values
- - --generate-missing-value-indicator-column
- inputValue: Generate missing value indicator column
- --cleaned-dataset
- outputPath: Cleaned dataset
- --cleaning-transformation
- outputPath: Cleaning transformation
invoking:
module: azureml.studio.modules.datatransform.clean_missing_data.clean_missing_data
class: CleanMissingDataModule
func: run
36 changes: 36 additions & 0 deletions 0_official_module/invoker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import subprocess
import sys


def run(command: list, timeout=60000):
    """Run *command* as a child process and return its exit code.

    The child is given this process's ``sys.stdout`` and ``sys.stderr``, so
    its output is streamed straight through rather than captured.

    Args:
        command: Program and arguments to execute. An empty (falsy) value
            is treated as a no-op.
        timeout: Maximum number of seconds to wait for the child to finish.

    Returns:
        The child's exit code, or ``None`` when *command* is empty.

    Raises:
        subprocess.TimeoutExpired: If the child is still running after
            *timeout* seconds. The child is killed before this is raised.
    """
    if not command:
        return None

    # subprocess.run() kills and reaps the child when the timeout expires
    # before re-raising; a bare Popen(...).wait(timeout=...) would leave the
    # child running in the background.
    completed = subprocess.run(
        command,
        stdout=sys.stdout,
        stderr=sys.stderr,
        timeout=timeout,
    )
    return completed.returncode


# Version string of this invoker script; execute() includes it in the
# "Invoking ... by invoker ..." banner it prints.
INVOKER_VERSION = '0.0.6'


def is_invoking_official_module(args):
    """Tell whether *args* is a ``python -m azureml.studio.<...>`` command line.

    Returns ``True`` only when there are at least three arguments, the first
    two are exactly ``'python'`` and ``'-m'``, and the third starts with
    ``'azureml.studio.'``; otherwise returns ``False``.
    """
    if len(args) < 3:
        return False

    interpreter, flag, module = args[:3]
    if interpreter != 'python' or flag != '-m':
        return False
    return module.startswith('azureml.studio.')


def generate_run_command(args):
    """Build the command line to execute from the invoker's own arguments.

    The arguments are passed through unchanged; a new list is returned so
    the caller's sequence is not aliased.

    Args:
        args: Iterable of command-line tokens.

    Returns:
        A new ``list`` containing the items of *args* in order.
    """
    # list() is the idiomatic shallow copy; an identity comprehension adds
    # nothing.
    return list(args)


def execute(args):
    """Announce what is being invoked, run it, and exit with its status.

    Prints a one-line banner saying whether *args* looks like an official
    module invocation (per ``is_invoking_official_module``) or a custom one,
    runs the command via ``run``, then terminates the interpreter with the
    value ``run`` returned.

    Args:
        args: Command line to run (program followed by its arguments).

    Raises:
        SystemExit: Always, carrying the value returned by ``run`` (``None``,
            returned for an empty command, means exit status 0).
    """
    is_custom_module = not is_invoking_official_module(args)
    module_type = 'custom module' if is_custom_module else 'official module'
    # Flush explicitly: the child writes to the same stdout, and when stdout
    # is a pipe or file the buffered banner would otherwise show up after the
    # child's output.
    print('Invoking {} by invoker {}.'.format(module_type, INVOKER_VERSION), flush=True)

    ret = run(generate_run_command(args))

    # Propagate the subprocess result as this process's exit status. Use
    # sys.exit(): the bare exit() builtin is injected by the site module for
    # interactive use and is not guaranteed to exist in every runtime.
    sys.exit(ret)


if __name__ == '__main__':
    # Everything after the script name is the command line to invoke.
    execute(sys.argv[1:])
Binary file added 10_invalid_yaml.zip
Binary file not shown.
1 change: 1 addition & 0 deletions 10_invalid_yaml/invalid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello:
1 change: 1 addition & 0 deletions 10_invalid_yaml/invalid2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
incomplete_list: [
1 change: 1 addition & 0 deletions 10_invalid_yaml/invalid3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
666
6 changes: 6 additions & 0 deletions 10_invalid_yaml/invalid4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import sys


if __name__ == '__main__':
    # Echo the complete argument vector (script name included) to stdout.
    print(sys.argv)

6 changes: 6 additions & 0 deletions 10_invalid_yaml/merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import sys


if __name__ == '__main__':
    # Echo the complete argument vector (script name included) to stdout.
    print(sys.argv)

Loading