Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
5157cb5
resolving clashes with LightGBM version 3.2.1.99
May 18, 2022
471db96
Implemented the FairGBM parameter descriptor
May 23, 2022
6bc5519
all tests now pass
May 27, 2022
dd25f6f
passing necessary group information to LightGBM c++ metadata class
May 31, 2022
b0115d3
checking whether sensitive group column is in categorical format
Jun 1, 2022
5750e9d
added tests for FairGBM openml interface
Jun 1, 2022
40c39ce
fixing bug on replace of ImmutableMap
Jun 2, 2022
43a4097
tests now pass
Jun 2, 2022
9fc17f4
remove debug messages
Jun 2, 2022
86bb609
tidying code according to PR feedback
Jun 3, 2022
33a63d3
constraint_group data is now held in int instead of float for compati…
Jun 7, 2022
2733056
applying PR feedback
Jun 14, 2022
6b021c8
updated lightgbm pom to point to latest python-api branch
AndreFCruz Jun 27, 2022
1aa9913
removed deprecated code
AndreFCruz Jun 29, 2022
a624bf3
improving memory management of SWIG data
AndreFCruz Jun 29, 2022
d103d68
running intellij code cleanup
AndreFCruz Jun 30, 2022
95c57ac
Revert H2OApp changes.
fdz-sergio-jesus Jul 1, 2022
425f69c
Revert H2OApp changes on all files.
fdz-sergio-jesus Jul 1, 2022
3e91876
Small fixes to typing, and javadocs.
fdz-sergio-jesus Jul 1, 2022
08be7c9
updated make-lightgbm submodule
AndreFCruz Jul 6, 2022
2c88181
update lightgbm version
AndreFCruz Jul 6, 2022
e0dd26d
asserting fairnessConstrained=True before setting group data
AndreFCruz Jul 7, 2022
1e03207
moved all FairGBM-specific input handling to a separate class
AndreFCruz Jul 7, 2022
227c4f2
fairgbm input processing
AndreFCruz Jul 7, 2022
04c0ad1
ensure that loaded file is properly closed
AndreFCruz Jul 7, 2022
9927507
Update openml-lightgbm/lightgbm-provider/src/main/java/com/feedzai/op…
AndreFCruz Jul 8, 2022
92ea39d
Apply suggestions from code review
AndreFCruz Jul 8, 2022
c83737b
applied PR feedback
AndreFCruz Jul 8, 2022
e405a08
added links to appropriate GH issues
AndreFCruz Jul 8, 2022
df6ff35
speeding up system tests by running only 2 boosting iterations
AndreFCruz Jul 8, 2022
cf0dd85
Tests fix.
fdz-sergio-jesus Jul 8, 2022
ddaea86
Restore comparison to fairnessConstrained SWIG objects.
fdz-sergio-jesus Jul 8, 2022
9eb4cdc
Change to comparison when set has size of only two elements.
fdz-sergio-jesus Jul 11, 2022
a9b33db
applying PR feedback
AndreFCruz Jul 11, 2022
f5e87a6
disallowing usage of RF with FairGBM
AndreFCruz Jul 11, 2022
62030e5
DescriptorUtilTest
AndreFCruz Jul 11, 2022
905c2e9
allocating ML models with try/with
AndreFCruz Jul 11, 2022
5db8df3
testing FairGBMDescriptorUtil
AndreFCruz Jul 11, 2022
fa349d6
reducing code complexity
AndreFCruz Jul 11, 2022
023660a
Docstring update from Iva
AlbertoEAF Jul 11, 2022
3cdeebe
Tests for GBM Providers classes.
fdz-sergio-jesus Jul 12, 2022
e0ede65
Apply suggestions from documentation review
AndreFCruz Jul 12, 2022
eff6cf2
Apply suggestions from documentation review
AndreFCruz Jul 12, 2022
dd91b30
making global FPR/FNR constraint documentation clearer
AndreFCruz Jul 12, 2022
807828f
asserting sensitive attribute is only loaded for constrained optimiza…
AndreFCruz Jul 12, 2022
b0f7cd6
Change methods names in providers tests, move property to inside test…
fdz-sergio-jesus Jul 12, 2022
9dfcb7c
updating UI docstring for the global target FPR/FNR
AndreFCruz Jul 12, 2022
1a83e84
Merge branch 'fairgbm-api-implementation' of github.com:AndreFCruz/fe…
AndreFCruz Jul 12, 2022
05165d9
Set better line spacing.
fdz-sergio-jesus Jul 12, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions openml-lightgbm/lightgbm-builder/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,26 @@

<groupId>com.feedzai.openml.lightgbm</groupId>
<artifactId>lightgbm-lib</artifactId>
<version>3.0.1-with_model_locale_fix_for_java_and_streaming</version>
<version>v3.2.1-fairgbm-alpha</version>

<packaging>jar</packaging>
<name>Openml LightGBM lib</name>
<description>
LightGBM build for Java generated with make-lightgbm.
</description>
<url>https://github.com/feedzai/make-lightgbm</url>

<properties>
<!-- Microsoft hasn't merged our model-locale-fix patch yet. -->
<!--<lightgbm.repo.url>https://github.com/microsoft/LightGBM</lightgbm.repo.url>-->
<lightgbm.repo.url>https://github.com/feedzai/LightGBM.git</lightgbm.repo.url>
<lightgbmlib.version>3.0.1-with_model_locale_fix_for_java_and_streaming</lightgbmlib.version>
<lightgbm.version>v3.0.1-with_model_locale_fix_for_java_and_streaming</lightgbm.version>
<!-- Microsoft LightGBM -->
<!-- <lightgbm.repo.url>https://github.com/microsoft/LightGBM</lightgbm.repo.url> -->

<!-- Feedzai's custom LightGBM -->
<!-- <lightgbm.repo.url>https://github.com/feedzai/LightGBM.git</lightgbm.repo.url> -->

<!-- Feedzai's FairGBM! -->
<lightgbm.repo.url>https://github.com/feedzai/fairgbm.git</lightgbm.repo.url>

<lightgbm.version>main-fairgbm</lightgbm.version>
<lightgbmlib.version>v3.2.1-fairgbm-alpha</lightgbmlib.version>
</properties>

<build>
Expand Down
4 changes: 2 additions & 2 deletions openml-lightgbm/lightgbm-provider/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
<groupId>com.feedzai</groupId>
<artifactId>openml-lightgbm</artifactId>

<description>OpenML Microsoft LightGBM Machine Learning Model and Classifier provider</description>
<description>OpenML LightGBM Machine Learning Model and Classifier provider</description>

<properties>
<lightgbmlib.version>3.0.1-with_model_locale_fix_for_java_and_streaming</lightgbmlib.version>
<lightgbmlib.version>v3.2.1-fairgbm-alpha</lightgbmlib.version>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.feedzai.openml.provider.lightgbm;

import com.feedzai.openml.provider.descriptor.fieldtype.NumericFieldType;

/**
 * Base class holding the constants and helper methods shared by the descriptor utilities
 * that declare algorithm parameters.
 */
public abstract class AlgoDescriptorUtil {

    /**
     * Readability alias to flag a parameter as optional (i.e., not mandatory).
     */
    protected static final boolean NOT_MANDATORY = false;

    /**
     * Readability alias to flag a parameter as mandatory.
     */
    protected static final boolean MANDATORY = true;

    /**
     * Builds a numeric range field whose configuration type is DOUBLE.
     *
     * @param minValue     Lower bound of the range.
     * @param maxValue     Upper bound of the range.
     * @param defaultValue Value used when none is provided.
     * @return The DOUBLE range field type described by the arguments.
     */
    protected static NumericFieldType doubleRange(final double minValue,
                                                  final double maxValue,
                                                  final double defaultValue) {
        final NumericFieldType.ParameterConfigType configType = NumericFieldType.ParameterConfigType.DOUBLE;
        return NumericFieldType.range(minValue, maxValue, configType, defaultValue);
    }

    /**
     * Builds a numeric range field whose configuration type is INT.
     *
     * @param minValue     Lower bound of the range.
     * @param maxValue     Upper bound of the range.
     * @param defaultValue Value used when none is provided.
     * @return The INT range field type described by the arguments.
     */
    protected static NumericFieldType intRange(final int minValue,
                                               final int maxValue,
                                               final int defaultValue) {
        final NumericFieldType.ParameterConfigType configType = NumericFieldType.ParameterConfigType.INT;
        return NumericFieldType.range(minValue, maxValue, configType, defaultValue);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/*
* Copyright 2022 Feedzai
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package com.feedzai.openml.provider.lightgbm;

import com.feedzai.openml.provider.descriptor.ModelParameter;
import com.feedzai.openml.provider.descriptor.fieldtype.ChoiceFieldType;
import com.feedzai.openml.provider.descriptor.fieldtype.FreeTextFieldType;
import com.feedzai.openml.provider.descriptor.fieldtype.NumericFieldType;
import com.google.common.collect.ImmutableSet;

import com.google.common.collect.Sets;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Utility to organize all the necessary Machine Learning Hyper-Parameters for configuring the training of FairGBM.
 * <p>
 * The exposed parameters are the FairGBM-specific ones declared here together with the parameters of
 * {@link LightGBMDescriptorUtil}, except for the boosting type, which is re-declared here with a
 * narrower set of choices.
 *
 * @author Andre Cruz (andre.cruz@feedzai.com)
 * @since 1.3.6
 */
public class FairGBMDescriptorUtil extends LightGBMDescriptorUtil {

    /**
     * Name of the parameter that identifies the column over which fairness constraints are enforced.
     */
    public static final String CONSTRAINT_GROUP_COLUMN_PARAMETER_NAME = "constraint_group_column";

    /**
     * Defines the set of model parameters supported by the FairGBM algorithm.
     * <p>
     * This is the union of the parameters declared below with {@link LightGBMDescriptorUtil#PARAMS},
     * from which the boosting type parameter is filtered out because it is overridden here.
     */
    static final Set<ModelParameter> PARAMS = Sets.union(ImmutableSet.of(
            // The single parameter that will change for every different dataset
            new ModelParameter(
                    CONSTRAINT_GROUP_COLUMN_PARAMETER_NAME,
                    "(Fairness) Sensitive group column",
                    "Fairness constraints are enforced over this column.\n"
                            + "This column must be in categorical format.\n"
                            + "Start this string with `name:` to use the name of a column, \n"
                            + "e.g., `name:age_group` for a column named `age_group`.",
                    MANDATORY,
                    new FreeTextFieldType("")
                    // new FreeTextFieldType("", ".+") # TODO: https://github.com/feedzai/feedzai-openml/issues/68
            ),

            new ModelParameter(
                    "constraint_type",
                    "(Fairness) Constraint type",
                    "Enforces group-wise parity on the given target metric for the selected group column. "
                            + "In general, FPR can be used for most detection settings "
                            + "to equalize the negative outcomes on legitimate individuals "
                            + "(false positives).",
                    NOT_MANDATORY,
                    new ChoiceFieldType(ImmutableSet.of("FPR", "FNR", "FPR,FNR"), "FPR")
            ),

            // Parameters related to global constraints
            new ModelParameter(
                    "global_constraint_type",
                    "(Fairness) Global constraint type",
                    "FairGBM modifies the output scores to meet your target FPR and/or FNR as well as "
                            + "fairness at a decision threshold of approximately 0.5 (or 500 in Pulse). Set parameters "
                            + "(Fairness) Global target FPR/FNR accordingly. Using decision thresholds far from 0.5 "
                            + "will not ensure fairness.",
                    NOT_MANDATORY,
                    new ChoiceFieldType(ImmutableSet.of("FPR", "FNR", "FPR,FNR"), "FPR,FNR")
            ),
            new ModelParameter(
                    "global_target_fpr",
                    "(Fairness) Global target FPR",
                    "This parameter is only active when '(Fairness) Global constraint type' includes "
                            + "'FPR'. This is an inequality constraint: inactive when FPR is lower than the target. "
                            + "Oftentimes, some tension is required between global FPR and FNR constraints in order to "
                            + "achieve the target values (in these cases pick 'FPR,FNR' for the '(Fairness) Global "
                            + "constraint type' parameter).",
                    NOT_MANDATORY,
                    doubleRange(0.0, 1.0, 0.05)
            ),
            new ModelParameter(
                    "global_target_fnr",
                    "(Fairness) Global target FNR",
                    "This parameter is only active when '(Fairness) Global constraint type' includes "
                            + "'FNR'. This is an inequality constraint: inactive when FNR is lower than the target. "
                            + "Oftentimes, some tension is required between global FPR and FNR constraints in order to "
                            + "achieve the target values (in these cases pick 'FPR,FNR' for the '(Fairness) Global "
                            + "constraint type' parameter).",
                    NOT_MANDATORY,
                    doubleRange(0.0, 1.0, 0.5)
            ),

            new ModelParameter(
                    "objective",
                    "(Fairness) Objective function",
                    "For FairGBM you must use a constrained optimization function. "
                            + "`constrained_cross_entropy` is recommended for most cases.",
                    NOT_MANDATORY,
                    new ChoiceFieldType(
                            ImmutableSet.of("constrained_cross_entropy", "constrained_recall_objective"),
                            "constrained_cross_entropy")
            ),

            // Tolerance on the fairness constraints
            new ModelParameter(
                    "constraint_fpr_threshold",
                    "(Fairness) FPR tolerance for fairness",
                    "The tolerance when fulfilling fairness FPR constraints. "
                            + "The allowed difference between group-wise FPR. "
                            + "The value 0.0 enforces group-wise FPR to be *exactly* equal. "
                            + "Higher values lead to a less strict fairness enforcement.",
                    NOT_MANDATORY,
                    doubleRange(0.0, 1.0, 0.0)
            ),
            new ModelParameter(
                    "constraint_fnr_threshold",
                    "(Fairness) FNR tolerance for fairness",
                    "The tolerance when fulfilling fairness FNR constraints. "
                            + "The allowed difference between group-wise FNR. "
                            + "The value 0.0 enforces group-wise FNR to be *exactly* equal. "
                            + "Higher values lead to a less strict fairness enforcement.",
                    NOT_MANDATORY,
                    doubleRange(0.0, 1.0, 0.0)
            ),

            // Eventually we want this parameter to not depend as much on the size of the dataset
            // But currently this needs to be changed for each dataset considering its size (larger for larger datasets)
            // See: https://github.com/feedzai/fairgbm/issues/7
            //
            // NOTE: Float.MIN_VALUE is used as the lower bound (instead of Double.MIN_VALUE) because, per the
            // original author, the minimum value of a double in C++ depends on the architecture it runs on;
            // using the float bound is the more conservative choice.
            new ModelParameter(
                    "multiplier_learning_rate",
                    "(Fairness) Multipliers' learning rate",
                    "The Lagrangian multipliers control how strict the constraint enforcement is.",
                    NOT_MANDATORY,
                    NumericFieldType.min(Float.MIN_VALUE, NumericFieldType.ParameterConfigType.DOUBLE, 1e3)
            ),
            new ModelParameter(
                    "init_multipliers",
                    "(Fairness) Initial multipliers",
                    "The Lagrangian multipliers control how strict the constraint enforcement is. "
                            + "The default value is starting with zero `0` for each constraint.",
                    NOT_MANDATORY,
                    new FreeTextFieldType("")
                    // new FreeTextFieldType("", "^((\\d+(\\.\\d*)?,)*(\\d+(\\.\\d*)?))?$") # TODO: https://github.com/feedzai/feedzai-openml/issues/68
            ),

            // These parameters probably shouldn't be changed in 90% of cases
            new ModelParameter(
                    "constraint_stepwise_proxy",
                    "(Fairness) Stepwise proxy for fairness constraints",
                    "The type of proxy function to use for the fairness constraint. "
                            + "We need to use a differentiable proxy function, as FPR and FNR have discontinuous gradients.",
                    NOT_MANDATORY,
                    new ChoiceFieldType(ImmutableSet.of("cross_entropy", "quadratic", "hinge"), "cross_entropy")
            ),
            new ModelParameter(
                    "objective_stepwise_proxy",
                    "(Fairness) Stepwise proxy for global constraints",
                    "The proxy function to use for the objective function. "
                            + "Only used when explicitly optimizing for Recall (or any other metric of the "
                            + "confusion matrix). Leave blank when using standard objectives, such as cross-entropy.",
                    NOT_MANDATORY,
                    new ChoiceFieldType(ImmutableSet.of("cross_entropy", "quadratic", "hinge", ""), "")
            ),

            // Override this parameter from LightGBM so we can disallow using RF.
            // The description intentionally lists only the choices offered below ('rf' is not selectable).
            new ModelParameter(
                    BOOSTING_TYPE_PARAMETER_NAME,
                    "Boosting type",
                    "Type of boosting model:\n"
                            + "'gbdt' is a good starting point,\n"
                            + "'goss' is faster but slightly less accurate,\n"
                            + "'dart' is much slower but might improve performance.",
                    MANDATORY,
                    new ChoiceFieldType(
                            ImmutableSet.of("gbdt", "dart", "goss"),
                            "gbdt"
                    )
            )

            // TODO: assess whether these parameters would ever be useful
            // // These parameters probably shouldn't be changed in 99% of cases
            // new ModelParameter(
            //         "stepwise_proxy_margin",
            //         "",
            //         "",
            //         NOT_MANDATORY,
            //         new FreeTextFieldType("")
            // ),
            // new ModelParameter(
            //         "score_threshold",
            //         "",
            //         "",
            //         NOT_MANDATORY,
            //         new FreeTextFieldType("")
            // ),
            // new ModelParameter(
            //         "global_score_threshold",
            //         "",
            //         "",
            //         NOT_MANDATORY,
            //         new FreeTextFieldType("")
            // )

    ), LightGBMDescriptorUtil.PARAMS.stream()
            // Drop the inherited boosting type parameter: it is replaced by the override declared above.
            .filter(el -> !el.getName().equals(BOOSTING_TYPE_PARAMETER_NAME))
            .collect(Collectors.toSet()));

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright 2020 Feedzai
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package com.feedzai.openml.provider.lightgbm;

import com.google.auto.service.AutoService;
import java.util.Optional;
import java.util.Set;

import com.feedzai.openml.provider.MachineLearningProvider;
import com.feedzai.openml.provider.TrainingMachineLearningProvider;
import com.feedzai.openml.provider.descriptor.MLAlgorithmDescriptor;
import com.feedzai.openml.util.algorithm.MLAlgorithmEnum;

/**
 * Feedzai OpenML {@link MachineLearningProvider} implementation exposing FairGBM (constrained LightGBM).
 *
 * @author Andre Cruz (andre.cruz@feedzai.com)
 * @since 1.3.6
 */
@AutoService(MachineLearningProvider.class)
public class FairGBMMLProvider implements TrainingMachineLearningProvider<LightGBMModelCreator> {

    /**
     * Name under which this provider reports itself.
     */
    private static final String PROVIDER_NAME = "Feedzai GBM";

    /**
     * Lists the algorithms exposed by this provider.
     *
     * @return A freshly allocated array with the algorithms of this provider.
     */
    private static MLAlgorithmEnum[] supportedAlgorithms() {
        return new MLAlgorithmEnum[]{LightGBMAlgorithms.FAIRGBM_BINARY_CLASSIFIER};
    }

    /**
     * Gets the reported name of this provider.
     *
     * @return The provider name.
     */
    @Override
    public String getName() {
        return PROVIDER_NAME;
    }

    /**
     * Gets the descriptors of the algorithms exposed by this provider.
     *
     * @return The descriptors obtained from the supported algorithms.
     */
    @Override
    public Set<MLAlgorithmDescriptor> getAlgorithms() {
        final MLAlgorithmEnum[] algorithms = supportedAlgorithms();
        return MLAlgorithmEnum.getDescriptors(algorithms);
    }

    /**
     * Gets a model creator for the algorithm with the given name.
     *
     * @param algorithmName Name of the requested algorithm.
     * @return A new {@link LightGBMModelCreator} when the name matches a supported algorithm,
     *         otherwise an empty {@link Optional}.
     */
    @Override
    public Optional<LightGBMModelCreator> getModelCreator(final String algorithmName) {
        final MLAlgorithmEnum[] algorithms = supportedAlgorithms();
        return MLAlgorithmEnum.getByName(algorithms, algorithmName)
                .map(matched -> new LightGBMModelCreator());
    }
}
Loading