-
Notifications
You must be signed in to change notification settings - Fork 10
ref PULSEDEV-36792 openml-lightgbm: Identify and fix non-closed SWIG objects #123
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
a48323b
71fd8e5
bd3fe9f
3b96f41
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -130,21 +130,19 @@ static void fit(final Dataset dataset, | |
| final int numIterations = parseInt(params.get(NUM_ITERATIONS_PARAMETER_NAME)); | ||
| logger.debug("LightGBM model trainParams: {}", trainParams); | ||
|
|
||
| final SWIGTrainData swigTrainData = new SWIGTrainData( | ||
| numFeatures, | ||
| instancesPerChunk); | ||
| final SWIGTrainBooster swigTrainBooster = new SWIGTrainBooster(); | ||
|
|
||
| /// Create LightGBM dataset | ||
| createTrainDataset(dataset, numFeatures, trainParams, swigTrainData); | ||
| try (final SWIGTrainBooster swigTrainBooster = new SWIGTrainBooster(); | ||
| final SWIGTrainData swigTrainData = new SWIGTrainData(numFeatures, instancesPerChunk)){ | ||
| /// Create LightGBM dataset | ||
| createTrainDataset(dataset, numFeatures, trainParams, swigTrainData); | ||
|
|
||
| /// Create Booster from dataset | ||
| createBoosterStructure(swigTrainBooster, swigTrainData, trainParams); | ||
| trainBooster(swigTrainBooster.swigBoosterHandle, numIterations); | ||
| /// Create Booster from dataset | ||
| createBoosterStructure(swigTrainBooster, swigTrainData, trainParams); | ||
| trainBooster(swigTrainBooster.swigBoosterHandle, numIterations); | ||
|
|
||
| /// Save model | ||
| saveModelFileToDisk(swigTrainBooster.swigBoosterHandle, outputModelFilePath); | ||
| swigTrainBooster.close(); // Explicitly release C++ resources right away. They're no longer needed. | ||
| /// Save model | ||
| saveModelFileToDisk(swigTrainBooster.swigBoosterHandle, outputModelFilePath); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -231,28 +229,30 @@ private static void initializeLightGBMTrainDatasetFeatures(final SWIGTrainData s | |
|
|
||
| /// First generate the array that has the chunk sizes for `LGBM_DatasetCreateFromMats`. | ||
| final SWIGTYPE_p_int swigChunkSizesArray = genSWIGFeatureChunkSizesArray(swigTrainData, numFeatures); | ||
|
|
||
| /// Now create the LightGBM Dataset itself from the chunks: | ||
| logger.debug("Creating LGBM_Dataset from chunked data..."); | ||
| final int returnCodeLGBM = lightgbmlib.LGBM_DatasetCreateFromMats( | ||
| (int) swigTrainData.swigFeaturesChunkedArray.get_chunks_count(), // numChunks | ||
| swigTrainData.swigFeaturesChunkedArray.data_as_void(), // input data (void**) | ||
| lightgbmlibConstants.C_API_DTYPE_FLOAT64, | ||
| swigChunkSizesArray, | ||
| numFeatures, | ||
| 1, // rowMajor. | ||
| trainParams, // parameters. | ||
| null, // No alighment with other datasets. | ||
| swigTrainData.swigOutDatasetHandlePtr // Output LGBM Dataset | ||
| ); | ||
| if (returnCodeLGBM == -1) { | ||
| logger.error("Could not create LightGBM dataset."); | ||
| throw new LightGBMException(); | ||
| try { | ||
| /// Now create the LightGBM Dataset itself from the chunks: | ||
| logger.debug("Creating LGBM_Dataset from chunked data..."); | ||
| final int returnCodeLGBM = lightgbmlib.LGBM_DatasetCreateFromMats( | ||
| (int) swigTrainData.swigFeaturesChunkedArray.get_chunks_count(), // numChunks | ||
| swigTrainData.swigFeaturesChunkedArray.data_as_void(), // input data (void**) | ||
| lightgbmlibConstants.C_API_DTYPE_FLOAT64, | ||
| swigChunkSizesArray, | ||
| numFeatures, | ||
| 1, // rowMajor. | ||
| trainParams, // parameters. | ||
| null, // No alighment with other datasets. | ||
| swigTrainData.swigOutDatasetHandlePtr // Output LGBM Dataset | ||
| ); | ||
| if (returnCodeLGBM == -1) { | ||
| logger.error("Could not create LightGBM dataset."); | ||
| throw new LightGBMException(); | ||
| } | ||
| // FIXME is this init necessary? | ||
| swigTrainData.initSwigDatasetHandle(); | ||
| } finally { | ||
| lightgbmlib.delete_intArray(swigChunkSizesArray); | ||
| } | ||
|
|
||
| swigTrainData.initSwigDatasetHandle(); | ||
| swigTrainData.destroySwigTrainFeaturesChunkedDataArray(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @AlbertoEAF is there any reason for calling If that is the case and to simplify and be more safe I would remove this
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it's wrong to delete this line. It'll unnecessarily duplicate RAM usage on the heaviest part of the job. Maybe add an inline comment with ("// Critical to save RAM. Do not remove!") along with a comment above explaining why it's here in the first place. Transferring train data to a
Finally we can train the LGBM model from the LGBM Dataset, which can still use ample memory during train, but won't ever reach the 2x memory usage factor. |
||
| lightgbmlib.delete_intArray(swigChunkSizesArray); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -313,7 +313,6 @@ private static void setLightGBMDatasetLabelData(final SWIGTrainData swigTrainDat | |
| throw new LightGBMException(); | ||
| } | ||
|
|
||
| swigTrainData.destroySwigTrainLabelDataArray(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we shouldn't remove this line, instead we should: The reason is because We can think of an improvement by having a closeable class for this case too.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes this is also the same situation as above @mlobofeedzai and should not be removed for the same reasons. I'd say, put a comment in the same manner as in the other case so there are no doubts as to why it's there. I agree with @gandola , changing this to a try/finally is more idiomatic and better behaved in case of prior exceptions 👌. |
||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes this is necessary. It's in here we pass from a
void **to a LGBMDatasetHandle(https://lightgbm.readthedocs.io/en/latest/C-API.html#c.DatasetHandle) so we can pass it around with SWIG.