-
Notifications
You must be signed in to change notification settings - Fork 264
remove perseus from storage calculations #5601
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: unstable
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -349,25 +349,80 @@ def check_feature_flag(self, flag_name): | |
|
|
||
| def check_channel_space(self, channel): | ||
| tree_cte = With(self.get_user_active_trees().distinct(), name="trees") | ||
| files_cte = With( | ||
| tree_cte.join( | ||
| self.files.get_queryset(), contentnode__tree_id=tree_cte.col.tree_id | ||
| ) | ||
| .values("checksum") | ||
| .distinct(), | ||
| name="files", | ||
|
|
||
| user_files_cte = With( | ||
| self.files.get_queryset().only( | ||
| "id", | ||
| "checksum", | ||
| "contentnode_id", | ||
| "file_format_id", | ||
| "file_size", | ||
| "preset_id", | ||
| ), | ||
| name="user_files", | ||
| ) | ||
|
|
||
| staging_tree_files = ( | ||
| self.files.filter(contentnode__tree_id=channel.staging_tree.tree_id) | ||
| editable_files_qs = ( | ||
| user_files_cte.queryset() | ||
| .with_cte(tree_cte) | ||
| .with_cte(files_cte) | ||
| .exclude(Exists(files_cte.queryset().filter(checksum=OuterRef("checksum")))) | ||
| .values("checksum") | ||
| .distinct() | ||
| .with_cte(user_files_cte) | ||
| .filter( | ||
| Exists( | ||
| tree_cte.join( | ||
| ContentNode.objects.only("id", "tree_id"), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| tree_id=tree_cte.col.tree_id, | ||
| ) | ||
| .with_cte(tree_cte) | ||
| .filter(id=OuterRef("contentnode_id")) | ||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| editable_files_qs = self._filter_storage_billable_files(editable_files_qs) | ||
|
|
||
| existing_checksums_cte = With( | ||
| editable_files_qs.values("checksum", "file_format_id").distinct(), | ||
| name="existing_checksums", | ||
| ) | ||
|
|
||
| staging_files_qs = ( | ||
| user_files_cte.queryset() | ||
| .with_cte(user_files_cte) | ||
| .filter( | ||
| Exists( | ||
| ContentNode.objects.only("id").filter( | ||
| tree_id=channel.staging_tree.tree_id, | ||
| id=OuterRef("contentnode_id"), | ||
| ) | ||
|
Comment on lines
+393
to
+396
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like we talked about, I think this query adjustment should bring some improvement! Secondly, similar comment about |
||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| staging_files_qs = self._filter_storage_billable_files(staging_files_qs) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of filtering both |
||
|
|
||
| staging_files_qs = ( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe for clarity, call this queryset something else? |
||
| staging_files_qs.with_cte(tree_cte) | ||
| .with_cte(existing_checksums_cte) | ||
| .exclude( | ||
| Exists( | ||
| existing_checksums_cte.queryset().filter( | ||
| checksum=OuterRef("checksum"), | ||
| file_format_id=OuterRef("file_format_id"), | ||
|
Comment on lines
+409
to
+410
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's possible someone could craft two different files, with different formats, but the same checksum. Although I don't know that we need to be concerned about that, i.e. we can filter solely on |
||
| ) | ||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| unique_staging_ids = ( | ||
| staging_files_qs.order_by("checksum", "id") | ||
| .distinct("checksum") | ||
| .values("id") | ||
| ) | ||
| staged_size = float( | ||
| staging_tree_files.aggregate(used=Sum("file_size"))["used"] or 0 | ||
| staging_files_qs.filter(id__in=Subquery(unique_staging_ids)).aggregate( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should keep the in-subquery in mind later during unstable/hotfixes testing. I believe the query planner should make similar decisions to an |
||
| used=Sum("file_size") | ||
| )["used"] | ||
| or 0 | ||
| ) | ||
|
|
||
| if self.get_available_space() < staged_size: | ||
|
|
@@ -410,13 +465,55 @@ def get_user_active_trees(self): | |
| ) | ||
|
|
||
| def get_user_active_files(self): | ||
| cte = With(self.get_user_active_trees().distinct()) | ||
|
|
||
| return ( | ||
| cte.join(self.files.get_queryset(), contentnode__tree_id=cte.col.tree_id) | ||
| .with_cte(cte) | ||
| .values("checksum") | ||
| .distinct() | ||
| tree_cte = With(self.get_user_active_trees().distinct(), name="trees") | ||
|
|
||
| user_files_cte = With( | ||
| self.files.get_queryset().only( | ||
| "id", | ||
| "checksum", | ||
| "contentnode_id", | ||
| "file_format_id", | ||
| "file_size", | ||
| "preset_id", | ||
| ), | ||
| name="user_files", | ||
| ) | ||
|
|
||
| base_files_qs = ( | ||
| user_files_cte.queryset() | ||
| .with_cte(tree_cte) | ||
| .with_cte(user_files_cte) | ||
| .filter( | ||
| Exists( | ||
| tree_cte.join( | ||
| ContentNode.objects.only("id", "tree_id"), | ||
| tree_id=tree_cte.col.tree_id, | ||
| ) | ||
| .with_cte(tree_cte) | ||
| .filter(id=OuterRef("contentnode_id")) | ||
| ) | ||
| ) | ||
| ) | ||
|
|
||
| base_files_qs = self._filter_storage_billable_files(base_files_qs) | ||
|
|
||
| unique_file_ids = ( | ||
| base_files_qs.order_by("checksum", "id").distinct("checksum").values("id") | ||
| ) | ||
|
|
||
| files_qs = base_files_qs.filter(id__in=Subquery(unique_file_ids)) | ||
|
|
||
| return files_qs | ||
|
|
||
| def _filter_storage_billable_files(self, queryset): | ||
| """ | ||
| Perseus exports would not be included in storage calculations. | ||
| """ | ||
| if queryset is None: | ||
| return queryset | ||
| return queryset.exclude(file_format_id__isnull=True).exclude( | ||
| file_format_id=file_formats.PERSEUS | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Not an immediate concern, but just a heads up that when QTI assessments are more broadly available, and we are generating QTI ZIP files, then we may need to filter these too (and it would need to be on the format preset, rather than the file format id, because the format id would be 'zip'!) |
||
| ) | ||
|
|
||
| def get_space_used(self, active_files=None): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Django queryset method `only` has a connection with using model objects, if I understand correctly. Since this doesn't deal with model objects, `values` seems more appropriate. Under the hood, they may result in the same SELECT query, but I'm unsure.