From 5c8be84328bd6d2172a641916005fb75d3a9db40 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 15 Apr 2025 15:03:47 +0100 Subject: [PATCH 01/27] added more platforms and build types including web --- vscoffline/vsc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vscoffline/vsc.py b/vscoffline/vsc.py index bbf2a52..57bf792 100644 --- a/vscoffline/vsc.py +++ b/vscoffline/vsc.py @@ -7,9 +7,9 @@ from typing import Any, Dict, List, Union import logging as log -PLATFORMS = ["win32", "linux", "linux-deb", "linux-rpm", "darwin", "linux-snap", "server-linux", "server-linux-legacy", "cli-alpine"] -ARCHITECTURES = ["", "x64"] -BUILDTYPES = ["", "archive", "user"] +PLATFORMS = ["win32", "linux", "linux-deb", "linux-rpm", "darwin", "darwin-arm64", "darwin-universal", "linux-snap", "server-linux", "cli-alpine", "server-darwin"] +ARCHITECTURES = ["", "x64", "arm64", "armhf", "alpine"] +BUILDTYPES = ["", "archive", "user", "web"] QUALITIES = ["stable", "insider"] URL_BINUPDATES = r"https://update.code.visualstudio.com/api/update/" From fceed16eab6cd800b7de08df8182bcc976835df3 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 15 Apr 2025 15:04:15 +0100 Subject: [PATCH 02/27] fixed the error for recomendations --- vscoffline/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 308bd65..85a19ba 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -404,7 +404,7 @@ def get_recommendations_old(self, destination): if result.status_code != 200: log.warning( f"get_recommendations failed accessing url {vsc.URL_RECOMMENDATIONS}, unhandled status code {result.status_code}") - return False + return {} # Return an empty dictionary instead of False jresult = result.json() with open(os.path.join(destination, 'recommendations.json'), 'w') as outfile: From f6829c73903ff82d1276c0b2d03a8b247df3b9ca Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:22:01 +0200 Subject: [PATCH 03/27] Add to 
do list --- CHANGELOG.md | 80 ++++++++++++++++------------------------------------ 1 file changed, 24 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12c5b7e..3910005 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,46 +1,41 @@ # Change Log for Visual Studio Code - Offline Gallery and Updater -## [1.0.24] - 2023-06-05 - +## ToDo +- [ ] Cleanup old extension versions +- [ ] Cleanup old binary versions +- [ ] Include existing extensions in update process +- [ ] Determine VSCode version automatically +- [ ] Shorthands for command line arguments + +## `1.0.24` - 2023-06-05 ### Fixed - - Improvements to requests session handling to prevent ConnectionErrors due to repeated connections. Thanks @tomer953 for reporting. ### Added - - Note about Firefox in Readme.md. Thanks @jmorcate for highlighting this gap. ### Changed - - Sort gallery listings with simple python sort. - Removed deprecated logzero dependency, switched to logging. Thanks @bdsoha for the implementation and note. -## [1.0.23] - 2022-11-09 - +## `1.0.23` - 2022-11-09 ### Fixed - - @forky2 resolved an issue related to incorrect version ordering (from reverse-alphanumberical to reverse-chronological), which prevented extensions updating correctly by vscode clients. -## [1.0.22] - 2022-10-31 - +## `1.0.22` - 2022-10-31 ### Added - - @maxtruxa added support for specifying docker container environment variable `SSLARGS` to control SSL arguments, or disable SSL by setting `BIND=0.0.0.0:80` and `SSLARGS=` (empty). ### Changed - - @Precioussheep improved consistency of the codebase, reducing bonus code and added typing. -## [1.0.21] - 2022-08-08 - +## `1.0.21` - 2022-08-08 ### Added - - @tomer953 added support for fetching a specified number of recommended extensions `--total-recommended`. - @Ebsan added support for fetching pre-release extensions `--prerelease-extensions` and fix fetching other extensions [#31](https://github.com/LOLINTERNETZ/vscodeoffline/issues/31). 
- @Ebsan added support for specifying which Visual Studio Code version to masquerade as when fetching extensions `--vscode-version`. ### Changed - - Merge dependabot suggestions for CI pipeline updates. - Utilise individual requests, rather than a Requests session, for fetching extensions to improve stability of fetch process. Should resolve [#33](https://github.com/LOLINTERNETZ/vscodeoffline/issues/33). Thanks @Ebsan for the fix and @annieherram for reporting. - Updated build-in certificate and key to update its expiry [#37](https://github.com/LOLINTERNETZ/vscodeoffline/issues/37). Included CA chain aswell. Thanks for reporting @Ebsan. @@ -48,84 +43,57 @@ - Split out this changelog. ### Fixed - - @tomer953 removed a duplicate flag to QueryFlags. - @Ebsan fixed an issue with downloading cross-platform extensions [#24](https://github.com/LOLINTERNETZ/vscodeoffline/issues/24). -## [1.0.20] - +## `1.0.20` ### Fixed - - Fixed an issue when downloading multiple versions of extensions. Thanks @forky2! -## [1.0.19] - +## `1.0.19` ### Fixed - - Lots of really solid bug fixes. Thank you to @fullylegit! Resilience improvements when fetching from marketplace. Thanks @forky2 and @ebsan. -## [1.0.18] - +## `1.0.18` ### Changed - - Meta release to trigger CI. -## [1.0.17] - +## `1.0.17` ### Changed - - CORS support for gallery. Thanks @kenyon! -## [1.0.16] - +## `1.0.16` ### Changed - - Support for saving sync logs to file. Thanks @ap0yuv! -## [1.0.16] - +## `1.0.16` ### Changed - - Improve extension stats handling. -## [1.0.14] - +## `1.0.14` ### Fixed - - Fixed insider builds being re-fetched. -## [1.0.13] - +## `1.0.13` ### Added - - Added initial support for extension version handling. Hopefully this resolves issue #4. -## [1.0.12] - +## `1.0.12` ### Fixed - - @ttutko fixed a bug preventing multiple build qualities (stable/insider) from being downloaded. Thanks @darkonejr for investigating and reporting. 
-## [1.0.11] - +## `1.0.11` ### Fixed - - Fixed bugs in Gallery sorting, and added timeouts for Sync. -## [1.0.10] - +## `1.0.10` ### Changed - - Refactored to improve consistency. -## [1.0.9] - +## `1.0.9` ### Added - - Added support for Remote Development, currently (2019-05-12) available to insiders. Refactored various badness. -## [1.0.8] - +## `1.0.8` ### Added - - Insiders support and extension packs (remotes). From 07a3b8d968f8c97f3dbb06e9668e3b6e8fe19f85 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:24:39 +0200 Subject: [PATCH 04/27] Fix typo --- vscoffline/sync.py | 1684 ++++++++++++++++++++++---------------------- 1 file changed, 842 insertions(+), 842 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 41bbfc9..c55bef3 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -1,842 +1,842 @@ -from dataclasses import dataclass -import os -import sys -import argparse -import requests -import pathlib -import hashlib -import uuid -import logging -import json -import time -import datetime -from typing import List -from platform import release -import logging as log -from pytimeparse.timeparse import timeparse -import vsc -from distutils.dir_util import create_tree -from requests.adapters import HTTPAdapter, Retry - - -class VSCUpdateDefinition(object): - - def __init__(self, platform=None, architecture=None, buildtype=None, quality=None, - updateurl=None, name=None, version=None, productVersion=None, - hashs=None, timestamp=None, sha256hash=None, supportsFastUpdate=None): - - if not vsc.Utility.validate_platform(platform): - raise ValueError(f"Platform {platform} invalid or not implemented") - - if not vsc.Utility.validate_architecture(architecture): - raise ValueError( - f"Architecture {architecture} invalid or not implemented") - - if not vsc.Utility.validate_buildtype(buildtype): - raise ValueError( - f"Buildtype {buildtype} invalid or not implemented") - - if not vsc.Utility.validate_quality(quality): - raise 
ValueError(f"Quality {quality} invalid or not implemented") - - self.identity = platform - - if architecture: - self.identity += f'-{architecture}' - if buildtype: - self.identity += f'-{buildtype}' - - self.platform = platform - self.architecture = architecture - self.buildtype = buildtype - self.quality = quality - self.updateurl = updateurl - self.name = name - self.version = version - self.productVersion = productVersion - self.hash = hashs - self.timestamp = timestamp - self.sha256hash = sha256hash - self.supportsFastUpdate = supportsFastUpdate - self.checkedForUpdate = False - - def check_for_update(self, old_commit_id=None): - if not old_commit_id: - # To trigger the API to delta - old_commit_id = '7c4205b5c6e52a53b81c69d2b2dc8a627abaa0ba' - - url = vsc.URL_BINUPDATES + \ - f"{self.identity}/{self.quality}/{old_commit_id}" - - log.debug(f'Update url {url}') - result = requests.get(url, allow_redirects=True, timeout=vsc.TIMEOUT) - self.checkedForUpdate = True - - if result.status_code == 204: - # No update available - return False - elif result.status_code != 200: - # Unhandled response from API - log.warning( - f"Update url failed {url}. 
Unhandled status code {result.status_code}") - return False - - jresult = result.json() - - self.updateurl = jresult['url'] - self.name = jresult['name'] - self.version = jresult['version'] - self.productVersion = jresult['productVersion'] - self.hash = jresult['hash'] - self.timestamp = jresult['timestamp'] - self.sha256hash = jresult['sha256hash'] - - if 'supportsFastUpdate' in jresult: - self.supportsFastUpdate = jresult['supportsFastUpdate'] - if self.updateurl: - return True - else: - return False - - def download_update(self, destination): - if not self.checkedForUpdate: - log.warning( - 'Cannot download update if the update definition has not been downloaded') - return - if not self.updateurl: - log.warning( - 'Cannot download update if there is no url to download from') - return - - destination = os.path.join(destination, self.identity, self.quality) - if not os.path.isdir(destination): - os.makedirs(destination) - suffix = pathlib.Path(self.updateurl).suffix - if '.gz' in suffix: - suffix = ''.join(pathlib.Path(self.updateurl).suffixes) - destfile = os.path.join(destination, f'vscode-{self.name}{suffix}') - - if os.path.exists(destfile) and vsc.Utility.hash_file_and_check(destfile, self.sha256hash): - log.debug(f'Previously downloaded {self}') - else: - log.info(f'Downloading {self} to {destfile}') - result = requests.get( - self.updateurl, allow_redirects=True, timeout=vsc.TIMEOUT) - open(destfile, 'wb').write(result.content) - - if not vsc.Utility.hash_file_and_check(destfile, self.sha256hash): - log.warning( - f'HASH MISMATCH for {self} at {destfile} expected {self.sha256hash}. 
Removing local file.') - os.remove(destfile) - return False - log.debug(f'Hash ok for {self} with {self.sha256hash}') - return True - - def save_state(self, destination): - destination = os.path.join(destination, self.identity) - if not os.path.isdir(destination): - os.makedirs(destination) - # Write version details blob as latest - vsc.Utility.write_json(os.path.join( - destination, self.quality, 'latest.json'), self) - # Write version details blob as the commit id - if self.version: - vsc.Utility.write_json(os.path.join( - destination, self.quality, f'{self.version}.json'), self) - - def __repr__(self): - strs = f"<{self.__class__.__name__}> {self.quality}/{self.identity}" - if self.updateurl: - strs += f" - Version: {self.name} ({self.version})" - elif self.checkedForUpdate: - strs += f" - Latest version not available" - return strs - - -@dataclass -class File: - assetType: str - source: str - - @staticmethod - def from_dict(obj) -> 'File': - _assetType = str(obj.get("assetType")) - _source = str(obj.get("source")) - return File(_assetType, _source) - - -@dataclass -class Property: - key: str - value: str - - @staticmethod - def from_dict(obj) -> 'Property': - _key = str(obj.get("key")) - _value = str(obj.get("value")) - return Property(_key, _value) - - -@dataclass -class VSCExtensionVersionDefinition: - - version: str - flags: str - lastUpdated: str - files: List[File] - properties: List[Property] - assetUri: str - fallbackAssetUri: str - - @staticmethod - def from_dict(obj) -> 'VSCExtensionVersionDefinition': - _version = str(obj.get("version")) - _flags = str(obj.get("flags")) - _lastUpdated = str(obj.get("lastUpdated")) - _files = [File.from_dict(y) for y in obj.get("files")] - _properties = [Property.from_dict(y) for y in obj.get("properties")] if obj.get( - "properties") else [] # older versions do not have properties so we need to set to empty array - _assetUri = str(obj.get("assetUri")) - _fallbackAssetUri = str(obj.get("fallbackAssetUri")) - return 
VSCExtensionVersionDefinition(_version, _flags, _lastUpdated, _files, _properties, _assetUri, _fallbackAssetUri) - - def isprerelease(self): - prerelease = False - for property in self.properties: - # if property["key"] == "Microsoft.VisualStudio.Code.PreRelease" and property["value"] == "true": - if property.key == "Microsoft.VisualStudio.Code.PreRelease" and property.value == "true": - prerelease = True - return prerelease - - def __repr__(self): - strs = f"<{self.__class__.__name__}> {self.version} ({self.lastUpdate}) - Version: {self.version}" - return strs - - -class VSCExtensionDefinition(object): - - def __init__(self, identity, raw=None): - self.identity = identity - self.extensionId = None - self.recommended = False - self.versions: List[VSCExtensionVersionDefinition] = [] - if raw: - self.__dict__.update(raw) - if 'extensionId' in raw: - self.extensionId = raw['extensionId'] - - def download_assets(self, destination, session): - for version in self.versions: - targetplatform = '' - if "targetPlatform" in version: - targetplatform = version["targetPlatform"] - ver_destination = os.path.join( - destination, self.identity, version["version"], targetplatform) - for file in version["files"]: - url = file["source"] - if not url: - log.warning( - 'download_asset() cannot download update as asset url is missing') - return - asset = file["assetType"] - destfile = os.path.join(ver_destination, f'{asset}') - create_tree(os.path.abspath(os.sep), (destfile,)) - if not os.path.exists(destfile): - for i in range(5): - try: - if i == 0: - log.debug(f'Downloading {self.identity} {asset} to {destfile}') - else: - log.info(f'Retrying {i+1}, download {self.identity} {asset} to {destfile}') - result = session.get( - url, allow_redirects=True, timeout=vsc.TIMEOUT) - with open(destfile, 'wb') as dest: - dest.write(result.content) - break - except requests.exceptions.ProxyError: - log.info("ProxyError: Retrying.") - except requests.exceptions.ReadTimeout: - 
log.info("ReadTimeout: Retrying.") - - def process_embedded_extensions(self, destination, mp): - """ - Check an extension's Manifest for an extension pack (e.g. more extensions to download) - """ - bonusextensions = [] - for version in self.versions: - targetplatform = '' - if "targetPlatform" in version: - targetplatform = version["targetPlatform"] - manifestpath = os.path.join( - destination, self.identity, version["version"], targetplatform, 'Microsoft.VisualStudio.Code.Manifest') - manifest = vsc.Utility.load_json(manifestpath) - if manifest and 'extensionPack' in manifest: - for extname in manifest['extensionPack']: - bonusextension = mp.search_by_extension_name(extname) - if bonusextension: - bonusextensions.append(bonusextension) - return bonusextensions - - def save_state(self, destination): - destination = os.path.join(destination, self.identity) - if not os.path.isdir(destination): - os.makedirs(destination) - # Save as latest - with open(os.path.join(destination, 'latest.json'), 'w') as outfile: - json.dump(self, outfile, cls=vsc.MagicJsonEncoder, indent=4) - # Save in the version folder - for version in self.versions: - with open(os.path.join(destination, version["version"], 'extension.json'), 'w') as outfile: - json.dump(self, outfile, cls=vsc.MagicJsonEncoder, indent=4) - - def isprerelease(self): - prerelease = False - if "properties" in self.versions[0].keys(): - for property in self.versions[0]["properties"]: - if property["key"] == "Microsoft.VisualStudio.Code.PreRelease" and property["value"] == "true": - prerelease = True - return prerelease - - def get_latest_release_versions(self): - if self.versions and len(self.versions) > 1: - releaseVersions = list(filter(lambda x: VSCExtensionVersionDefinition.from_dict( - x).isprerelease() == False, self.versions)) - if(len(releaseVersions) > 0): - releaseVersions.sort( - reverse=True, key=lambda x: x["lastUpdated"]) - latestversion = releaseVersions[0]["version"] - - filteredversions = [] - for version 
in releaseVersions: - if version["version"] == latestversion: - filteredversions.append(version) - - return filteredversions - return self.versions - - def version(self): - if self.versions and len(self.versions) > 1: - return ";".join(list(map(lambda x: x['version'], self.versions))) - return self.versions[0]['version'] - - def set_recommended(self): - self.recommended = True - - def __repr__(self): - strs = f"<{self.__class__.__name__}> {self.identity} ({self.extensionId}) - Version: {self.version()}" - return strs - - -class VSCUpdates(object): - - @staticmethod - def latest_versions(insider=False): - versions = {} - for platform in vsc.PLATFORMS: - for architecture in vsc.ARCHITECTURES: - for buildtype in vsc.BUILDTYPES: - for quality in vsc.QUALITIES: - if quality == 'insider' and not insider: - continue - if platform == 'win32' and architecture == 'ia32': - continue - if platform == 'darwin' and (architecture != '' or buildtype != ''): - continue - if 'linux' in platform and (architecture == '' or buildtype != ''): - continue - ver = VSCUpdateDefinition( - platform, architecture, buildtype, quality) - ver.check_for_update() - log.info(ver) - versions[f'{ver.identity}-{ver.quality}'] = ver - return versions - - @staticmethod - def signal_updated(artifactdir): - signalpath = os.path.join(artifactdir, 'updated.json') - result = { - 'updated': datetime.datetime.utcnow() - } - with open(signalpath, 'w') as outfile: - json.dump(result, outfile, cls=vsc.MagicJsonEncoder, indent=4) - - -class VSCMarketplace(object): - - def __init__(self, insider, prerelease, version, session): - self.insider = insider - self.prerelease = prerelease - self.version = version - self.session = session - self.backoff = 1 - - def get_recommendations(self, destination, totalrecommended): - recommendations = self.search_top_n(totalrecommended) - recommended_old = self.get_recommendations_old(destination) - - for extension in recommendations: - # If the extension has already been found then 
prevent it from being collected again when processing the old recommendation list - if extension.identity in recommended_old.keys(): - del recommended_old[extension.identity] - - for packagename in recommended_old: - extension = self.search_by_extension_name(packagename) - if extension: - recommendations.append(extension) - else: - log.debug( - f'get_recommendations failed finding a recommended extension by name for {packagename}. This extension has likely been removed.') - - prereleasecount = 0 - for recommendation in recommendations: - recommendation.set_recommended() - # If the found extension is a prerelease version search for the next available release version - if not self.prerelease and recommendation.isprerelease(): - prereleasecount += 1 - extension = self.search_release_by_extension_id( - recommendation.extensionId) - if extension: - recommendation.versions = extension.get_latest_release_versions() - return recommendations - - def get_recommendations_old(self, destination): - result = self.session.get(vsc.URL_RECOMMENDATIONS, - allow_redirects=True, timeout=vsc.TIMEOUT) - if result.status_code != 200: - log.warning( - f"get_recommendations failed accessing url {vsc.URL_RECOMMENDATIONS}, unhandled status code {result.status_code}") - return False - - jresult = result.json() - with open(os.path.join(destination, 'recommendations.json'), 'w') as outfile: - json.dump(jresult, outfile, cls=vsc.MagicJsonEncoder, indent=4) - - # To dict to remove duplicates - packages = {} - for recommendation in jresult['workspaceRecommendations']: - for package in recommendation['recommendations']: - packages[package] = None - - return packages - - def get_malicious(self, destination, extensions=None): - result = self.session.get( - vsc.URL_MALICIOUS, allow_redirects=True, timeout=vsc.TIMEOUT) - if result.status_code != 200: - log.warning( - f"get_malicious failed accessing url {vsc.URL_MALICIOUS}, unhandled status code {result.status_code}") - return False - # Remove random 
utf-8 nbsp from server response - stripped = result.content.decode( - 'utf-8', 'ignore').replace(u'\xa0', u'') - jresult = json.loads(stripped) - with open(os.path.join(destination, 'malicious.json'), 'w') as outfile: - json.dump(jresult, outfile, cls=vsc.MagicJsonEncoder, indent=4) - - if not extensions: - return - - for malicious in jresult['malicious']: - log.debug(f'Malicious extension {malicious}') - if malicious in extensions.keys(): - log.warning( - f'Preventing malicious extension {malicious} from being downloaded') - del extensions[malicious] - - def get_specified(self, specifiedpath): - if not os.path.exists(specifiedpath): - result = { - 'extensions': [] - } - with open(specifiedpath, 'w') as outfile: - json.dump(result, outfile, cls=vsc.MagicJsonEncoder, indent=4) - log.info( - f'Created empty list of custom extensions to mirror at {specifiedpath}') - return - else: - with open(specifiedpath, 'r') as fp: - specifiedextensions = json.load(fp) - if specifiedextensions and 'extensions' in specifiedextensions: - specified = [] - for packagename in specifiedextensions['extensions']: - extension = self.search_by_extension_name(packagename) - if extension: - log.info(f'Adding extension to mirror {packagename}') - specified.append(extension) - else: - log.debug( - f'get_custom failed finding a recommended extension by name for {packagename}. 
This extension has likely been removed.') - return specified - - def search_by_text(self, searchtext): - if searchtext == '*': - searchtext = '' - - return self._query_marketplace(vsc.FilterType.SearchText, searchtext) - - def search_top_n(self, n=200): - log.info(f'Searching for top {n} recommended extensions') - return self._query_marketplace(vsc.FilterType.SearchText, '', limit=n, sortOrder=vsc.SortOrder.Descending, sortBy=vsc.SortBy.InstallCount) - - def search_by_extension_id(self, extensionid): - result = self._query_marketplace( - vsc.FilterType.ExtensionId, extensionid) - if result and len(result) == 1: - return result[0] - else: - log.warning(f"search_by_extension_id failed {extensionid}") - return False - - def search_by_extension_name(self, extensionname): - if self.prerelease: - result = self._query_marketplace( - vsc.FilterType.ExtensionName, extensionname) - else: - releaseQueryFlags = vsc.QueryFlags.IncludeFiles | vsc.QueryFlags.IncludeVersionProperties | vsc.QueryFlags.IncludeAssetUri | \ - vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeVersions - result = self._query_marketplace( - vsc.FilterType.ExtensionName, extensionname, queryFlags=releaseQueryFlags) - if result and len(result) == 1: - result[0].versions = result[0].get_latest_release_versions() - - if result and len(result) == 1: - return result[0] - else: - #log.debug(f"search_by_extension_name failed {extensionname} got {result}") - return False - - def search_release_by_extension_id(self, extensionid): - log.debug( - f'Searching for release candidate by extensionId: {extensionid}') - releaseQueryFlags = vsc.QueryFlags.IncludeFiles | vsc.QueryFlags.IncludeVersionProperties | vsc.QueryFlags.IncludeAssetUri | \ - vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeVersions - result = self._query_marketplace( - vsc.FilterType.ExtensionId, extensionid, queryFlags=releaseQueryFlags) - if result and len(result) 
== 1: - return result[0] - else: - log.warning(f"search_release_by_extension_id failed {extensionid}") - return False - - def backoff_reset(self): - self.backoff = 1 - - def backoff_sleep(self): - time.sleep(self.backoff) - self.backoff *= 2 - - def _query_marketplace(self, filtertype, filtervalue, pageNumber=0, pageSize=500, limit=0, sortOrder=vsc.SortOrder.Default, sortBy=vsc.SortBy.NoneOrRelevance, queryFlags=0): - extensions = {} - total = 0 - count = 0 - - if 0 < limit < pageSize: - pageSize = limit - - while count <= total: - # log.debug(f'Query marketplace count {count} / total {total} - pagenumber {pageNumber}, pagesize {pageSize}') - pageNumber = pageNumber + 1 - query = self._query(filtertype, filtervalue, - pageNumber, pageSize, queryFlags) - result = None - i = 0 - while i < 10: - try: - result = self.session.post(vsc.URL_MARKETPLACEQUERY, headers=self._headers( - ), json=query, allow_redirects=True, timeout=vsc.TIMEOUT) - if result: - self.backoff_reset() - break - elif result.status_code == 429: - # Server is rate limiting us. Backoff. - self.backoff_sleep() - continue - except requests.exceptions.ProxyError: - log.info("ProxyError: Retrying.") - except requests.exceptions.ReadTimeout: - log.info("ReadTimeout: Retrying.") - i += 1 - if i < 10: - log.info("Retrying pull page %d attempt %d." % - (pageNumber, i+1)) - if not result: - log.info("Failed 10 attempts to query marketplace. Giving up.") - break - jresult = result.json() - count = count + pageSize - if 'results' in jresult: - for jres in jresult['results']: - for extension in jres['extensions']: - identity = extension['publisher']['publisherName'] + \ - '.' 
+ extension['extensionName'] - mpd = VSCExtensionDefinition( - identity=identity, raw=extension) - extensions[identity] = mpd - - if 'resultMetadata' in jres: - for resmd in jres['resultMetadata']: - if 'ResultCount' in resmd['metadataType']: - total = resmd['metadataItems'][0]['count'] - if limit > 0 and count >= limit: - break - - return list(extensions.values()) - - def _query(self, filtertype, filtervalue, pageNumber, pageSize, queryFlags=0): - if queryFlags == 0: - queryFlags = self._query_flags() - payload = { - 'assetTypes': [], - 'filters': [self._query_filter(filtertype, filtervalue, pageNumber, pageSize)], - 'flags': int(queryFlags) - } - return payload - - def _query_filter(self, filtertype, filtervalue, pageNumber, pageSize): - result = { - 'pageNumber': pageNumber, - 'pageSize': pageSize, - 'sortBy': vsc.SortBy.NoneOrRelevance, - 'sortOrder': vsc.SortOrder.Default, - 'criteria': [ - self._query_filter_criteria( - vsc.FilterType.Target, 'Microsoft.VisualStudio.Code'), - self._query_filter_criteria( - vsc.FilterType.ExcludeWithFlags, str(int(vsc.QueryFlags.Unpublished))) - ] - } - - if filtervalue != '': - result['criteria'].append( - self._query_filter_criteria(filtertype, filtervalue) - ) - - return result - - def _query_filter_criteria(self, filtertype, queryvalue): - return { - 'filterType': int(filtertype), - 'value': queryvalue - } - - def _query_flags(self): - # return QueryFlags(914) - return vsc.QueryFlags.IncludeFiles | vsc.QueryFlags.IncludeVersionProperties | vsc.QueryFlags.IncludeAssetUri | \ - vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeLatestVersionOnly - - def _headers(self): - if self.insider: - insider = '-insider' - else: - insider = '' - return { - 'content-type': 'application/json', - 'accept': 'application/json;api-version=3.0-preview.1', - 'accept-encoding': 'gzip, deflate, br', - 'User-Agent': f'VSCode {self.version}{insider}', - 'x-market-client-Id': f'VSCode {self.version}{insider}', - 'x-market-user-Id': 
str(uuid.uuid4()) - } - - def __repr__(self): - strs = f"<{self.__class__.__name__}>" - return strs - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Synchronises VSCode in an Offline Environment') - parser.add_argument('--sync', dest='sync', action='store_true', - help='The basic-user sync. It includes stable binaries and typical extensions') - parser.add_argument('--syncall', dest='syncall', action='store_true', - help='The power-user sync. It includes all binaries and extensions ') - parser.add_argument('--artifacts', dest='artifactdir', - default='../artifacts/', help='Path to downloaded artifacts') - parser.add_argument('--frequency', dest='frequency', default=None, - help='The frequency to try and update (e.g. sleep for \'12h\' and try again') - - # Arguments to tweak behaviour - parser.add_argument('--check-binaries', dest='checkbinaries', - action='store_true', help='Check for updated binaries') - parser.add_argument('--check-insider', dest='checkinsider', - action='store_true', help='Check for updated insider binaries') - parser.add_argument('--check-recommended-extensions', dest='checkextensions', - action='store_true', help='Check for recommended extensions') - parser.add_argument('--check-specified-extensions', dest='checkspecified', - action='store_true', help='Check for extensions in /specified.json') - parser.add_argument('--extension-name', dest='extensionname', - help='Find a specific extension by name') - parser.add_argument('--extension-search', dest='extensionsearch', - help='Search for a set of extensions') - parser.add_argument('--prerelease-extensions', dest='prerelease', - action='store_true', help='Download prerelease extensions. 
Defaults to false.') - parser.add_argument('--update-binaries', dest='updatebinaries', - action='store_true', help='Download binaries') - parser.add_argument('--update-extensions', dest='updateextensions', - action='store_true', help='Download extensions') - parser.add_argument('--update-malicious-extensions', dest='updatemalicious', - action='store_true', help='Update the malicious extension list') - parser.add_argument('--skip-binaries', dest='skipbinaries', - action='store_true', help='Skip downloading binaries') - parser.add_argument('--vscode-version', dest='version', - default='1.69.2', help='VSCode version to search extensions as.') - parser.add_argument('--total-recommended', type=int, dest='totalrecommended', default=500, - help='Total number of recommended extensions to sync from Search API. Defaults to 500') - parser.add_argument('--debug', dest='debug', - action='store_true', help='Show debug output') - parser.add_argument('--logfile', dest='logfile', default=None, - help='Sets a logfile to store loggging output') - config = parser.parse_args() - - if config.debug: - loglevel = logging.DEBUG - else: - loglevel = logging.INFO - - if config.logfile: - log_dir = os.path.dirname(os.path.abspath(config.logfile)) - if not os.path.exists(log_dir): - raise FileNotFoundError( - f'Log directory does not exist at {log_dir}') - logging.basicConfig(filename=config.logfile, encoding='utf-8', level=loglevel) - else: - log.basicConfig( - format='[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d] %(message)s', - datefmt='%y%m%d %H:%M:%S', - level=loglevel - ) - - config.artifactdir_installers = os.path.join( - os.path.abspath(config.artifactdir), 'installers') - config.artifactdir_extensions = os.path.join( - os.path.abspath(config.artifactdir), 'extensions') - - if config.sync or config.syncall: - config.checkbinaries = True - config.checkextensions = True - config.updatebinaries = True - config.updateextensions = True - config.updatemalicious = True - 
config.checkspecified = True - if not config.frequency: - config.frequency = '12h' - - if config.syncall: - config.extensionsearch = '*' - config.checkinsider = True - - if config.artifactdir: - if not os.path.isdir(config.artifactdir): - raise FileNotFoundError( - f'Artifact directory does not exist at {config.artifactdir}') - - if config.updatebinaries and not config.checkbinaries: - config.checkbinaries = True - - if config.frequency: - config.frequency = timeparse(config.frequency) - - session = requests.Session() - retries = Retry(total=5, - backoff_factor=0.1, - status_forcelist=[ 500, 502, 503, 504 ]) - session.mount('https://', HTTPAdapter(max_retries=retries)) - - while True: - versions = [] - extensions = {} - mp = VSCMarketplace(config.checkinsider, - config.prerelease, config.version, session) - - if config.checkbinaries and not config.skipbinaries: - log.info('Syncing VS Code Update Versions') - versions = VSCUpdates.latest_versions(config.checkinsider) - - if config.updatebinaries and not config.skipbinaries: - log.info('Syncing VS Code Binaries') - for idkey in versions: - if versions[idkey].updateurl: - result = versions[idkey].download_update( - config.artifactdir_installers) - - # Only save the reference json if the download was successful - if result: - versions[idkey].save_state( - config.artifactdir_installers) - - if config.checkspecified: - log.info('Syncing VS Code Specified Extensions') - specifiedpath = os.path.join(os.path.abspath( - config.artifactdir), 'specified.json') - specified = mp.get_specified(specifiedpath) - if specified: - for item in specified: - log.info(item) - extensions[item.identity] = item - - if config.extensionsearch: - log.info( - f'Searching for VS Code Extension: {config.extensionsearch}') - results = mp.search_by_text(config.extensionsearch) - log.info(f'Found {len(results)} extensions') - for item in results: - log.debug(item) - extensions[item.identity] = item - - if config.extensionname: - log.info( - 
f'Checking Specific VS Code Extension: {config.extensionname}') - result = mp.search_by_extension_name(config.extensionname) - if result: - extensions[result.identity] = result - - if config.checkextensions: - log.info('Syncing VS Code Recommended Extensions') - recommended = mp.get_recommendations(os.path.abspath( - config.artifactdir), config.totalrecommended) - for item in recommended: - extensions[item.identity] = item - - if config.updatemalicious: - log.info('Syncing VS Code Malicious Extension List') - malicious = mp.get_malicious( - os.path.abspath(config.artifactdir), extensions) - - if config.updateextensions: - log.info( - f'Checking and Downloading Updates for {len(extensions)} Extensions') - count = 0 - bonus = [] - for identity in extensions: - log.debug(f'Fetching extension: {identity}') - if count % 100 == 0: - log.info( - f'Progress {count}/{len(extensions)} ({count/len(extensions)*100:.1f}%)') - extensions[identity].download_assets( - config.artifactdir_extensions, session) - bonus = extensions[identity].process_embedded_extensions( - config.artifactdir_extensions, mp) + bonus - extensions[identity].save_state(config.artifactdir_extensions) - count = count + 1 - - for bonusextension in bonus: - log.debug(f'Processing Embedded Extension: {bonusextension}') - bonusextension.download_assets(config.artifactdir_extensions, session) - bonusextension.save_state(config.artifactdir_extensions) - - # Check if we did anything - if config.checkbinaries or config.checkextensions or config.updatebinaries or config.updateextensions or config.updatemalicious or config.checkspecified or config.checkinsider: - log.info('Complete') - VSCUpdates.signal_updated(os.path.abspath(config.artifactdir)) - - # Check if we need to sleep - if config.frequency: - log.info( - f'Going to sleep for {vsc.Utility.seconds_to_human_time(config.frequency)}') - time.sleep(config.frequency) - else: - break - else: - log.info('Nothing to do') - break - +from dataclasses import dataclass 
+import os +import sys +import argparse +import requests +import pathlib +import hashlib +import uuid +import logging +import json +import time +import datetime +from typing import List +from platform import release +import logging as log +from pytimeparse.timeparse import timeparse +import vsc +from distutils.dir_util import create_tree +from requests.adapters import HTTPAdapter, Retry + + +class VSCUpdateDefinition(object): + + def __init__(self, platform=None, architecture=None, buildtype=None, quality=None, + updateurl=None, name=None, version=None, productVersion=None, + hashs=None, timestamp=None, sha256hash=None, supportsFastUpdate=None): + + if not vsc.Utility.validate_platform(platform): + raise ValueError(f"Platform {platform} invalid or not implemented") + + if not vsc.Utility.validate_architecture(architecture): + raise ValueError( + f"Architecture {architecture} invalid or not implemented") + + if not vsc.Utility.validate_buildtype(buildtype): + raise ValueError( + f"Buildtype {buildtype} invalid or not implemented") + + if not vsc.Utility.validate_quality(quality): + raise ValueError(f"Quality {quality} invalid or not implemented") + + self.identity = platform + + if architecture: + self.identity += f'-{architecture}' + if buildtype: + self.identity += f'-{buildtype}' + + self.platform = platform + self.architecture = architecture + self.buildtype = buildtype + self.quality = quality + self.updateurl = updateurl + self.name = name + self.version = version + self.productVersion = productVersion + self.hash = hashs + self.timestamp = timestamp + self.sha256hash = sha256hash + self.supportsFastUpdate = supportsFastUpdate + self.checkedForUpdate = False + + def check_for_update(self, old_commit_id=None): + if not old_commit_id: + # To trigger the API to delta + old_commit_id = '7c4205b5c6e52a53b81c69d2b2dc8a627abaa0ba' + + url = vsc.URL_BINUPDATES + \ + f"{self.identity}/{self.quality}/{old_commit_id}" + + log.debug(f'Update url {url}') + result = 
requests.get(url, allow_redirects=True, timeout=vsc.TIMEOUT) + self.checkedForUpdate = True + + if result.status_code == 204: + # No update available + return False + elif result.status_code != 200: + # Unhandled response from API + log.warning( + f"Update url failed {url}. Unhandled status code {result.status_code}") + return False + + jresult = result.json() + + self.updateurl = jresult['url'] + self.name = jresult['name'] + self.version = jresult['version'] + self.productVersion = jresult['productVersion'] + self.hash = jresult['hash'] + self.timestamp = jresult['timestamp'] + self.sha256hash = jresult['sha256hash'] + + if 'supportsFastUpdate' in jresult: + self.supportsFastUpdate = jresult['supportsFastUpdate'] + if self.updateurl: + return True + else: + return False + + def download_update(self, destination): + if not self.checkedForUpdate: + log.warning( + 'Cannot download update if the update definition has not been downloaded') + return + if not self.updateurl: + log.warning( + 'Cannot download update if there is no url to download from') + return + + destination = os.path.join(destination, self.identity, self.quality) + if not os.path.isdir(destination): + os.makedirs(destination) + suffix = pathlib.Path(self.updateurl).suffix + if '.gz' in suffix: + suffix = ''.join(pathlib.Path(self.updateurl).suffixes) + destfile = os.path.join(destination, f'vscode-{self.name}{suffix}') + + if os.path.exists(destfile) and vsc.Utility.hash_file_and_check(destfile, self.sha256hash): + log.debug(f'Previously downloaded {self}') + else: + log.info(f'Downloading {self} to {destfile}') + result = requests.get( + self.updateurl, allow_redirects=True, timeout=vsc.TIMEOUT) + open(destfile, 'wb').write(result.content) + + if not vsc.Utility.hash_file_and_check(destfile, self.sha256hash): + log.warning( + f'HASH MISMATCH for {self} at {destfile} expected {self.sha256hash}. 
Removing local file.') + os.remove(destfile) + return False + log.debug(f'Hash ok for {self} with {self.sha256hash}') + return True + + def save_state(self, destination): + destination = os.path.join(destination, self.identity) + if not os.path.isdir(destination): + os.makedirs(destination) + # Write version details blob as latest + vsc.Utility.write_json(os.path.join( + destination, self.quality, 'latest.json'), self) + # Write version details blob as the commit id + if self.version: + vsc.Utility.write_json(os.path.join( + destination, self.quality, f'{self.version}.json'), self) + + def __repr__(self): + strs = f"<{self.__class__.__name__}> {self.quality}/{self.identity}" + if self.updateurl: + strs += f" - Version: {self.name} ({self.version})" + elif self.checkedForUpdate: + strs += f" - Latest version not available" + return strs + + +@dataclass +class File: + assetType: str + source: str + + @staticmethod + def from_dict(obj) -> 'File': + _assetType = str(obj.get("assetType")) + _source = str(obj.get("source")) + return File(_assetType, _source) + + +@dataclass +class Property: + key: str + value: str + + @staticmethod + def from_dict(obj) -> 'Property': + _key = str(obj.get("key")) + _value = str(obj.get("value")) + return Property(_key, _value) + + +@dataclass +class VSCExtensionVersionDefinition: + + version: str + flags: str + lastUpdated: str + files: List[File] + properties: List[Property] + assetUri: str + fallbackAssetUri: str + + @staticmethod + def from_dict(obj) -> 'VSCExtensionVersionDefinition': + _version = str(obj.get("version")) + _flags = str(obj.get("flags")) + _lastUpdated = str(obj.get("lastUpdated")) + _files = [File.from_dict(y) for y in obj.get("files")] + _properties = [Property.from_dict(y) for y in obj.get("properties")] if obj.get( + "properties") else [] # older versions do not have properties so we need to set to empty array + _assetUri = str(obj.get("assetUri")) + _fallbackAssetUri = str(obj.get("fallbackAssetUri")) + return 
VSCExtensionVersionDefinition(_version, _flags, _lastUpdated, _files, _properties, _assetUri, _fallbackAssetUri) + + def isprerelease(self): + prerelease = False + for property in self.properties: + # if property["key"] == "Microsoft.VisualStudio.Code.PreRelease" and property["value"] == "true": + if property.key == "Microsoft.VisualStudio.Code.PreRelease" and property.value == "true": + prerelease = True + return prerelease + + def __repr__(self): + strs = f"<{self.__class__.__name__}> {self.version} ({self.lastUpdate}) - Version: {self.version}" + return strs + + +class VSCExtensionDefinition(object): + + def __init__(self, identity, raw=None): + self.identity = identity + self.extensionId = None + self.recommended = False + self.versions: List[VSCExtensionVersionDefinition] = [] + if raw: + self.__dict__.update(raw) + if 'extensionId' in raw: + self.extensionId = raw['extensionId'] + + def download_assets(self, destination, session): + for version in self.versions: + targetplatform = '' + if "targetPlatform" in version: + targetplatform = version["targetPlatform"] + ver_destination = os.path.join( + destination, self.identity, version["version"], targetplatform) + for file in version["files"]: + url = file["source"] + if not url: + log.warning( + 'download_asset() cannot download update as asset url is missing') + return + asset = file["assetType"] + destfile = os.path.join(ver_destination, f'{asset}') + create_tree(os.path.abspath(os.sep), (destfile,)) + if not os.path.exists(destfile): + for i in range(5): + try: + if i == 0: + log.debug(f'Downloading {self.identity} {asset} to {destfile}') + else: + log.info(f'Retrying {i+1}, download {self.identity} {asset} to {destfile}') + result = session.get( + url, allow_redirects=True, timeout=vsc.TIMEOUT) + with open(destfile, 'wb') as dest: + dest.write(result.content) + break + except requests.exceptions.ProxyError: + log.info("ProxyError: Retrying.") + except requests.exceptions.ReadTimeout: + 
log.info("ReadTimeout: Retrying.") + + def process_embedded_extensions(self, destination, mp): + """ + Check an extension's Manifest for an extension pack (e.g. more extensions to download) + """ + bonusextensions = [] + for version in self.versions: + targetplatform = '' + if "targetPlatform" in version: + targetplatform = version["targetPlatform"] + manifestpath = os.path.join( + destination, self.identity, version["version"], targetplatform, 'Microsoft.VisualStudio.Code.Manifest') + manifest = vsc.Utility.load_json(manifestpath) + if manifest and 'extensionPack' in manifest: + for extname in manifest['extensionPack']: + bonusextension = mp.search_by_extension_name(extname) + if bonusextension: + bonusextensions.append(bonusextension) + return bonusextensions + + def save_state(self, destination): + destination = os.path.join(destination, self.identity) + if not os.path.isdir(destination): + os.makedirs(destination) + # Save as latest + with open(os.path.join(destination, 'latest.json'), 'w') as outfile: + json.dump(self, outfile, cls=vsc.MagicJsonEncoder, indent=4) + # Save in the version folder + for version in self.versions: + with open(os.path.join(destination, version["version"], 'extension.json'), 'w') as outfile: + json.dump(self, outfile, cls=vsc.MagicJsonEncoder, indent=4) + + def isprerelease(self): + prerelease = False + if "properties" in self.versions[0].keys(): + for property in self.versions[0]["properties"]: + if property["key"] == "Microsoft.VisualStudio.Code.PreRelease" and property["value"] == "true": + prerelease = True + return prerelease + + def get_latest_release_versions(self): + if self.versions and len(self.versions) > 1: + releaseVersions = list(filter(lambda x: VSCExtensionVersionDefinition.from_dict( + x).isprerelease() == False, self.versions)) + if(len(releaseVersions) > 0): + releaseVersions.sort( + reverse=True, key=lambda x: x["lastUpdated"]) + latestversion = releaseVersions[0]["version"] + + filteredversions = [] + for version 
in releaseVersions: + if version["version"] == latestversion: + filteredversions.append(version) + + return filteredversions + return self.versions + + def version(self): + if self.versions and len(self.versions) > 1: + return ";".join(list(map(lambda x: x['version'], self.versions))) + return self.versions[0]['version'] + + def set_recommended(self): + self.recommended = True + + def __repr__(self): + strs = f"<{self.__class__.__name__}> {self.identity} ({self.extensionId}) - Version: {self.version()}" + return strs + + +class VSCUpdates(object): + + @staticmethod + def latest_versions(insider=False): + versions = {} + for platform in vsc.PLATFORMS: + for architecture in vsc.ARCHITECTURES: + for buildtype in vsc.BUILDTYPES: + for quality in vsc.QUALITIES: + if quality == 'insider' and not insider: + continue + if platform == 'win32' and architecture == 'ia32': + continue + if platform == 'darwin' and (architecture != '' or buildtype != ''): + continue + if 'linux' in platform and (architecture == '' or buildtype != ''): + continue + ver = VSCUpdateDefinition( + platform, architecture, buildtype, quality) + ver.check_for_update() + log.info(ver) + versions[f'{ver.identity}-{ver.quality}'] = ver + return versions + + @staticmethod + def signal_updated(artifactdir): + signalpath = os.path.join(artifactdir, 'updated.json') + result = { + 'updated': datetime.datetime.utcnow() + } + with open(signalpath, 'w') as outfile: + json.dump(result, outfile, cls=vsc.MagicJsonEncoder, indent=4) + + +class VSCMarketplace(object): + + def __init__(self, insider, prerelease, version, session): + self.insider = insider + self.prerelease = prerelease + self.version = version + self.session = session + self.backoff = 1 + + def get_recommendations(self, destination, totalrecommended): + recommendations = self.search_top_n(totalrecommended) + recommended_old = self.get_recommendations_old(destination) + + for extension in recommendations: + # If the extension has already been found then 
prevent it from being collected again when processing the old recommendation list + if extension.identity in recommended_old.keys(): + del recommended_old[extension.identity] + + for packagename in recommended_old: + extension = self.search_by_extension_name(packagename) + if extension: + recommendations.append(extension) + else: + log.debug( + f'get_recommendations failed finding a recommended extension by name for {packagename}. This extension has likely been removed.') + + prereleasecount = 0 + for recommendation in recommendations: + recommendation.set_recommended() + # If the found extension is a prerelease version search for the next available release version + if not self.prerelease and recommendation.isprerelease(): + prereleasecount += 1 + extension = self.search_release_by_extension_id( + recommendation.extensionId) + if extension: + recommendation.versions = extension.get_latest_release_versions() + return recommendations + + def get_recommendations_old(self, destination): + result = self.session.get(vsc.URL_RECOMMENDATIONS, + allow_redirects=True, timeout=vsc.TIMEOUT) + if result.status_code != 200: + log.warning( + f"get_recommendations failed accessing url {vsc.URL_RECOMMENDATIONS}, unhandled status code {result.status_code}") + return False + + jresult = result.json() + with open(os.path.join(destination, 'recommendations.json'), 'w') as outfile: + json.dump(jresult, outfile, cls=vsc.MagicJsonEncoder, indent=4) + + # To dict to remove duplicates + packages = {} + for recommendation in jresult['workspaceRecommendations']: + for package in recommendation['recommendations']: + packages[package] = None + + return packages + + def get_malicious(self, destination, extensions=None): + result = self.session.get( + vsc.URL_MALICIOUS, allow_redirects=True, timeout=vsc.TIMEOUT) + if result.status_code != 200: + log.warning( + f"get_malicious failed accessing url {vsc.URL_MALICIOUS}, unhandled status code {result.status_code}") + return False + # Remove random 
utf-8 nbsp from server response + stripped = result.content.decode( + 'utf-8', 'ignore').replace(u'\xa0', u'') + jresult = json.loads(stripped) + with open(os.path.join(destination, 'malicious.json'), 'w') as outfile: + json.dump(jresult, outfile, cls=vsc.MagicJsonEncoder, indent=4) + + if not extensions: + return + + for malicious in jresult['malicious']: + log.debug(f'Malicious extension {malicious}') + if malicious in extensions.keys(): + log.warning( + f'Preventing malicious extension {malicious} from being downloaded') + del extensions[malicious] + + def get_specified(self, specifiedpath): + if not os.path.exists(specifiedpath): + result = { + 'extensions': [] + } + with open(specifiedpath, 'w') as outfile: + json.dump(result, outfile, cls=vsc.MagicJsonEncoder, indent=4) + log.info( + f'Created empty list of custom extensions to mirror at {specifiedpath}') + return + else: + with open(specifiedpath, 'r') as fp: + specifiedextensions = json.load(fp) + if specifiedextensions and 'extensions' in specifiedextensions: + specified = [] + for packagename in specifiedextensions['extensions']: + extension = self.search_by_extension_name(packagename) + if extension: + log.info(f'Adding extension to mirror {packagename}') + specified.append(extension) + else: + log.debug( + f'get_custom failed finding a recommended extension by name for {packagename}. 
This extension has likely been removed.') + return specified + + def search_by_text(self, searchtext): + if searchtext == '*': + searchtext = '' + + return self._query_marketplace(vsc.FilterType.SearchText, searchtext) + + def search_top_n(self, n=200): + log.info(f'Searching for top {n} recommended extensions') + return self._query_marketplace(vsc.FilterType.SearchText, '', limit=n, sortOrder=vsc.SortOrder.Descending, sortBy=vsc.SortBy.InstallCount) + + def search_by_extension_id(self, extensionid): + result = self._query_marketplace( + vsc.FilterType.ExtensionId, extensionid) + if result and len(result) == 1: + return result[0] + else: + log.warning(f"search_by_extension_id failed {extensionid}") + return False + + def search_by_extension_name(self, extensionname): + if self.prerelease: + result = self._query_marketplace( + vsc.FilterType.ExtensionName, extensionname) + else: + releaseQueryFlags = vsc.QueryFlags.IncludeFiles | vsc.QueryFlags.IncludeVersionProperties | vsc.QueryFlags.IncludeAssetUri | \ + vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeVersions + result = self._query_marketplace( + vsc.FilterType.ExtensionName, extensionname, queryFlags=releaseQueryFlags) + if result and len(result) == 1: + result[0].versions = result[0].get_latest_release_versions() + + if result and len(result) == 1: + return result[0] + else: + #log.debug(f"search_by_extension_name failed {extensionname} got {result}") + return False + + def search_release_by_extension_id(self, extensionid): + log.debug( + f'Searching for release candidate by extensionId: {extensionid}') + releaseQueryFlags = vsc.QueryFlags.IncludeFiles | vsc.QueryFlags.IncludeVersionProperties | vsc.QueryFlags.IncludeAssetUri | \ + vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeVersions + result = self._query_marketplace( + vsc.FilterType.ExtensionId, extensionid, queryFlags=releaseQueryFlags) + if result and len(result) 
== 1: + return result[0] + else: + log.warning(f"search_release_by_extension_id failed {extensionid}") + return False + + def backoff_reset(self): + self.backoff = 1 + + def backoff_sleep(self): + time.sleep(self.backoff) + self.backoff *= 2 + + def _query_marketplace(self, filtertype, filtervalue, pageNumber=0, pageSize=500, limit=0, sortOrder=vsc.SortOrder.Default, sortBy=vsc.SortBy.NoneOrRelevance, queryFlags=0): + extensions = {} + total = 0 + count = 0 + + if 0 < limit < pageSize: + pageSize = limit + + while count <= total: + # log.debug(f'Query marketplace count {count} / total {total} - pagenumber {pageNumber}, pagesize {pageSize}') + pageNumber = pageNumber + 1 + query = self._query(filtertype, filtervalue, + pageNumber, pageSize, queryFlags) + result = None + i = 0 + while i < 10: + try: + result = self.session.post(vsc.URL_MARKETPLACEQUERY, headers=self._headers( + ), json=query, allow_redirects=True, timeout=vsc.TIMEOUT) + if result: + self.backoff_reset() + break + elif result.status_code == 429: + # Server is rate limiting us. Backoff. + self.backoff_sleep() + continue + except requests.exceptions.ProxyError: + log.info("ProxyError: Retrying.") + except requests.exceptions.ReadTimeout: + log.info("ReadTimeout: Retrying.") + i += 1 + if i < 10: + log.info("Retrying pull page %d attempt %d." % + (pageNumber, i+1)) + if not result: + log.info("Failed 10 attempts to query marketplace. Giving up.") + break + jresult = result.json() + count = count + pageSize + if 'results' in jresult: + for jres in jresult['results']: + for extension in jres['extensions']: + identity = extension['publisher']['publisherName'] + \ + '.' 
+ extension['extensionName'] + mpd = VSCExtensionDefinition( + identity=identity, raw=extension) + extensions[identity] = mpd + + if 'resultMetadata' in jres: + for resmd in jres['resultMetadata']: + if 'ResultCount' in resmd['metadataType']: + total = resmd['metadataItems'][0]['count'] + if limit > 0 and count >= limit: + break + + return list(extensions.values()) + + def _query(self, filtertype, filtervalue, pageNumber, pageSize, queryFlags=0): + if queryFlags == 0: + queryFlags = self._query_flags() + payload = { + 'assetTypes': [], + 'filters': [self._query_filter(filtertype, filtervalue, pageNumber, pageSize)], + 'flags': int(queryFlags) + } + return payload + + def _query_filter(self, filtertype, filtervalue, pageNumber, pageSize): + result = { + 'pageNumber': pageNumber, + 'pageSize': pageSize, + 'sortBy': vsc.SortBy.NoneOrRelevance, + 'sortOrder': vsc.SortOrder.Default, + 'criteria': [ + self._query_filter_criteria( + vsc.FilterType.Target, 'Microsoft.VisualStudio.Code'), + self._query_filter_criteria( + vsc.FilterType.ExcludeWithFlags, str(int(vsc.QueryFlags.Unpublished))) + ] + } + + if filtervalue != '': + result['criteria'].append( + self._query_filter_criteria(filtertype, filtervalue) + ) + + return result + + def _query_filter_criteria(self, filtertype, queryvalue): + return { + 'filterType': int(filtertype), + 'value': queryvalue + } + + def _query_flags(self): + # return QueryFlags(914) + return vsc.QueryFlags.IncludeFiles | vsc.QueryFlags.IncludeVersionProperties | vsc.QueryFlags.IncludeAssetUri | \ + vsc.QueryFlags.IncludeStatistics | vsc.QueryFlags.IncludeLatestVersionOnly + + def _headers(self): + if self.insider: + insider = '-insider' + else: + insider = '' + return { + 'content-type': 'application/json', + 'accept': 'application/json;api-version=3.0-preview.1', + 'accept-encoding': 'gzip, deflate, br', + 'User-Agent': f'VSCode {self.version}{insider}', + 'x-market-client-Id': f'VSCode {self.version}{insider}', + 'x-market-user-Id': 
str(uuid.uuid4()) + } + + def __repr__(self): + strs = f"<{self.__class__.__name__}>" + return strs + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Synchronises VSCode in an Offline Environment') + parser.add_argument('--sync', dest='sync', action='store_true', + help='The basic-user sync. It includes stable binaries and typical extensions') + parser.add_argument('--syncall', dest='syncall', action='store_true', + help='The power-user sync. It includes all binaries and extensions ') + parser.add_argument('--artifacts', dest='artifactdir', + default='../artifacts/', help='Path to downloaded artifacts') + parser.add_argument('--frequency', dest='frequency', default=None, + help='The frequency to try and update (e.g. sleep for \'12h\' and try again)') + + # Arguments to tweak behaviour + parser.add_argument('--check-binaries', dest='checkbinaries', + action='store_true', help='Check for updated binaries') + parser.add_argument('--check-insider', dest='checkinsider', + action='store_true', help='Check for updated insider binaries') + parser.add_argument('--check-recommended-extensions', dest='checkextensions', + action='store_true', help='Check for recommended extensions') + parser.add_argument('--check-specified-extensions', dest='checkspecified', + action='store_true', help='Check for extensions in /specified.json') + parser.add_argument('--extension-name', dest='extensionname', + help='Find a specific extension by name') + parser.add_argument('--extension-search', dest='extensionsearch', + help='Search for a set of extensions') + parser.add_argument('--prerelease-extensions', dest='prerelease', + action='store_true', help='Download prerelease extensions. 
Defaults to false.') + parser.add_argument('--update-binaries', dest='updatebinaries', + action='store_true', help='Download binaries') + parser.add_argument('--update-extensions', dest='updateextensions', + action='store_true', help='Download extensions') + parser.add_argument('--update-malicious-extensions', dest='updatemalicious', + action='store_true', help='Update the malicious extension list') + parser.add_argument('--skip-binaries', dest='skipbinaries', + action='store_true', help='Skip downloading binaries') + parser.add_argument('--vscode-version', dest='version', + default='1.69.2', help='VSCode version to search extensions as.') + parser.add_argument('--total-recommended', type=int, dest='totalrecommended', default=500, + help='Total number of recommended extensions to sync from Search API. Defaults to 500') + parser.add_argument('--debug', dest='debug', + action='store_true', help='Show debug output') + parser.add_argument('--logfile', dest='logfile', default=None, + help='Sets a logfile to store loggging output') + config = parser.parse_args() + + if config.debug: + loglevel = logging.DEBUG + else: + loglevel = logging.INFO + + if config.logfile: + log_dir = os.path.dirname(os.path.abspath(config.logfile)) + if not os.path.exists(log_dir): + raise FileNotFoundError( + f'Log directory does not exist at {log_dir}') + logging.basicConfig(filename=config.logfile, encoding='utf-8', level=loglevel) + else: + log.basicConfig( + format='[%(levelname)1.1s %(asctime)s %(module)s:%(lineno)d] %(message)s', + datefmt='%y%m%d %H:%M:%S', + level=loglevel + ) + + config.artifactdir_installers = os.path.join( + os.path.abspath(config.artifactdir), 'installers') + config.artifactdir_extensions = os.path.join( + os.path.abspath(config.artifactdir), 'extensions') + + if config.sync or config.syncall: + config.checkbinaries = True + config.checkextensions = True + config.updatebinaries = True + config.updateextensions = True + config.updatemalicious = True + 
config.checkspecified = True + if not config.frequency: + config.frequency = '12h' + + if config.syncall: + config.extensionsearch = '*' + config.checkinsider = True + + if config.artifactdir: + if not os.path.isdir(config.artifactdir): + raise FileNotFoundError( + f'Artifact directory does not exist at {config.artifactdir}') + + if config.updatebinaries and not config.checkbinaries: + config.checkbinaries = True + + if config.frequency: + config.frequency = timeparse(config.frequency) + + session = requests.Session() + retries = Retry(total=5, + backoff_factor=0.1, + status_forcelist=[ 500, 502, 503, 504 ]) + session.mount('https://', HTTPAdapter(max_retries=retries)) + + while True: + versions = [] + extensions = {} + mp = VSCMarketplace(config.checkinsider, + config.prerelease, config.version, session) + + if config.checkbinaries and not config.skipbinaries: + log.info('Syncing VS Code Update Versions') + versions = VSCUpdates.latest_versions(config.checkinsider) + + if config.updatebinaries and not config.skipbinaries: + log.info('Syncing VS Code Binaries') + for idkey in versions: + if versions[idkey].updateurl: + result = versions[idkey].download_update( + config.artifactdir_installers) + + # Only save the reference json if the download was successful + if result: + versions[idkey].save_state( + config.artifactdir_installers) + + if config.checkspecified: + log.info('Syncing VS Code Specified Extensions') + specifiedpath = os.path.join(os.path.abspath( + config.artifactdir), 'specified.json') + specified = mp.get_specified(specifiedpath) + if specified: + for item in specified: + log.info(item) + extensions[item.identity] = item + + if config.extensionsearch: + log.info( + f'Searching for VS Code Extension: {config.extensionsearch}') + results = mp.search_by_text(config.extensionsearch) + log.info(f'Found {len(results)} extensions') + for item in results: + log.debug(item) + extensions[item.identity] = item + + if config.extensionname: + log.info( + 
f'Checking Specific VS Code Extension: {config.extensionname}') + result = mp.search_by_extension_name(config.extensionname) + if result: + extensions[result.identity] = result + + if config.checkextensions: + log.info('Syncing VS Code Recommended Extensions') + recommended = mp.get_recommendations(os.path.abspath( + config.artifactdir), config.totalrecommended) + for item in recommended: + extensions[item.identity] = item + + if config.updatemalicious: + log.info('Syncing VS Code Malicious Extension List') + malicious = mp.get_malicious( + os.path.abspath(config.artifactdir), extensions) + + if config.updateextensions: + log.info( + f'Checking and Downloading Updates for {len(extensions)} Extensions') + count = 0 + bonus = [] + for identity in extensions: + log.debug(f'Fetching extension: {identity}') + if count % 100 == 0: + log.info( + f'Progress {count}/{len(extensions)} ({count/len(extensions)*100:.1f}%)') + extensions[identity].download_assets( + config.artifactdir_extensions, session) + bonus = extensions[identity].process_embedded_extensions( + config.artifactdir_extensions, mp) + bonus + extensions[identity].save_state(config.artifactdir_extensions) + count = count + 1 + + for bonusextension in bonus: + log.debug(f'Processing Embedded Extension: {bonusextension}') + bonusextension.download_assets(config.artifactdir_extensions, session) + bonusextension.save_state(config.artifactdir_extensions) + + # Check if we did anything + if config.checkbinaries or config.checkextensions or config.updatebinaries or config.updateextensions or config.updatemalicious or config.checkspecified or config.checkinsider: + log.info('Complete') + VSCUpdates.signal_updated(os.path.abspath(config.artifactdir)) + + # Check if we need to sleep + if config.frequency: + log.info( + f'Going to sleep for {vsc.Utility.seconds_to_human_time(config.frequency)}') + time.sleep(config.frequency) + else: + break + else: + log.info('Nothing to do') + break + From 
a5c8b78e72677bb058fda7ba11e42b88dff69169 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:33:23 +0200 Subject: [PATCH 05/27] Remove output to "malicious.json" As the file is not used anywhere else, removed the output to it which simplifies (and presumably slightly speeds up) the method. --- vscoffline/sync.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index c55bef3..941f42d 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -419,7 +419,11 @@ def get_recommendations_old(self, destination): return packages - def get_malicious(self, destination, extensions=None): + def get_malicious(self, extensions=None): + if not extensions: + return + + # Query Microsofts list result = self.session.get( vsc.URL_MALICIOUS, allow_redirects=True, timeout=vsc.TIMEOUT) if result.status_code != 200: @@ -430,11 +434,6 @@ def get_malicious(self, destination, extensions=None): stripped = result.content.decode( 'utf-8', 'ignore').replace(u'\xa0', u'') jresult = json.loads(stripped) - with open(os.path.join(destination, 'malicious.json'), 'w') as outfile: - json.dump(jresult, outfile, cls=vsc.MagicJsonEncoder, indent=4) - - if not extensions: - return for malicious in jresult['malicious']: log.debug(f'Malicious extension {malicious}') @@ -800,7 +799,7 @@ def __repr__(self): if config.updatemalicious: log.info('Syncing VS Code Malicious Extension List') malicious = mp.get_malicious( - os.path.abspath(config.artifactdir), extensions) + extensions) if config.updateextensions: log.info( From bb29181e4886830edf5923bdc6be55283abaae94 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:34:55 +0200 Subject: [PATCH 06/27] Change test order of malicious extensions Instead of iterating over all malicious extensions (which can be a rather large list), iterate over all extensions that are to be downloaded. 
--- vscoffline/sync.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 941f42d..6c66107 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -435,12 +435,11 @@ def get_malicious(self, extensions=None): 'utf-8', 'ignore').replace(u'\xa0', u'') jresult = json.loads(stripped) - for malicious in jresult['malicious']: - log.debug(f'Malicious extension {malicious}') - if malicious in extensions.keys(): + for extension in extensions[:]: # Iterate over a copy of the extension collection + if extension in jresult['malicious']: log.warning( - f'Preventing malicious extension {malicious} from being downloaded') - del extensions[malicious] + f'Preventing malicious extension {extension} from being downloaded') + del extensions[extension] def get_specified(self, specifiedpath): if not os.path.exists(specifiedpath): From 2b7f07a3e6316b436fc6374bdc31b438895e5539 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:41:04 +0200 Subject: [PATCH 07/27] Detect latest VS Code version if argument is left empty This increases the runtime as the web request must be executed first. But the version will always be the latest. As it is only used for the user agent it shouldn't even matter what version it is. 
--- vscoffline/sync.py | 17 +++++++++++++++-- vscoffline/vscsync/requirements.txt | 7 ++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 6c66107..49dfd25 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -4,7 +4,6 @@ import argparse import requests import pathlib -import hashlib import uuid import logging import json @@ -17,6 +16,7 @@ import vsc from distutils.dir_util import create_tree from requests.adapters import HTTPAdapter, Retry +from packaging.version import Version class VSCUpdateDefinition(object): @@ -351,6 +351,19 @@ def latest_versions(insider=False): versions[f'{ver.identity}-{ver.quality}'] = ver return versions + @staticmethod + def latest_version(insider=False): + versions = VSCUpdates.latest_versions(insider) + latestVersion = Version('0.0.0') + for version in versions.items(): + productVersion = version[1].productVersion + if not productVersion: + break + productVersion = Version(productVersion) + if productVersion > latestVersion: + latestVersion = productVersion + return str(latestVersion) + @staticmethod def signal_updated(artifactdir): signalpath = os.path.join(artifactdir, 'updated.json') @@ -677,7 +690,7 @@ def __repr__(self): parser.add_argument('--skip-binaries', dest='skipbinaries', action='store_true', help='Skip downloading binaries') parser.add_argument('--vscode-version', dest='version', - default='1.69.2', help='VSCode version to search extensions as.') + default=VSCUpdates.latest_version(), help='VSCode version to search extensions as.') parser.add_argument('--total-recommended', type=int, dest='totalrecommended', default=500, help='Total number of recommended extensions to sync from Search API. 
Defaults to 500') parser.add_argument('--debug', dest='debug', diff --git a/vscoffline/vscsync/requirements.txt b/vscoffline/vscsync/requirements.txt index 2b98212..13ec905 100644 --- a/vscoffline/vscsync/requirements.txt +++ b/vscoffline/vscsync/requirements.txt @@ -1,3 +1,4 @@ -requests -pytimeparse -setuptools +requests +pytimeparse +setuptools +packaging \ No newline at end of file From 37bccaca614c70bcb94adf50f9d44b95796df2df Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:45:18 +0200 Subject: [PATCH 08/27] Add garbage collection Removes existing old extension versions that are no longer needed. --- CHANGELOG.md | 2 +- vscoffline/sync.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3910005..b672bfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Change Log for Visual Studio Code - Offline Gallery and Updater ## ToDo -- [ ] Cleanup old extension versions +- [X] Cleanup old extension versions - [ ] Cleanup old binary versions - [ ] Include existing extensions in update process - [ ] Determine VSCode version automatically diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 49dfd25..eba3aed 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -327,6 +327,25 @@ def __repr__(self): return strs +class VSCExtension: + + @staticmethod + def remove_old(artifactdir_extensions): + for path, directoryNames, fileNames in os.walk(artifactdir_extensions): + path = path.replace(artifactdir_extensions, '') + if path == '': + continue # Skip root directory + if str(path).count(os.path.sep) > 1: + continue # Skip any subdirectory + versions = sorted(directoryNames, key=lambda dir: Version(dir), reverse=True) + versions.remove(versions[0]) + if len(versions) == 0: + continue # Only a single version exists + for version in versions: + # Delete all left over versions + log.debug(f'Remove version {version} of {os.path.basename(path)}') + + class 
VSCUpdates(object): @staticmethod @@ -697,6 +716,8 @@ def __repr__(self): action='store_true', help='Show debug output') parser.add_argument('--logfile', dest='logfile', default=None, help='Sets a logfile to store loggging output') + parser.add_argument('--garbage-collection', dest='garbageCollection', + action='store_true', help='Remove old versions of artifacts (binaries / extensions)') config = parser.parse_args() if config.debug: @@ -760,7 +781,7 @@ def __repr__(self): config.prerelease, config.version, session) if config.checkbinaries and not config.skipbinaries: - log.info('Syncing VS Code Update Versions') + log.info('Syncing VS Code Update Binaries') versions = VSCUpdates.latest_versions(config.checkinsider) if config.updatebinaries and not config.skipbinaries: @@ -775,6 +796,12 @@ def __repr__(self): versions[idkey].save_state( config.artifactdir_installers) + if config.garbageCollection: + # ToDo Garbage collection for old binaries + # log.info('Removing old VS Code Binaries') + log.info('Removing old VS Code Extensions') + VSCExtension.remove_old(config.artifactdir_extensions) + if config.checkspecified: log.info('Syncing VS Code Specified Extensions') specifiedpath = os.path.join(os.path.abspath( From 3ebe486aa9533237aaa78070f27a30e0e6b346dc Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:51:26 +0200 Subject: [PATCH 09/27] Add update for existing extensions Existing extensions can be included in the update process. 
--- CHANGELOG.md | 2 +- vscoffline/sync.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b672bfe..eeab24a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## ToDo - [X] Cleanup old extension versions - [ ] Cleanup old binary versions -- [ ] Include existing extensions in update process +- [X] Include existing extensions in update process - [ ] Determine VSCode version automatically - [ ] Shorthands for command line arguments diff --git a/vscoffline/sync.py b/vscoffline/sync.py index eba3aed..2cf4244 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +import glob import os import sys import argparse @@ -555,6 +556,15 @@ def backoff_sleep(self): time.sleep(self.backoff) self.backoff *= 2 + def get_existing(self, artifactdir_extensions): + extensions=[] + for extension in glob.glob(os.path.join(artifactdir_extensions, '*', 'latest.json')): + manifest = vsc.Utility.load_json(extension) + result = self.search_by_extension_id(manifest['identity']) + if result: + extensions += result + return extensions + def _query_marketplace(self, filtertype, filtervalue, pageNumber=0, pageSize=500, limit=0, sortOrder=vsc.SortOrder.Default, sortBy=vsc.SortBy.NoneOrRelevance, queryFlags=0): extensions = {} total = 0 @@ -716,6 +726,8 @@ def __repr__(self): action='store_true', help='Show debug output') parser.add_argument('--logfile', dest='logfile', default=None, help='Sets a logfile to store loggging output') + parser.add_argument('--include-existing', dest='existing', + action='store_true', help='Include existing extensions in the update process') parser.add_argument('--garbage-collection', dest='garbageCollection', action='store_true', help='Remove old versions of artifacts (binaries / extensions)') config = parser.parse_args() @@ -752,6 +764,7 @@ def __repr__(self): config.checkspecified = True if not config.frequency: config.frequency = 
'12h' + config.existing = True if config.syncall: config.extensionsearch = '*' @@ -765,6 +778,9 @@ def __repr__(self): if config.updatebinaries and not config.checkbinaries: config.checkbinaries = True + if config.existing: + config.updateextensions = True + if config.frequency: config.frequency = timeparse(config.frequency) @@ -802,6 +818,13 @@ def __repr__(self): log.info('Removing old VS Code Extensions') VSCExtension.remove_old(config.artifactdir_extensions) + if config.existing and not config.skipExisting: + log.info('Get existing extensions from artifact directory') + existing = mp.get_existing(config.artifactdir_extensions) + if existing: + for item in existing: + extensions[item.identity] = item + if config.checkspecified: log.info('Syncing VS Code Specified Extensions') specifiedpath = os.path.join(os.path.abspath( From 7b50a5f2f3295b7f65c79f9f006d97bbad1362a6 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:54:38 +0200 Subject: [PATCH 10/27] Add garbage collection for binaries --- CHANGELOG.md | 2 +- vscoffline/sync.py | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eeab24a..48d7574 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## ToDo - [X] Cleanup old extension versions -- [ ] Cleanup old binary versions +- [X] Cleanup old binary versions - [X] Include existing extensions in update process - [ ] Determine VSCode version automatically - [ ] Shorthands for command line arguments diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 2cf4244..bb73dba 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -393,6 +393,25 @@ def signal_updated(artifactdir): with open(signalpath, 'w') as outfile: json.dump(result, outfile, cls=vsc.MagicJsonEncoder, indent=4) + @staticmethod + def remove_old(artifactdir_installers): + for path, directoryNames, fileNames in os.walk(artifactdir_installers): + path = path.replace(artifactdir_installers, '') + 
if path == '': + continue # Skip root directory + if str(path).count(os.path.sep) == 1: + continue # Skip version directory + if str(path).count(os.path.sep) > 2: + continue # Skip any subdirectory + filtered = filter(lambda file: not file.endswith('.json'), fileNames) + versions = sorted(filtered, key=lambda file: Version(re.findall('\d+\.\d+\.\d+', file)[0]), reverse=True) + versions.remove(versions[0]) + if len(versions) == 0: + continue # Only a single version exists + for version in versions: + # Delete all left over versions + log.debug(f'Remove version {version} of {path[path.index(os.path.sep)]}') + class VSCMarketplace(object): @@ -813,8 +832,8 @@ def __repr__(self): config.artifactdir_installers) if config.garbageCollection: - # ToDo Garbage collection for old binaries - # log.info('Removing old VS Code Binaries') + log.info('Removing old VS Code Binaries') + VSCUpdates.remove_old(config.artifactdir_installers) log.info('Removing old VS Code Extensions') VSCExtension.remove_old(config.artifactdir_extensions) From 134d959405db204668422f2a43ac7c5a5475ec40 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 11:56:59 +0200 Subject: [PATCH 11/27] Add option to skip update of existing extensions --- vscoffline/sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index bb73dba..12bc1a8 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -747,6 +747,8 @@ def __repr__(self): help='Sets a logfile to store loggging output') parser.add_argument('--include-existing', dest='existing', action='store_true', help='Include existing extensions in the update process') + parser.add_argument('--skip-existing', dest='skipExisting', + action='store_true', help='Skip inclusion of existing extensions in the update process') parser.add_argument('--garbage-collection', dest='garbageCollection', action='store_true', help='Remove old versions of artifacts (binaries / extensions)') config = parser.parse_args() From 
1c476bd977a3b336bf8b28b659906be4f704ed5e Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 12:00:37 +0200 Subject: [PATCH 12/27] Remove default frequency for sync Changes default execution to a "one-off task" --- vscoffline/sync.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 12bc1a8..31e5c54 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -783,8 +783,6 @@ def __repr__(self): config.updateextensions = True config.updatemalicious = True config.checkspecified = True - if not config.frequency: - config.frequency = '12h' config.existing = True if config.syncall: From b96421f5506a6fb4c20dd7a66be759c63fc76e2d Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 12:06:23 +0200 Subject: [PATCH 13/27] Fix for Microsoft api change #81 This does not include an updated certificate! The certificate still is valid for the old domain. --- README.md | 4 +- vscoffline/vsc.py | 388 +++++++++++++++++++++++----------------------- 2 files changed, 196 insertions(+), 196 deletions(-) diff --git a/README.md b/README.md index f850e36..e1e5a83 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ On the non-Internet connected system: 1. On the non-Internet connected system, ensure the following DNS addresses are pointed toward the vscgallery service. * update.code.visualstudio.com - * az764295.vo.msecnd.net + * main.vscode-cdn.net * marketplace.visualstudio.com This may be achieved using a corporate DNS server, or by modifying a client's host file. @@ -90,7 +90,7 @@ This guide will setup the vscsync and vscgallery service on the same Docker host 2. Point the DNS addresses to the vscgallery service. * update.code.visualstudio.com - * az764295.vo.msecnd.net + * main.vscode-cdn.net * marketplace.visualstudio.com This may be achieved using a corporate DNS server, or by modifying a client's host file. 
diff --git a/vscoffline/vsc.py b/vscoffline/vsc.py index 61cb8a6..050499d 100644 --- a/vscoffline/vsc.py +++ b/vscoffline/vsc.py @@ -1,194 +1,194 @@ -import datetime -import hashlib -import json -import os -import pathlib -from enum import IntFlag -from typing import Any, Dict, List, Union -import logging as log - -PLATFORMS = ["win32", "linux", "linux-deb", "linux-rpm", "darwin", "linux-snap", "server-linux", "server-linux-legacy", "cli-alpine"] -ARCHITECTURES = ["", "x64"] -BUILDTYPES = ["", "archive", "user"] -QUALITIES = ["stable", "insider"] - -URL_BINUPDATES = r"https://update.code.visualstudio.com/api/update/" -URL_RECOMMENDATIONS = r"https://az764295.vo.msecnd.net/extensions/workspaceRecommendations.json.gz" -URL_MARKETPLACEQUERY = r"https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery" -URL_MALICIOUS = r"https://az764295.vo.msecnd.net/extensions/marketplace.json" - -URLROOT = "https://update.code.visualstudio.com" -ARTIFACTS = "/artifacts/" -ARTIFACTS_INSTALLERS = "/artifacts/installers" -ARTIFACTS_EXTENSIONS = "/artifacts/extensions" -ARTIFACT_RECOMMENDATION = "/artifacts/recommendations.json" -ARTIFACT_MALICIOUS = "/artifacts/malicious.json" - -TIMEOUT = 12 - - -class QueryFlags(IntFlag): - __no_flags_name__ = "NoneDefined" - NoneDefined = 0x0 - IncludeVersions = 0x1 - IncludeFiles = 0x2 - IncludeCategoryAndTags = 0x4 - IncludeSharedAccounts = 0x8 - IncludeVersionProperties = 0x10 - ExcludeNonValidated = 0x20 - IncludeInstallationTargets = 0x40 - IncludeAssetUri = 0x80 - IncludeStatistics = 0x100 - IncludeLatestVersionOnly = 0x200 - Unpublished = 0x1000 - - -class FilterType(IntFlag): - __no_flags_name__ = "Target" - Tag = 1 - ExtensionId = 4 - Category = 5 - ExtensionName = 7 - Target = 8 - Featured = 9 - SearchText = 10 - ExcludeWithFlags = 12 - UndefinedType = 14 - - -class SortBy(IntFlag): - __no_flags_name__ = "NoneOrRelevance" - NoneOrRelevance = 0 - LastUpdatedDate = 1 - Title = 2 - PublisherName = 3 - InstallCount = 4 - 
PublishedDate = 5 - AverageRating = 6 - WeightedRating = 12 - - -class SortOrder(IntFlag): - __no_flags_name__ = "Default" - Default = 0 - Ascending = 1 - Descending = 2 - - -class MagicJsonEncoder(json.JSONEncoder): - def default(self, o: Any) -> Union[str, Dict[str, Any]]: - try: - return super().default(o) - except TypeError as err: - # could be datetime - if isinstance(o, datetime.datetime): - return o.isoformat() - # could also be cls with slots - try: - return {key: getattr(o, key, None) for key in o.__slots__} - except AttributeError: - pass - # finally, should have a dict if it is a dataclass or another cls - try: - return o.__dict__ - except AttributeError: - raise TypeError( - "Can't encode object. Tried isoformat of datetime, class slots and class dict" - ) from err - - -class Utility: - """ - Utility tool - """ - - @staticmethod - def hash_file_and_check(filepath: Union[str, pathlib.Path], expectedchecksum: str) -> bool: - """ - Hashes a file and checks for the expected checksum. - Checksum is sha256 default implementation. - """ - h = hashlib.sha256() - with open(filepath, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - h.update(chunk) - return expectedchecksum == h.hexdigest() - - @staticmethod - def load_json(filepath: Union[str, pathlib.Path]) -> Union[List[Any], Dict[str, Any]]: - if isinstance(filepath, str): - filepath: pathlib.Path = pathlib.Path(filepath) - - result = [] - if not filepath.exists(): - log.debug(f"Unable to load json from {filepath.absolute()}. Does not exist.") - return result - elif filepath.is_dir(): - log.debug(f"Cannot load json at path {filepath.absolute()}. 
It is a directory") - return result - - with open(filepath, "r", encoding="utf-8-sig") as fp: - try: - result = json.load(fp) - if not result: - return [] - except json.decoder.JSONDecodeError as err: - log.debug(f"JSONDecodeError while processing {filepath.absolute()} \n error: {str(err)}") - return [] - except UnicodeDecodeError as err: - log.debug(f"UnicodeDecodeError while processing {filepath.absolute()} \n error: {str(err)}") - return [] - return result - - @staticmethod - def write_json(filepath: Union[str, pathlib.Path], content: Dict[str, Any]) -> None: - with open(filepath, "w") as outfile: - json.dump(content, outfile, cls=MagicJsonEncoder, indent=4) - - @staticmethod - def first_file(filepath: Union[str, pathlib.Path], pattern: str, reverse: bool = False) -> Union[str, bool]: - if isinstance(filepath, str): - filepath = pathlib.Path(filepath) - results = [*filepath.glob(pattern)] - if not results: - return False - elif len(results) >= 1 and reverse: - results.sort(reverse=True) - return str(results[0].absolute()) - - @staticmethod - def folders_in_folder(filepath: str) -> List[str]: - listing = [f for f in os.listdir(filepath) if os.path.isdir(os.path.join(filepath, f))] - listing.sort() - return listing - - @staticmethod - def files_in_folder(filepath: str) -> List[str]: - listing = [f for f in os.listdir(filepath) if os.path.isfile(os.path.join(filepath, f))] - listing.sort() - return listing - - - @staticmethod - def seconds_to_human_time(seconds: int) -> str: - return str(datetime.timedelta(seconds=seconds)) - - @staticmethod - def from_json_datetime(jsondate: str) -> datetime.datetime: - return datetime.datetime.strptime(jsondate, "%Y-%m-%dT%H:%M:%S.%fZ") - - @staticmethod - def validate_platform(platform: str) -> bool: - return platform in PLATFORMS - - @staticmethod - def validate_architecture(arch: str) -> bool: - return arch in ARCHITECTURES - - @staticmethod - def validate_buildtype(buildtype: str) -> bool: - return buildtype in BUILDTYPES - - 
@staticmethod - def validate_quality(quality: str) -> bool: - return quality in QUALITIES +import datetime +import hashlib +import json +import os +import pathlib +from enum import IntFlag +from typing import Any, Dict, List, Union +import logging as log + +PLATFORMS = ["win32", "linux", "linux-deb", "linux-rpm", "darwin", "linux-snap", "server-linux", "server-linux-legacy", "cli-alpine"] +ARCHITECTURES = ["", "x64"] +BUILDTYPES = ["", "archive", "user"] +QUALITIES = ["stable", "insider"] + +URL_BINUPDATES = r"https://update.code.visualstudio.com/api/update/" +URL_RECOMMENDATIONS = r"https://main.vscode-cdn.net/extensions/marketplace.json" +URL_MARKETPLACEQUERY = r"https://marketplace.visualstudio.com/_apis/public/gallery/extensionquery" +URL_MALICIOUS = r"https://main.vscode-cdn.net/extensions/marketplace.json" + +URLROOT = "https://update.code.visualstudio.com" +ARTIFACTS = "/artifacts/" +ARTIFACTS_INSTALLERS = "/artifacts/installers" +ARTIFACTS_EXTENSIONS = "/artifacts/extensions" +ARTIFACT_RECOMMENDATION = "/artifacts/recommendations.json" +ARTIFACT_MALICIOUS = "/artifacts/malicious.json" + +TIMEOUT = 12 + + +class QueryFlags(IntFlag): + __no_flags_name__ = "NoneDefined" + NoneDefined = 0x0 + IncludeVersions = 0x1 + IncludeFiles = 0x2 + IncludeCategoryAndTags = 0x4 + IncludeSharedAccounts = 0x8 + IncludeVersionProperties = 0x10 + ExcludeNonValidated = 0x20 + IncludeInstallationTargets = 0x40 + IncludeAssetUri = 0x80 + IncludeStatistics = 0x100 + IncludeLatestVersionOnly = 0x200 + Unpublished = 0x1000 + + +class FilterType(IntFlag): + __no_flags_name__ = "Target" + Tag = 1 + ExtensionId = 4 + Category = 5 + ExtensionName = 7 + Target = 8 + Featured = 9 + SearchText = 10 + ExcludeWithFlags = 12 + UndefinedType = 14 + + +class SortBy(IntFlag): + __no_flags_name__ = "NoneOrRelevance" + NoneOrRelevance = 0 + LastUpdatedDate = 1 + Title = 2 + PublisherName = 3 + InstallCount = 4 + PublishedDate = 5 + AverageRating = 6 + WeightedRating = 12 + + +class 
SortOrder(IntFlag): + __no_flags_name__ = "Default" + Default = 0 + Ascending = 1 + Descending = 2 + + +class MagicJsonEncoder(json.JSONEncoder): + def default(self, o: Any) -> Union[str, Dict[str, Any]]: + try: + return super().default(o) + except TypeError as err: + # could be datetime + if isinstance(o, datetime.datetime): + return o.isoformat() + # could also be cls with slots + try: + return {key: getattr(o, key, None) for key in o.__slots__} + except AttributeError: + pass + # finally, should have a dict if it is a dataclass or another cls + try: + return o.__dict__ + except AttributeError: + raise TypeError( + "Can't encode object. Tried isoformat of datetime, class slots and class dict" + ) from err + + +class Utility: + """ + Utility tool + """ + + @staticmethod + def hash_file_and_check(filepath: Union[str, pathlib.Path], expectedchecksum: str) -> bool: + """ + Hashes a file and checks for the expected checksum. + Checksum is sha256 default implementation. + """ + h = hashlib.sha256() + with open(filepath, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + h.update(chunk) + return expectedchecksum == h.hexdigest() + + @staticmethod + def load_json(filepath: Union[str, pathlib.Path]) -> Union[List[Any], Dict[str, Any]]: + if isinstance(filepath, str): + filepath: pathlib.Path = pathlib.Path(filepath) + + result = [] + if not filepath.exists(): + log.debug(f"Unable to load json from {filepath.absolute()}. Does not exist.") + return result + elif filepath.is_dir(): + log.debug(f"Cannot load json at path {filepath.absolute()}. 
It is a directory") + return result + + with open(filepath, "r", encoding="utf-8-sig") as fp: + try: + result = json.load(fp) + if not result: + return [] + except json.decoder.JSONDecodeError as err: + log.debug(f"JSONDecodeError while processing {filepath.absolute()} \n error: {str(err)}") + return [] + except UnicodeDecodeError as err: + log.debug(f"UnicodeDecodeError while processing {filepath.absolute()} \n error: {str(err)}") + return [] + return result + + @staticmethod + def write_json(filepath: Union[str, pathlib.Path], content: Dict[str, Any]) -> None: + with open(filepath, "w") as outfile: + json.dump(content, outfile, cls=MagicJsonEncoder, indent=4) + + @staticmethod + def first_file(filepath: Union[str, pathlib.Path], pattern: str, reverse: bool = False) -> Union[str, bool]: + if isinstance(filepath, str): + filepath = pathlib.Path(filepath) + results = [*filepath.glob(pattern)] + if not results: + return False + elif len(results) >= 1 and reverse: + results.sort(reverse=True) + return str(results[0].absolute()) + + @staticmethod + def folders_in_folder(filepath: str) -> List[str]: + listing = [f for f in os.listdir(filepath) if os.path.isdir(os.path.join(filepath, f))] + listing.sort() + return listing + + @staticmethod + def files_in_folder(filepath: str) -> List[str]: + listing = [f for f in os.listdir(filepath) if os.path.isfile(os.path.join(filepath, f))] + listing.sort() + return listing + + + @staticmethod + def seconds_to_human_time(seconds: int) -> str: + return str(datetime.timedelta(seconds=seconds)) + + @staticmethod + def from_json_datetime(jsondate: str) -> datetime.datetime: + return datetime.datetime.strptime(jsondate, "%Y-%m-%dT%H:%M:%S.%fZ") + + @staticmethod + def validate_platform(platform: str) -> bool: + return platform in PLATFORMS + + @staticmethod + def validate_architecture(arch: str) -> bool: + return arch in ARCHITECTURES + + @staticmethod + def validate_buildtype(buildtype: str) -> bool: + return buildtype in BUILDTYPES + + 
@staticmethod + def validate_quality(quality: str) -> bool: + return quality in QUALITIES From a3bb37f8311bc8015e91475a8e11dae4ea166e43 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 13:24:55 +0200 Subject: [PATCH 14/27] Fix platforms win32 to win32-x64 #81 Thanks @AndreasAhlbeck https://github.com/LOLINTERNETZ/vscodeoffline/issues/81#issuecomment-2838539698 --- vscoffline/sync.py | 2 +- vscoffline/vsc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index aab19ee..c77f346 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -358,7 +358,7 @@ def latest_versions(insider=False): for quality in vsc.QUALITIES: if quality == 'insider' and not insider: continue - if platform == 'win32' and architecture == 'ia32': + if platform == 'win32-x64' and architecture == 'ia32': continue if platform == 'darwin' and (architecture != '' or buildtype != ''): continue diff --git a/vscoffline/vsc.py b/vscoffline/vsc.py index ab4f0d1..65b97c1 100644 --- a/vscoffline/vsc.py +++ b/vscoffline/vsc.py @@ -7,7 +7,7 @@ from typing import Any, Dict, List, Union import logging as log -PLATFORMS = ["win32", "linux", "linux-deb", "linux-rpm", "darwin", "darwin-arm64", "darwin-universal", "linux-snap", "server-linux", "server-linux-legacy", "cli-alpine"] +PLATFORMS = ["win32-x64", "linux", "linux-deb", "linux-rpm", "darwin", "darwin-arm64", "darwin-universal", "linux-snap", "server-linux", "server-linux-legacy", "cli-alpine"] ARCHITECTURES = ["", "x64", "arm64", "armhf", "alpine"] BUILDTYPES = ["", "archive", "user", "web"] QUALITIES = ["stable", "insider"] From e04b456b79a911836e5c89cd53d345291d32c051 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 13:28:06 +0200 Subject: [PATCH 15/27] Fix retrieval of existing extensions --- vscoffline/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index c77f346..e557745 100644 --- 
a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -579,9 +579,9 @@ def get_existing(self, artifactdir_extensions): extensions=[] for extension in glob.glob(os.path.join(artifactdir_extensions, '*', 'latest.json')): manifest = vsc.Utility.load_json(extension) - result = self.search_by_extension_id(manifest['identity']) + result = self.search_by_extension_id(manifest['extensionId']) if result: - extensions += result + extensions.append(result) return extensions def _query_marketplace(self, filtertype, filtervalue, pageNumber=0, pageSize=500, limit=0, sortOrder=vsc.SortOrder.Default, sortBy=vsc.SortBy.NoneOrRelevance, queryFlags=0): From 724318078f8aa6d7a9f57803a306040f368cea0c Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 13:29:02 +0200 Subject: [PATCH 16/27] Fix retrieval of malicious extensions --- vscoffline/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index e557745..da87955 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -487,7 +487,7 @@ def get_malicious(self, extensions=None): 'utf-8', 'ignore').replace(u'\xa0', u'') jresult = json.loads(stripped) - for extension in extensions[:]: # Iterate over a copy of the extension collection + for extension in (extensions.copy()): if extension in jresult['malicious']: log.warning( f'Preventing malicious extension {extension} from being downloaded') From fb3c3227e94cbc67dde4faa3b0b145a6457d5626 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 13:46:04 +0200 Subject: [PATCH 17/27] Add shorthand for most arguments Most arguments / parameters now have a shorthand form. Eg. --sync can also be called with -s. This should simplify / shorten the command line in automations. 
--- CHANGELOG.md | 2 +- vscoffline/sync.py | 37 ++++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48d7574..2ddcc82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ - [X] Cleanup old binary versions - [X] Include existing extensions in update process - [ ] Determine VSCode version automatically -- [ ] Shorthands for command line arguments +- [X] Shorthands for command line arguments ## `1.0.24` - 2023-06-05 ### Fixed diff --git a/vscoffline/sync.py b/vscoffline/sync.py index da87955..de9cac1 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -705,51 +705,54 @@ def __repr__(self): if __name__ == '__main__': parser = argparse.ArgumentParser( description='Synchronises VSCode in an Offline Environment') - parser.add_argument('--sync', dest='sync', action='store_true', + parser.add_argument('--sync', '-s', dest='sync', action='store_true', help='The basic-user sync. It includes stable binaries and typical extensions') - parser.add_argument('--syncall', dest='syncall', action='store_true', + parser.add_argument('--syncall', '-a', dest='syncall', action='store_true', help='The power-user sync. It includes all binaries and extensions ') - parser.add_argument('--artifacts', dest='artifactdir', + parser.add_argument('--artifacts', '-d', dest='artifactdir', default='../artifacts/', help='Path to downloaded artifacts') - parser.add_argument('--frequency', dest='frequency', default=None, + parser.add_argument('--frequency', '-f', dest='frequency', default=None, help='The frequency to try and update (e.g. 
sleep for \'12h\' and try again)') # Arguments to tweak behaviour + # ToDo Implement action=argparse.BooleanOptionalAction to combine --check-binaries and --skip-binaries into a single argument parser.add_argument('--check-binaries', dest='checkbinaries', action='store_true', help='Check for updated binaries') - parser.add_argument('--check-insider', dest='checkinsider', + parser.add_argument('--check-insider', '-i', dest='checkinsider', action='store_true', help='Check for updated insider binaries') parser.add_argument('--check-recommended-extensions', dest='checkextensions', action='store_true', help='Check for recommended extensions') - parser.add_argument('--check-specified-extensions', dest='checkspecified', + parser.add_argument('--check-specified-extensions', '-w', dest='checkspecified', action='store_true', help='Check for extensions in /specified.json') - parser.add_argument('--extension-name', dest='extensionname', + # ToDo Allow for list of names (action='extend' nargs='+') + parser.add_argument('--extension-name', '-n', dest='extensionname', help='Find a specific extension by name') + # ToDo Allow for list of names (action='extend' nargs='+') parser.add_argument('--extension-search', dest='extensionsearch', help='Search for a set of extensions') - parser.add_argument('--prerelease-extensions', dest='prerelease', + parser.add_argument('--prerelease-extensions', '-p', dest='prerelease', action='store_true', help='Download prerelease extensions. 
Defaults to false.') - parser.add_argument('--update-binaries', dest='updatebinaries', + parser.add_argument('--update-binaries', '-b', dest='updatebinaries', action='store_true', help='Download binaries') - parser.add_argument('--update-extensions', dest='updateextensions', + parser.add_argument('--update-extensions', '-u', dest='updateextensions', action='store_true', help='Download extensions') - parser.add_argument('--update-malicious-extensions', dest='updatemalicious', + parser.add_argument('--update-malicious-extensions', '-m', dest='updatemalicious', action='store_true', help='Update the malicious extension list') - parser.add_argument('--skip-binaries', dest='skipbinaries', + parser.add_argument('--skip-binaries', '-B', dest='skipbinaries', action='store_true', help='Skip downloading binaries') - parser.add_argument('--vscode-version', dest='version', + parser.add_argument('--vscode-version', '-v', dest='version', default=VSCUpdates.latest_version(), help='VSCode version to search extensions as.') parser.add_argument('--total-recommended', type=int, dest='totalrecommended', default=500, help='Total number of recommended extensions to sync from Search API. 
Defaults to 500') parser.add_argument('--debug', dest='debug', action='store_true', help='Show debug output') - parser.add_argument('--logfile', dest='logfile', default=None, + parser.add_argument('--logfile', '-l', dest='logfile', default=None, help='Sets a logfile to store loggging output') - parser.add_argument('--include-existing', dest='existing', + parser.add_argument('--include-existing', '-e', dest='existing', action='store_true', help='Include existing extensions in the update process') - parser.add_argument('--skip-existing', dest='skipExisting', + parser.add_argument('--skip-existing', '-E', dest='skipExisting', action='store_true', help='Skip inclusion of existing extensions in the update process') - parser.add_argument('--garbage-collection', dest='garbageCollection', + parser.add_argument('--garbage-collection', '-g', dest='garbageCollection', action='store_true', help='Remove old versions of artifacts (binaries / extensions)') config = parser.parse_args() From 8db4dd61e814a594de5df3280b6240781aff4d07 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 13:49:43 +0200 Subject: [PATCH 18/27] Change formatting of arguments to make them more readable --- vscoffline/sync.py | 153 ++++++++++++++++++++++++++++++++------------- 1 file changed, 109 insertions(+), 44 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index de9cac1..6944d98 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -705,55 +705,120 @@ def __repr__(self): if __name__ == '__main__': parser = argparse.ArgumentParser( description='Synchronises VSCode in an Offline Environment') - parser.add_argument('--sync', '-s', dest='sync', action='store_true', - help='The basic-user sync. It includes stable binaries and typical extensions') - parser.add_argument('--syncall', '-a', dest='syncall', action='store_true', - help='The power-user sync. 
It includes all binaries and extensions ') - parser.add_argument('--artifacts', '-d', dest='artifactdir', - default='../artifacts/', help='Path to downloaded artifacts') - parser.add_argument('--frequency', '-f', dest='frequency', default=None, - help='The frequency to try and update (e.g. sleep for \'12h\' and try again)') + parser.add_argument('--sync', '-s', + dest='sync', + action='store_true', + help='The basic-user sync. It includes stable binaries and typical extensions' + ) + parser.add_argument('--syncall', '-a', + dest='syncall', + action='store_true', + help='The power-user sync. It includes all binaries and extensions' + ) + parser.add_argument('--artifacts', '-d', + dest='artifactdir', + default='../artifacts/', + help='Path to downloaded artifacts' + ) + parser.add_argument('--frequency', '-f', + dest='frequency', + default=None, + help='The frequency to try and update (e.g. sleep for \'12h\' and try again)' + ) # Arguments to tweak behaviour # ToDo Implement action=argparse.BooleanOptionalAction to combine --check-binaries and --skip-binaries into a single argument - parser.add_argument('--check-binaries', dest='checkbinaries', - action='store_true', help='Check for updated binaries') - parser.add_argument('--check-insider', '-i', dest='checkinsider', - action='store_true', help='Check for updated insider binaries') - parser.add_argument('--check-recommended-extensions', dest='checkextensions', - action='store_true', help='Check for recommended extensions') - parser.add_argument('--check-specified-extensions', '-w', dest='checkspecified', - action='store_true', help='Check for extensions in /specified.json') + parser.add_argument('--check-binaries', + dest='checkbinaries', + action='store_true', + help='Check for updated binaries' + ) + parser.add_argument('--check-insider', '-i', + dest='checkinsider', + action='store_true', + help='Check for updated insider binaries' + ) + parser.add_argument('--check-recommended-extensions', + 
dest='checkextensions', + action='store_true', + help='Check for recommended extensions' + ) + parser.add_argument('--check-specified-extensions', '-w', + dest='checkspecified', + action='store_true', + help='Check for extensions in /specified.json' + ) # ToDo Allow for list of names (action='extend' nargs='+') - parser.add_argument('--extension-name', '-n', dest='extensionname', - help='Find a specific extension by name') + parser.add_argument('--extension-name', '-n', + dest='extensionname', + help='Find a specific extension by name' + ) # ToDo Allow for list of names (action='extend' nargs='+') - parser.add_argument('--extension-search', dest='extensionsearch', - help='Search for a set of extensions') - parser.add_argument('--prerelease-extensions', '-p', dest='prerelease', - action='store_true', help='Download prerelease extensions. Defaults to false.') - parser.add_argument('--update-binaries', '-b', dest='updatebinaries', - action='store_true', help='Download binaries') - parser.add_argument('--update-extensions', '-u', dest='updateextensions', - action='store_true', help='Download extensions') - parser.add_argument('--update-malicious-extensions', '-m', dest='updatemalicious', - action='store_true', help='Update the malicious extension list') - parser.add_argument('--skip-binaries', '-B', dest='skipbinaries', - action='store_true', help='Skip downloading binaries') - parser.add_argument('--vscode-version', '-v', dest='version', - default=VSCUpdates.latest_version(), help='VSCode version to search extensions as.') - parser.add_argument('--total-recommended', type=int, dest='totalrecommended', default=500, - help='Total number of recommended extensions to sync from Search API. 
Defaults to 500') - parser.add_argument('--debug', dest='debug', - action='store_true', help='Show debug output') - parser.add_argument('--logfile', '-l', dest='logfile', default=None, - help='Sets a logfile to store loggging output') - parser.add_argument('--include-existing', '-e', dest='existing', - action='store_true', help='Include existing extensions in the update process') - parser.add_argument('--skip-existing', '-E', dest='skipExisting', - action='store_true', help='Skip inclusion of existing extensions in the update process') - parser.add_argument('--garbage-collection', '-g', dest='garbageCollection', - action='store_true', help='Remove old versions of artifacts (binaries / extensions)') + parser.add_argument('--extension-search', + dest='extensionsearch', + help='Search for a set of extensions' + ) + parser.add_argument('--prerelease-extensions', '-p', + dest='prerelease', + action='store_true', + help='Download prerelease extensions. Defaults to false.' + ) + parser.add_argument('--update-binaries', '-b', + dest='updatebinaries', + action='store_true', + help='Download binaries' + ) + parser.add_argument('--update-extensions', '-u', + dest='updateextensions', + action='store_true', + help='Download extensions' + ) + parser.add_argument('--update-malicious-extensions', '-m', + dest='updatemalicious', + action='store_true', + help='Update the malicious extension list' + ) + parser.add_argument('--skip-binaries', '-B', + dest='skipbinaries', + action='store_true', + help='Skip downloading binaries' + ) + parser.add_argument('--vscode-version', '-v', + dest='version', + default=VSCUpdates.latest_version(), + help='VSCode version to search extensions as.' + ) + parser.add_argument('--total-recommended', + type=int, + dest='totalrecommended', + default=500, + help='Total number of recommended extensions to sync from Search API. 
Defaults to 500' + ) + parser.add_argument('--debug', + dest='debug', + action='store_true', + help='Show debug output' + ) + parser.add_argument('--logfile', '-l', + dest='logfile', + default=None, + help='Sets a logfile to store loggging output' + ) + parser.add_argument('--include-existing', '-e', + dest='existing', + action='store_true', + help='Include existing extensions in the update process' + ) + parser.add_argument('--skip-existing', '-E', + dest='skipExisting', + action='store_true', + help='Skip inclusion of existing extensions in the update process' + ) + parser.add_argument('--garbage-collection', '-g', + dest='garbageCollection', + action='store_true', + help='Remove old versions of artifacts (binaries / extensions)' + ) config = parser.parse_args() if config.debug: From 72e1777e3ee6e0c935a601e143635aa4f3137727 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 14:33:38 +0200 Subject: [PATCH 19/27] Update documentation --- CHANGELOG.md | 7 ------ README.md | 60 +++++++++++++++++++++++++--------------------------- 2 files changed, 29 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ddcc82..14b0343 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,5 @@ # Change Log for Visual Studio Code - Offline Gallery and Updater -## ToDo -- [X] Cleanup old extension versions -- [X] Cleanup old binary versions -- [X] Include existing extensions in update process -- [ ] Determine VSCode version automatically -- [X] Shorthands for command line arguments - ## `1.0.24` - 2023-06-05 ### Fixed - Improvements to requests session handling to prevent ConnectionErrors due to repeated connections. Thanks @tomer953 for reporting. diff --git a/README.md b/README.md index e1e5a83..b7ce771 100644 --- a/README.md +++ b/README.md @@ -116,50 +116,48 @@ These arguments can be passed as command line arguments to sync.py (e.g. 
--varA ### Possible Args: ``` -usage: sync.py [-h] [--sync] [--syncall] [--artifacts ARTIFACTDIR] - [--frequency FREQUENCY] [--check-binaries] [--check-insider] - [--check-recommended-extensions] [--check-specified-extensions] - [--extension-name EXTENSIONNAME] - [--extension-search EXTENSIONSEARCH] [--update-binaries] - [--update-extensions] [--update-malicious-extensions] - [--prerelease-extensions] [--vscode-version VSCODEVERSION] - [--skip-binaries] [--debug] [--logfile LOGFILE] +usage: sync.py [-h] [--sync] [--syncall] [--artifacts ARTIFACTDIR] [--frequency FREQUENCY] [--check-binaries] [--check-insider] [--check-recommended-extensions] [--check-specified-extensions] [--extension-name EXTENSIONNAME] [--extension-search EXTENSIONSEARCH] [--prerelease-extensions] + [--update-binaries] [--update-extensions] [--update-malicious-extensions] [--skip-binaries] [--vscode-version VERSION] [--total-recommended TOTALRECOMMENDED] [--debug] [--logfile LOGFILE] [--include-existing] [--skip-existing] [--garbage-collection] Synchronises VSCode in an Offline Environment -optional arguments: +options: -h, --help show this help message and exit - --sync The basic-user sync. It includes stable binaries and - typical extensions - --syncall The power-user sync. It includes all binaries and - extensions - --artifacts ARTIFACTDIR + --sync, -s The basic-user sync. It includes stable binaries and typical extensions + --syncall, -a The power-user sync. It includes all binaries and extensions + --artifacts ARTIFACTDIR, -d ARTIFACTDIR Path to downloaded artifacts - --frequency FREQUENCY - The frequency to try and update (e.g. sleep for '12h' - and try again - --total-recommended N - The number of recommended extensions to fetch - (default: 200) + --frequency FREQUENCY, -f FREQUENCY + The frequency to try and update (e.g. 
sleep for '12h' and try again) --check-binaries Check for updated binaries - --check-insider Check for updated insider binaries + --check-insider, -i Check for updated insider binaries --check-recommended-extensions Check for recommended extensions - --check-specified-extensions + --check-specified-extensions, -w Check for extensions in /specified.json - --extension-name EXTENSIONNAME + --extension-name EXTENSIONNAME, -n EXTENSIONNAME Find a specific extension by name --extension-search EXTENSIONSEARCH Search for a set of extensions - --update-binaries Download binaries - --update-extensions Download extensions - --update-malicious-extensions - Update the malicious extension list - --prerelease-extensions + --prerelease-extensions, -p Download prerelease extensions. Defaults to false. - --vscode-version + --update-binaries, -b + Download binaries + --update-extensions, -u + Download extensions + --update-malicious-extensions, -m + Update the malicious extension list + --skip-binaries, -B Skip downloading binaries + --vscode-version VERSION, -v VERSION VSCode version to search extensions as. - --skip-binaries Skip downloading binaries + --total-recommended TOTALRECOMMENDED + Total number of recommended extensions to sync from Search API. 
Defaults to 500 --debug Show debug output - --logfile LOGFILE Sets a logfile to store loggging output + --logfile LOGFILE, -l LOGFILE + Sets a logfile to store loggging output + --include-existing, -e + Include existing extensions in the update process + --skip-existing, -E Skip inclusion of existing extensions in the update process + --garbage-collection, -g + Remove old versions of artifacts (binaries / extensions) ``` From 5d7f458eedd537cfefb5c6b701d43e14c9100228 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 15:13:48 +0200 Subject: [PATCH 20/27] Add argument to skip recommended extensions Only extensions that are beaing searched for, that are specified (on the allowlist) or that already exist, would be downloaded. --- vscoffline/sync.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 6944d98..856aabd 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -738,8 +738,8 @@ def __repr__(self): action='store_true', help='Check for updated insider binaries' ) - parser.add_argument('--check-recommended-extensions', - dest='checkextensions', + parser.add_argument('--check-recommended-extensions', '-r', + dest='checkrecommended', action='store_true', help='Check for recommended extensions' ) @@ -814,6 +814,11 @@ def __repr__(self): action='store_true', help='Skip inclusion of existing extensions in the update process' ) + parser.add_argument('--skip-recommended', '-R', + dest='skipRecommended', + action='store_true', + help='Skip inclusion of existing extensions in the update process' + ) parser.add_argument('--garbage-collection', '-g', dest='garbageCollection', action='store_true', @@ -846,7 +851,7 @@ def __repr__(self): if config.sync or config.syncall: config.checkbinaries = True - config.checkextensions = True + config.checkrecommended = True config.updatebinaries = True config.updateextensions = True config.updatemalicious = True @@ -938,7 +943,7 @@ def 
__repr__(self): if result: extensions[result.identity] = result - if config.checkextensions: + if config.checkrecommended and not config.skipRecommended: log.info('Syncing VS Code Recommended Extensions') recommended = mp.get_recommendations(os.path.abspath( config.artifactdir), config.totalrecommended) @@ -973,7 +978,7 @@ def __repr__(self): bonusextension.save_state(config.artifactdir_extensions) # Check if we did anything - if config.checkbinaries or config.checkextensions or config.updatebinaries or config.updateextensions or config.updatemalicious or config.checkspecified or config.checkinsider: + if config.checkbinaries or config.checkrecommended or config.updatebinaries or config.updateextensions or config.updatemalicious or config.checkspecified or config.checkinsider: log.info('Complete') VSCUpdates.signal_updated(os.path.abspath(config.artifactdir)) From 48a932035529fefd1f640157fa50d1a8c963a6a4 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 15:35:11 +0200 Subject: [PATCH 21/27] Fix missing import --- vscoffline/sync.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 6944d98..51aee8d 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -2,6 +2,7 @@ import glob import os import sys +import re import argparse import requests import pathlib From 6212bf33c08f8ccc239a67faf52fae92168cef17 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 15:36:29 +0200 Subject: [PATCH 22/27] Fix unhandled exception --- vscoffline/sync.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 51aee8d..db4ec23 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -406,6 +406,10 @@ def remove_old(artifactdir_installers): continue # Skip any subdirectory filtered = filter(lambda file: not file.endswith('.json'), fileNames) versions = sorted(filtered, key=lambda file: Version(re.findall('\d+\.\d+\.\d+', file)[0]), reverse=True) + if not 
versions: + # Versions could not be determined + log.debug(f'Versions of {path[path.index(os.path.sep)]} could not be determined') + continue versions.remove(versions[0]) if len(versions) == 0: continue # Only a single version exists From 722db11ef47351632b01744de0be77f8a992eb8d Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 15:54:36 +0200 Subject: [PATCH 23/27] Fix RegEx pattern as raw string Fixes issue with Python 3 interpreting \d as Unicode character --- vscoffline/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index db4ec23..99581a6 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -405,7 +405,7 @@ def remove_old(artifactdir_installers): if str(path).count(os.path.sep) > 2: continue # Skip any subdirectory filtered = filter(lambda file: not file.endswith('.json'), fileNames) - versions = sorted(filtered, key=lambda file: Version(re.findall('\d+\.\d+\.\d+', file)[0]), reverse=True) + versions = sorted(filtered, key=lambda file: Version(re.findall(r'\d+\.\d+\.\d+', file)[0]), reverse=True) if not versions: # Versions could not be determined log.debug(f'Versions of {path[path.index(os.path.sep)]} could not be determined') From b042b4d4502bfb6714913faffa4a5cad2a558fd1 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 16:25:30 +0200 Subject: [PATCH 24/27] Remove deprecated attribute 'version' See https://github.com/compose-spec/compose-spec/blob/main/04-version-and-name.md --- docker-compose.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 08bdea9..8d5eec7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: '3' services: vscsync: From 4679d0a633c86d5a0405222963c383b83b5667f5 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Tue, 20 May 2025 16:39:19 +0200 Subject: [PATCH 25/27] Fix deprecated 'utcnow' method Instead use timezone aware method (still with UTC time though) 
'now' --- vscoffline/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 99581a6..e1af58f 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -389,7 +389,7 @@ def latest_version(insider=False): def signal_updated(artifactdir): signalpath = os.path.join(artifactdir, 'updated.json') result = { - 'updated': datetime.datetime.utcnow() + 'updated': datetime.datetime.now(datetime.timezone.utc) } with open(signalpath, 'w') as outfile: json.dump(result, outfile, cls=vsc.MagicJsonEncoder, indent=4) From ba99907f6b965628c03990ac815a1f6a3cbcaa19 Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Wed, 21 May 2025 08:20:08 +0200 Subject: [PATCH 26/27] Add default frequency back As per the feedback from the PR re-add the default frequency back as the standard should not change. --- vscoffline/sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 183e301..5e9928a 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -861,6 +861,8 @@ def __repr__(self): config.updateextensions = True config.updatemalicious = True config.checkspecified = True + if not config.frequency: + config.frequency = '12h' config.existing = True if config.syncall: From 1a3ad57dfcb62ddf45b4fc1af4887c504a57195d Mon Sep 17 00:00:00 2001 From: Moritz Grede Date: Wed, 21 May 2025 08:25:52 +0200 Subject: [PATCH 27/27] Add output of malicious extensions to malicious.json As per the feedback from the PR re-add the output of malicious extensions to the "malicious.json". The file is used by VS Code itself so there are no other references to the file in the source code.
--- vscoffline/sync.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vscoffline/sync.py b/vscoffline/sync.py index 5e9928a..275908e 100644 --- a/vscoffline/sync.py +++ b/vscoffline/sync.py @@ -476,7 +476,7 @@ def get_recommendations_old(self, destination): return packages - def get_malicious(self, extensions=None): + def get_malicious(self, destination, extensions=None): if not extensions: return @@ -492,6 +492,11 @@ def get_malicious(self, extensions=None): 'utf-8', 'ignore').replace(u'\xa0', u'') jresult = json.loads(stripped) + # Output to malicious.json (used by VS Code) + with open(os.path.join(destination, 'malicious.json'), 'w') as outfile: + json.dump(jresult, outfile, cls=vsc.MagicJsonEncoder, indent=4) + + # Remove malicious extensions from collection for extension in (extensions.copy()): if extension in jresult['malicious']: log.warning( @@ -960,7 +965,7 @@ def __repr__(self): if config.updatemalicious: log.info('Syncing VS Code Malicious Extension List') malicious = mp.get_malicious( - extensions) + os.path.abspath(config.artifactdir), extensions) if config.updateextensions: log.info(