Skip to content
This repository was archived by the owner on Feb 4, 2020. It is now read-only.
Merged
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ clcache changelog
`CLCACHE_OBJECT_CACHE_TIMEOUT_MS` environment variable.
* Improvement: Greatly improved concurrency of clcache such that concurrent
invocations of the tool no longer block each other.
* Improvement: Improve hit rate when alternating between two identical
versions of the same source file that transitively get different contents of
the included files (a common case when switching back and forth between
branches).

## clcache 3.2.0 (2016-07-28)

Expand Down
153 changes: 86 additions & 67 deletions clcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@
# to use it as mark for relative path.
BASEDIR_REPLACEMENT = '?'

# ManifestEntry: an entry in a manifest file
# `includeFiles`: list of paths to include files, which this source file uses
# `includesContentToObjectMap`: dictionary
# key: cumulative hash of all include files' content in includeFiles
# value: key in the cache, under which the object file is stored
Manifest = namedtuple('Manifest', ['includeFiles', 'includesContentToObjectMap'])
# `includesContentHash`: hash of the contents of the includeFiles
# `objectHash`: hash of the object in cache
ManifestEntry = namedtuple('ManifestEntry', ['includeFiles', 'includesContentHash', 'objectHash'])

CompilerArtifacts = namedtuple('CompilerArtifacts', ['objectFilePath', 'stdout', 'stderr'])

Expand Down Expand Up @@ -108,10 +108,6 @@ class IncludeNotFoundException(Exception):
pass


class IncludeChangedException(Exception):
pass


class CacheLockException(Exception):
pass

Expand All @@ -125,6 +121,24 @@ def __str__(self):
return repr(self.message)


class Manifest(object):
    """Ordered collection of manifest entries.

    The most recently added or touched entry comes first: `addEntry` inserts
    at the top and `touchEntry` moves an existing entry to the top.
    """

    def __init__(self, entries=None):
        """Creates a manifest holding a private copy of `entries`.

        `entries` may be any iterable (list, tuple, generator, ...) or None
        for an empty manifest.
        """
        # list() copies any iterable, so the caller's sequence is never
        # mutated by addEntry()/touchEntry(); list.copy() only exists on lists.
        self._entries = [] if entries is None else list(entries)

    def entries(self):
        """Returns the internal list of entries (not a copy), top entry first"""
        return self._entries

    def addEntry(self, entry):
        """Adds entry at the top of the entries"""
        self._entries.insert(0, entry)

    def touchEntry(self, entryIndex):
        """Moves entry in entryIndex position to the top of entries()"""
        self._entries.insert(0, self._entries.pop(entryIndex))


class ManifestSection(object):
def __init__(self, manifestSectionDir):
self.manifestSectionDir = manifestSectionDir
Expand All @@ -137,10 +151,14 @@ def manifestFiles(self):
return filesBeneath(self.manifestSectionDir)

def setManifest(self, manifestHash, manifest):
manifestPath = self.manifestPath(manifestHash)
printTraceStatement("Writing manifest with manifestHash = {} to {}".format(manifestHash, manifestPath))
ensureDirectoryExists(self.manifestSectionDir)
with open(self.manifestPath(manifestHash), 'w') as outFile:
with open(manifestPath, 'w') as outFile:
# Converting namedtuple to JSON via OrderedDict preserves key names and keys order
json.dump(manifest._asdict(), outFile, sort_keys=True, indent=2)
entries = [e._asdict() for e in manifest.entries()]
jsonobject = {'entries': entries}
json.dump(jsonobject, outFile, sort_keys=True, indent=2)

def getManifest(self, manifestHash):
fileName = self.manifestPath(manifestHash)
Expand All @@ -149,7 +167,8 @@ def getManifest(self, manifestHash):
try:
with open(fileName, 'r') as inFile:
doc = json.load(inFile)
return Manifest(doc['includeFiles'], doc['includesContentToObjectMap'])
return Manifest([ManifestEntry(e['includeFiles'], e['includesContentHash'], e['objectHash'])
for e in doc['entries']])
except IOError:
return None

Expand All @@ -172,7 +191,7 @@ class ManifestRepository(object):
# invalidation, such that a manifest that was stored using the old format is not
# interpreted using the new format. Instead the old file will not be touched
# again due to a new manifest hash and is cleaned away after some time.
MANIFEST_FILE_FORMAT_VERSION = 4
MANIFEST_FILE_FORMAT_VERSION = 5

def __init__(self, manifestsRootDir):
self._manifestsRootDir = manifestsRootDir
Expand Down Expand Up @@ -219,26 +238,19 @@ def getManifestHash(compilerBinary, commandLine, sourceFile):

@staticmethod
def getIncludesContentHashForFiles(includes):
listOfIncludesHashes = []
includeMissing = False
listOfHashes = []

for path in sorted(includes.keys()):
for path in includes:
try:
fileHash = getFileHash(path)
if fileHash != includes[path]:
raise IncludeChangedException()
listOfIncludesHashes.append(fileHash)
listOfHashes.append(getFileHash(path))
except FileNotFoundError:
includeMissing = True

if includeMissing:
raise IncludeNotFoundException()
raise IncludeNotFoundException
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, does this change mean that the statistics are counted slightly differently in case some included file has a different hash and another included file does not exist anymore?


return ManifestRepository.getIncludesContentHashForHashes(listOfIncludesHashes)
return ManifestRepository.getIncludesContentHashForHashes(listOfHashes)

@staticmethod
def getIncludesContentHashForHashes(listOfIncludesHashes):
return HashAlgorithm(','.join(listOfIncludesHashes).encode()).hexdigest()
def getIncludesContentHashForHashes(listOfHashes):
return HashAlgorithm(','.join(listOfHashes).encode()).hexdigest()


class CacheLock(object):
Expand Down Expand Up @@ -754,7 +766,8 @@ def getStringHash(dataString):
return hasher.hexdigest()


def expandBasedirPlaceholder(path, baseDir):
def expandBasedirPlaceholder(path):
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))
if path.startswith(BASEDIR_REPLACEMENT):
if not baseDir:
raise LogicException('No CLCACHE_BASEDIR set, but found relative path ' + path)
Expand All @@ -763,13 +776,17 @@ def expandBasedirPlaceholder(path, baseDir):
return path


def collapseBasedirToPlaceholder(path, baseDir):
assert path == os.path.normcase(path)
assert baseDir == os.path.normcase(baseDir)
if path.startswith(baseDir):
return path.replace(baseDir, BASEDIR_REPLACEMENT, 1)
else:
def collapseBasedirToPlaceholder(path):
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))
if baseDir is None:
return path
else:
assert path == os.path.normcase(path)
assert baseDir == os.path.normcase(baseDir)
if path.startswith(baseDir):
return path.replace(baseDir, BASEDIR_REPLACEMENT, 1)
else:
return path


def ensureDirectoryExists(path):
Expand Down Expand Up @@ -1371,24 +1388,20 @@ def processCacheHit(cache, objectFile, cachekey):
return 0, cachedArtifacts.stdout, cachedArtifacts.stderr, False


def createManifest(manifestHash, includePaths):
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))

includes = {path:getFileHash(path) for path in includePaths}
includesContentHash = ManifestRepository.getIncludesContentHashForFiles(includes)
def createManifestEntry(manifestHash, includePaths):
includesWithHash = {path:getFileHash(path) for path in includePaths}
includesContentHash = ManifestRepository.getIncludesContentHashForHashes(includesWithHash.values())
cachekey = CompilerArtifactsRepository.computeKeyDirect(manifestHash, includesContentHash)

# Create new manifest
if baseDir:
relocatableIncludePaths = {
collapseBasedirToPlaceholder(path, baseDir):contentHash
for path, contentHash in includes.items()
}
manifest = Manifest(relocatableIncludePaths, {})
else:
manifest = Manifest(includes, {})
manifest.includesContentToObjectMap[includesContentHash] = cachekey
return manifest, cachekey
safeIncludes = [collapseBasedirToPlaceholder(path) for path in includesWithHash.keys()]
return ManifestEntry(safeIncludes, includesContentHash, cachekey)


def createOrUpdateManifest(manifestSection, manifestHash, entry):
    """Puts entry at the top of the manifest stored under manifestHash.

    The manifest is read from manifestSection; if none is returned, a fresh
    Manifest is used instead. The resulting manifest is written back to
    manifestSection and returned.
    """
    storedManifest = manifestSection.getManifest(manifestHash)
    if storedManifest:
        updatedManifest = storedManifest
    else:
        updatedManifest = Manifest()
    updatedManifest.addEntry(entry)
    manifestSection.setManifest(manifestHash, updatedManifest)
    return updatedManifest


def postprocessUnusableManifestMiss(
Expand All @@ -1401,8 +1414,8 @@ def postprocessUnusableManifestMiss(
returnCode, compilerOutput, compilerStderr = invokeRealCompiler(compiler, cmdLine, captureOutput=True)
includePaths, compilerOutput = parseIncludesSet(compilerOutput, sourceFile, stripIncludes)

if returnCode == 0 and os.path.exists(objectFile):
manifest, cachekey = createManifest(manifestHash, includePaths)
entry = createManifestEntry(manifestHash, includePaths)
cachekey = entry.objectHash

cleanupRequired = False
section = cache.compilerArtifactsRepository.section(cachekey)
Expand All @@ -1411,6 +1424,7 @@ def postprocessUnusableManifestMiss(
if returnCode == 0 and os.path.exists(objectFile):
artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr)
cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts)
manifest = createOrUpdateManifest(manifestSection, manifestHash, entry)
manifestSection.setManifest(manifestHash, manifest)

return returnCode, compilerOutput, compilerStderr, cleanupRequired
Expand Down Expand Up @@ -1551,7 +1565,6 @@ def processCompileRequest(cache, compiler, args):


def processDirect(cache, objectFile, compiler, cmdLine, sourceFile):
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))
manifestHash = ManifestRepository.getManifestHash(compiler, cmdLine, sourceFile)
manifestSection = cache.manifestRepository.section(manifestHash)
with manifestSection.lock:
Expand All @@ -1561,21 +1574,27 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile):
cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine,
Statistics.registerSourceChangedMiss)

# NOTE: command line options already included in hash for manifest name
try:
includesContentHash = ManifestRepository.getIncludesContentHashForFiles({
expandBasedirPlaceholder(path, baseDir):contentHash
for path, contentHash in manifest.includeFiles.items()
})
except IncludeChangedException:
return postprocessUnusableManifestMiss(
cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine,
Statistics.registerHeaderChangedMiss)

cachekey = manifest.includesContentToObjectMap.get(includesContentHash)
assert cachekey is not None

return getOrSetArtifacts(cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss)
for entryIndex, entry in enumerate(manifest.entries()):
# NOTE: command line options already included in hash for manifest name
try:
includesContentHash = ManifestRepository.getIncludesContentHashForFiles(
[expandBasedirPlaceholder(path) for path in entry.includeFiles])

if entry.includesContentHash == includesContentHash:
cachekey = entry.objectHash
assert cachekey is not None
# Move manifest entry to the top of the entries in the manifest
manifest.touchEntry(entryIndex)
manifestSection.setManifest(manifestHash, manifest)

return getOrSetArtifacts(
cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss)
except IncludeNotFoundException:
pass

return postprocessUnusableManifestMiss(
cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine,
Statistics.registerHeaderChangedMiss)


def processNoDirect(cache, objectFile, compiler, cmdLine, environment):
Expand Down
Loading