Source code for coalib.misc.Caching

import logging
import time
import os

from coala_utils.decorators import enforce_signature
from coalib.misc.CachingUtilities import (
    pickle_load, pickle_dump, delete_files)


class FileCache:
    """
    This object is a file cache that helps in collecting only the changed
    and new files since the last run.

    Example/Tutorial:

    >>> import logging
    >>> import copy, time
    >>> logging.getLogger().setLevel(logging.CRITICAL)

    To initialize the cache create an instance for the project:

    >>> cache = FileCache(None, "test", flush_cache=True)

    Now we can track new files by running:

    >>> cache.track_files(["a.c", "b.c"])

    Since all cache operations are lazy (for performance), we need to
    explicitly write the cache to disk for persistence in future uses:
    (Note: The cache will automatically figure out the write location)

    >>> cache.write()

    Let's go into the future:

    >>> time.sleep(1)

    Let's create a new instance to simulate a separate run:

    >>> cache = FileCache(None, "test", flush_cache=False)
    >>> old_data = copy.deepcopy(cache.data)

    We can mark a file as changed by doing:

    >>> cache.untrack_files({"a.c"})

    Again write to disk after calculating the new cache times for each file:

    >>> cache.write()
    >>> new_data = cache.data

    Since we marked 'a.c' as a changed file:

    >>> "a.c" not in cache.data
    True
    >>> "a.c" in old_data
    True

    Since 'b.c' was untouched after the second run, its time was updated
    to the latest value:

    >>> old_data["b.c"] < new_data["b.c"]
    True
    """

    @enforce_signature
    def __init__(
            self,
            log_printer,
            project_dir: str,
            flush_cache: bool = False):
        """
        Initialize FileCache.

        :param log_printer: An object to use for logging.
        :param project_dir: The root directory of the project to be used
                            as a key identifier.
        :param flush_cache: Flush the cache and rebuild it.
        """
        self.project_dir = project_dir
        self.current_time = int(time.time())

        cache_data = pickle_load(None, project_dir, {})
        last_time = -1
        if 'time' in cache_data:
            last_time = cache_data['time']
        if not flush_cache and last_time > self.current_time:
            logging.warning('It seems like you went back in time - your '
                            'system time is behind the last recorded run '
                            'time on this project. The cache will be force '
                            'flushed.')
            flush_cache = True

        self.data = cache_data.get('files', {})
        if flush_cache:
            self.flush_cache()

        # Store the files to be untracked and then untrack them in the end,
        # so that an untracked file is not tracked again by mistake in a
        # later section (which will happen if that file doesn't yield a
        # result in that section).
        self.to_untrack = set()
    def flush_cache(self):
        """
        Flushes the cache and deletes the relevant file.
        """
        self.data = {}
        delete_files(None, [self.project_dir])
        logging.debug('The file cache was successfully flushed.')
    def __enter__(self):
        return self
    def write(self):
        """
        Update the last run time on the project for each file
        to the current time.

        Using this object as a contextmanager is preferred (that will
        automatically call this method on exit).
        """
        for file in self.to_untrack:
            if file in self.data:
                del self.data[file]
        for file_name in self.data:
            self.data[file_name] = self.current_time
        pickle_dump(
            None,
            self.project_dir,
            {'time': self.current_time, 'files': self.data})
    def __exit__(self, type, value, traceback):
        """
        Update the last run time on the project for each file to the
        current time.
        """
        self.write()
    def untrack_files(self, files):
        """
        Removes the given files from the cache so that they are no longer
        considered cached for this and the next run.

        :param files: A set of files to remove from cache.
        """
        self.to_untrack.update(files)
    def track_files(self, files):
        """
        Start tracking files given in ``files`` by adding them to the
        database.

        :param files: A set of files that need to be tracked.
                      These files are initialized with their last
                      modified tag as -1.
        """
        for file in files:
            if file not in self.data:
                self.data[file] = -1
    def get_uncached_files(self, files):
        """
        Returns the set of files that are not in the cache yet or have
        been untracked.

        :param files: The list of collected files.
        :return:      A set of files that are uncached.
        """
        if self.data == {}:
            # The first run on this project, so all files are new
            # and must be returned irrespective of whether caching is
            # turned on.
            return files
        else:
            return {file
                    for file in files
                    if (file not in self.data or
                        int(os.path.getmtime(file)) > self.data[file])}
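
The ``write`` docstring above recommends using the cache as a context manager so that ``__exit__`` persists it automatically. Below is a minimal usage sketch under that assumption; the project identifier and file paths are illustrative placeholders, not values used anywhere in this module.

    from coalib.misc.Caching import FileCache

    collected_files = {'src/main.c', 'src/util.c'}  # placeholder paths

    with FileCache(None, 'example-project', flush_cache=False) as cache:
        # New files, or files whose modification time is newer than the
        # cached run time, need to be re-analyzed. On the very first run
        # every collected file is returned.
        changed_files = cache.get_uncached_files(collected_files)
        cache.track_files(changed_files)

        # After running the analysis, files that yielded results can be
        # marked as changed so they are re-checked on the next run.
        cache.untrack_files({'src/main.c'})

    # Leaving the ``with`` block calls cache.write(), which persists the
    # new run time for every still-tracked file.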