| """Module containing a database to deal with packs""" |
| from gitdb.db.base import ( |
| FileDBBase, |
| ObjectDBR, |
| CachingDB |
| ) |
|
|
| from gitdb.util import LazyMixin |
|
|
| from gitdb.exc import ( |
| BadObject, |
| UnsupportedOperation, |
| AmbiguousObjectName |
| ) |
|
|
| from gitdb.pack import PackEntity |
|
|
| from functools import reduce |
|
|
| import os |
| import glob |
|
|
| __all__ = ('PackedDB', ) |
|
|
| |
|
|
|
|


class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):

    """A database operating on a set of object packs"""

    # sort the priority list every N queries
    _sort_interval = 500

    def __init__(self, root_path):
        super().__init__(root_path)
        # lazily initialized list of lists, one per pack, each with three items:
        # * hit count - number of times the pack was hit with a request
        # * entity - the PackEntity instance
        # * sha_to_index - the pack index's sha_to_index method, cached here
        #   for direct lookups
        self._hit_count = 0             # overall amount of hits
        self._st_mtime = 0              # last modification time of our root path

    def _set_cache_(self, attr):
        # LazyMixin hook - fill the entity list on first access
        if attr == '_entities':
            self._entities = list()
            self.update_cache(force=True)

    def _sort_entities(self):
        # most frequently hit packs come first
        self._entities.sort(key=lambda item: item[0], reverse=True)

    def _pack_info(self, sha):
        """:return: tuple(entity, index) for an item at the given sha
        :param sha: 20 or 40 byte sha
        :raise BadObject:
        **Note:** This method is not thread-safe, but may be hit in multi-threaded
            operation. The worst thing that can happen though is a counter that
            was not incremented, or the list being in the wrong order. So we save
            the time for locking here; let's see how that goes"""
        # periodically resort the entities so frequently hit packs are tried first
        if self._hit_count % self._sort_interval == 0:
            self._sort_entities()

        for item in self._entities:
            index = item[2](sha)
            if index is not None:
                item[0] += 1            # one hit for this pack
                self._hit_count += 1    # overall hit count
                return (item[1], index)

        # no hit - packs change rarely, so we leave it to the caller to
        # update_cache() and retry
        raise BadObject(sha)
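
    # Internal sketch: the read methods below all funnel through _pack_info,
    # which yields the owning entity and the object's position within it
    # (names illustrative):
    #
    #   entity, index = pdb._pack_info(binsha)  # raises BadObject if not packed
    #   ostream = entity.stream_at_index(index)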

    def has_object(self, sha):
        try:
            self._pack_info(sha)
            return True
        except BadObject:
            return False

    def info(self, sha):
        entity, index = self._pack_info(sha)
        return entity.info_at_index(index)

    def stream(self, sha):
        entity, index = self._pack_info(sha)
        return entity.stream_at_index(index)

    def sha_iter(self):
        for entity in self.entities():
            index = entity.index()
            sha_by_index = index.sha
            # yield each sha by its position in the pack index
            for entry in range(index.size()):
                yield sha_by_index(entry)
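
    # Sketch: enumerating every packed object (pdb and the use of info() are
    # illustrative):
    #
    #   for binsha in pdb.sha_iter():
    #       print(pdb.info(binsha))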

    def size(self):
        """:return: amount of objects stored in all of our packs"""
        sizes = [item[1].index().size() for item in self._entities]
        return reduce(lambda x, y: x + y, sizes, 0)

    def store(self, istream):
        """Storing individual objects is not feasible as a pack is designed to
        hold multiple objects. Writing or rewriting packs for single objects is
        inefficient"""
        raise UnsupportedOperation()
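
    # Sketch: compound databases typically catch this and route writes to a
    # loose-object database instead (names illustrative):
    #
    #   try:
    #       pdb.store(istream)
    #   except UnsupportedOperation:
    #       loose_db.store(istream)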

    def update_cache(self, force=False):
        """
        Update our cache with the packs that actually exist on disk. Add new ones,
        remove deleted ones, and keep the unchanged ones.

        :param force: If True, the cache will be updated even though the directory
            does not appear to have changed according to its modification timestamp.
        :return: True if the packs have been updated, so there is new information,
            False if there was no change to the pack database"""
        stat = os.stat(self.root_path())
        if not force and stat.st_mtime <= self._st_mtime:
            return False
        self._st_mtime = stat.st_mtime

        # packs are supposed to be prefixed with 'pack-' by git convention -
        # gather all pack files and figure out what changed
        pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
        our_pack_files = {item[1].pack().path() for item in self._entities}

        # new packs - initialize the hit counter with the pack size, a usable
        # first estimate of hit probability
        for pack_file in (pack_files - our_pack_files):
            entity = PackEntity(pack_file)
            self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])

        # removed packs
        for pack_file in (our_pack_files - pack_files):
            del_index = -1
            for i, item in enumerate(self._entities):
                if item[1].pack().path() == pack_file:
                    del_index = i
                    break
            assert del_index != -1
            del self._entities[del_index]

        # reestablish the priority ordering
        self._sort_entities()
        return True
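
    # Sketch: a long-running process can poll for repacks cheaply, since the
    # mtime check makes no-op calls inexpensive (names illustrative):
    #
    #   if pdb.update_cache():    # True only if packs were added or removed
    #       print("pack set changed, now %d objects" % pdb.size())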

    def entities(self):
        """:return: list of pack entities operated upon by this database"""
        return [item[1] for item in self._entities]

    def partial_to_complete_sha(self, partial_binsha, canonical_length):
        """:return: 20 byte sha as inferred by the given partial binary sha
        :param partial_binsha: binary sha with less than 20 bytes
        :param canonical_length: length of the corresponding canonical (hex)
            representation. It is required as binary shas cannot express whether
            the original hex sha had an odd or even number of characters
        :raise AmbiguousObjectName:
        :raise BadObject:"""
        candidate = None
        for item in self._entities:
            item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
            if item_index is not None:
                sha = item[1].index().sha(item_index)
                # a second, different match makes the name ambiguous
                if candidate and candidate != sha:
                    raise AmbiguousObjectName(partial_binsha)
                candidate = sha

        if candidate:
            return candidate

        # still not found - the object is in none of our packs
        raise BadObject(partial_binsha)
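
    # Sketch: resolving an abbreviated hex sha. Odd-length hex strings must be
    # padded to full bytes before conversion, which is why the canonical length
    # travels separately (sha value illustrative):
    #
    #   from gitdb.util import hex_to_bin
    #   partial_hexsha = "abcd123"            # 7 hex characters
    #   padded = partial_hexsha + "0" if len(partial_hexsha) % 2 else partial_hexsha
    #   binsha = pdb.partial_to_complete_sha(hex_to_bin(padded), len(partial_hexsha))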