| |
| |
| |
| |
| import binascii |
| import os |
| import mmap |
| import sys |
| import time |
| import errno |
|
|
| from io import BytesIO |
|
|
| from smmap import ( |
| StaticWindowMapManager, |
| SlidingWindowMapManager, |
| SlidingWindowMapBuffer |
| ) |
|
|
| |
| |
# Module-wide sliding-window memory-map manager; sliding_ro_buffer() obtains
# its cursors from this single shared instance.
mman = SlidingWindowMapManager()
| |
|
|
| import hashlib |
|
|
try:
    from struct import unpack_from
except ImportError:
    # Fallback for interpreters whose ``struct`` module has no ``unpack_from``.
    from struct import unpack, calcsize

    # Maps a format string to its packed size so that calcsize() is evaluated
    # only once per distinct format.
    __calcsize_cache = {}

    def unpack_from(fmt, data, offset=0):
        """Unpack ``fmt`` from ``data``, starting at byte ``offset``.

        :param fmt: struct format string
        :param data: sliceable bytes-like object to read from
        :param offset: index of the first byte to decode
        :return: tuple of unpacked values, as returned by ``struct.unpack``"""
        size = __calcsize_cache.get(fmt)
        if size is None:
            size = __calcsize_cache[fmt] = calcsize(fmt)
        return unpack(fmt, data[offset:offset + size])
| |
|
|
|
|
| |
|
|
# Hex <-> binary conversion shortcuts
from binascii import a2b_hex as hex_to_bin
from binascii import b2a_hex as bin_to_hex

# Error-number shortcuts
from errno import ENOENT

# Filesystem shortcuts, re-exported under their short names
from os import chmod, close, fsync, mkdir, read, rename, write
from os.path import basename, dirname, exists, isdir, isfile, join
|
|
|
|
| def _retry(func, *args, **kwargs): |
| |
| |
| if sys.platform == "win32": |
| for _ in range(10): |
| try: |
| return func(*args, **kwargs) |
| except Exception: |
| time.sleep(0.1) |
| return func(*args, **kwargs) |
| else: |
| return func(*args, **kwargs) |
|
|
|
|
def remove(*args, **kwargs):
    """Delete a file with ``os.remove``, routed through ``_retry``.

    All positional and keyword arguments are forwarded to ``os.remove``
    unchanged.

    :return: the result of the ``_retry`` call"""
    outcome = _retry(os.remove, *args, **kwargs)
    return outcome
|
|
|
|
| |
| from gitdb.const import ( |
| NULL_BIN_SHA, |
| NULL_HEX_SHA |
| ) |
|
|
| |
|
|
| |
|
|
|
|
| class _RandomAccessBytesIO: |
|
|
| """Wrapper to provide required functionality in case memory maps cannot or may |
| not be used. This is only really required in python 2.4""" |
| __slots__ = '_sio' |
|
|
| def __init__(self, buf=''): |
| self._sio = BytesIO(buf) |
|
|
| def __getattr__(self, attr): |
| return getattr(self._sio, attr) |
|
|
| def __len__(self): |
| return len(self.getvalue()) |
|
|
| def __getitem__(self, i): |
| return self.getvalue()[i] |
|
|
| def __getslice__(self, start, end): |
| return self.getvalue()[start:end] |
|
|
|
|
def byte_ord(b):
    """
    Return the integer value of a single byte or character.

    Anything ``ord`` rejects with a ``TypeError`` is handed back unchanged,
    which covers the integers obtained by indexing a Python 3 bytes object.
    """
    try:
        value = ord(b)
    except TypeError:
        value = b
    return value
|
|
| |
|
|
| |
|
|
|
|
def make_sha(source=b''):
    """Create a SHA-1 hash object seeded with ``source``.

    :param source: initial bytes to feed into the hash
    :return: a ``hashlib`` sha1 object

    The former fallback to the ``sha`` module was removed: ``hashlib`` is
    imported unconditionally by this module, so the ``NameError`` branch could
    never run, and ``sha`` does not exist on Python 3 anyway."""
    return hashlib.sha1(source)
|
|
|
|
def allocate_memory(size):
    """:return: a file-protocol accessible memory block of the given size

    :param size: number of bytes to allocate. A size of 0 yields an empty
        in-memory buffer, as it is never passed to ``mmap``."""
    if size != 0:
        try:
            # Anonymous mapping (fd -1), zero-filled and writable
            return mmap.mmap(-1, size)
        except OSError:
            # Mapping failed: hand out ordinary zeroed memory instead
            return _RandomAccessBytesIO(b"\0" * size)

    return _RandomAccessBytesIO(b'')
| |
|
|
|
|
def file_contents_ro(fd, stream=False, allow_mmap=True):
    """:return: read-only contents of the file represented by the file descriptor fd

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, its allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False

    **Note** if the file cannot be mapped (this includes empty files, which
    ``mmap`` refuses with a ``ValueError``), the contents are read with
    ``os.read`` instead, which also moves the descriptor's position."""
    if allow_mmap:
        try:
            try:
                # A length of 0 maps the whole file
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except OSError:
                # Retry with the explicit file size
                return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
        except (OSError, ValueError):
            # OSError: mapping not possible for this descriptor.
            # ValueError: e.g. "cannot mmap an empty file".
            # Either way, fall through and read the data manually.
            pass

    contents = os.read(fd, os.fstat(fd).st_size)
    if stream:
        return _RandomAccessBytesIO(contents)
    return contents
|
|
|
|
def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Get the file contents at filepath as fast as possible

    :return: random access compatible memory of the given filepath
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** for now we don't try to use O_NOATIME directly as the right value needs to be
    shared per database in fact. It only makes a real difference for loose object
    databases anyway, and they use it with the help of the ``flags`` parameter"""
    # O_BINARY only exists on some platforms; it contributes 0 elsewhere
    open_flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags
    fd = os.open(filepath, open_flags)
    try:
        return file_contents_ro(fd, stream, allow_mmap)
    finally:
        # The descriptor is closed on success and on failure alike
        os.close(fd)
| |
|
|
|
|
def sliding_ro_buffer(filepath, flags=0):
    """
    :return: a buffer compatible object which uses our mapped memory manager internally
        ready to read the whole given filepath
    :param filepath: path of the file to expose through the buffer
    :param flags: passed on to ``SlidingWindowMapBuffer`` as its ``flags`` argument"""
    cursor = mman.make_cursor(filepath)
    return SlidingWindowMapBuffer(cursor, flags=flags)
|
|
|
|
def to_hex_sha(sha):
    """:return: hexified version of sha

    A value that is already 40 items long is treated as hexadecimal and
    returned unchanged; anything else is converted with ``bin_to_hex``."""
    return sha if len(sha) == 40 else bin_to_hex(sha)
|
|
|
|
def to_bin_sha(sha):
    """:return: binary version of sha

    A value that is already 20 items long is treated as binary and returned
    unchanged; anything else is converted with ``hex_to_bin``."""
    return sha if len(sha) == 20 else hex_to_bin(sha)
|
|
|
|
| |
|
|
|
|
| |
|
|
class LazyMixin:
    """
    Mixin that fills in attribute values lazily, on first access.

    Python calls ``__getattr__`` only after the normal lookup has failed, so
    an attribute is computed once via ``_set_cache_`` and from then on served
    directly from the instance's dict or slot. With slots, memory for a value
    is therefore taken only once the attribute has actually been requested.
    """

    __slots__ = ()

    def __getattr__(self, attr):
        """
        Give ``_set_cache_`` the chance to create the missing attribute
        ``attr``, then return its value. """
        self._set_cache_(attr)
        # Raises AttributeError if _set_cache_ did not provide the attribute
        value = object.__getattribute__(self, attr)
        return value

    def _set_cache_(self, attr):
        """
        Hook to be overridden by derived classes.

        An implementation checks whether it can produce the attribute named
        ``attr`` and, if so, stores it on the instance. For names it does not
        know it should do nothing, or defer to its own base class.

        It is free to set additional attributes in the same call, for example
        when one operation yields more information than the single attribute
        that was requested."""
|
|
|
|
class LockedFD:
    """
    Guards reading from and writing to a file on disk with a lock file.

    For writing to 'file', the lock file 'file.lock' is created and receives
    all the data. On success it is renamed over the original file; otherwise
    it is deleted and the original stays untouched.

    For reading, the lock file is created as well, purely to keep writers
    from succeeding while the file is being read.

    Errors are handled in that a consistent state is assured on destruction:
    a descriptor that is still open is rolled back.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ("_filepath", "_fd", "_write")

    def __init__(self, filepath):
        """Initialize an instance with the given filepath"""
        self._filepath = filepath
        self._fd = None      # descriptor handed out by open(); None while closed
        self._write = None   # None until open() was called, then its write flag

    def __del__(self):
        # Never leave an open descriptor or a stale lock file behind
        if self._fd is not None:
            self.rollback()

    def _lockfilepath(self):
        """:return: path of the lock file belonging to our file"""
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Open the file descriptor for reading or writing, both in binary mode.

        :param write: if True, the file descriptor will be opened for writing. Other
            wise it will be opened read-only.
        :param stream: if True, the file descriptor will be wrapped into a simple stream
            object which supports only reading or writing
        :return: fd to read from or write to. It is still maintained by this instance
            and must not be closed directly
        :raise OSError: if the lock could not be retrieved, or if the actual
            file could not be opened for reading
        :raise AssertionError: if called more than once

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)

        self._write = write

        # O_EXCL makes creation fail if the lock file exists, i.e. if someone
        # else currently holds the lock
        binary = getattr(os, 'O_BINARY', 0)
        lock_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
        lock_path = self._lockfilepath()
        try:
            lock_fd = os.open(lock_path, lock_flags, 0o600)
            if write:
                # Writers put their data straight into the lock file
                self._fd = lock_fd
            else:
                # Readers only need the lock file to exist
                os.close(lock_fd)
        except OSError as e:
            raise OSError("Lock at %r could not be obtained" % lock_path) from e

        if self._fd is None:
            # Read mode: open the actual file now that the lock is ours
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | binary)
            except BaseException:
                # Release the lock again before passing the error on
                remove(self._lockfilepath())
                raise

        if not stream:
            return self._fd

        # Imported late, at the point of use
        from gitdb.stream import FDStream
        return FDStream(self._fd)

    def commit(self):
        """When done writing, call this function to commit your changes into the
        actual file.
        The file descriptor will be closed, and the lockfile handled.

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort your operation without any changes. The file descriptor will be
        closed, and the lock released.

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Close the descriptor and handle the lock according to the write mode

        :param successful: if True and we are in write mode, the lock file
            replaces the actual file. In every other case the lock file is
            simply deleted.
        :raise AssertionError: if ``open`` was never called"""
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")

        if self._fd is None:
            # Already finished, or the lock was never obtained
            return

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if not (self._write and successful):
            # Read mode or aborted write: just drop the lock file
            remove(lockfile)
            return

        # The existing target is removed first on Windows, before the rename
        if sys.platform == "win32" and isfile(self._filepath):
            remove(self._filepath)

        os.rename(lockfile, self._filepath)

        # The lock file was created with mode 600; set the final file to 644
        chmod(self._filepath, 0o644)
| |
|
|
| |
|
|