Source code for osxphotos.export_db

"""Helper class for managing database used by PhotoExporter for tracking state of exports and updates"""

from __future__ import annotations

import datetime
import gzip
import json
import logging
import os
import os.path
import pathlib
import pickle
import re
import sqlite3
import sys
import threading
import time
from contextlib import suppress
from io import StringIO
from sqlite3 import Error
from typing import Any

from tenacity import retry, retry_if_not_exception_type, stop_after_attempt

import osxphotos

from ._constants import OSXPHOTOS_EXPORT_DB, SQLITE_CHECK_SAME_THREAD
from ._version import __version__
from .fileutil import FileUtil
from .sqlite_utils import (
    sqlite_backup_dbfiles,
    sqlite_delete_backup_files,
    sqlite_delete_dbfiles,
)
from .unicode import normalize_fs_path

__all__ = [
    "BatchContext",
    "ExportDB",
    "ExportDBInMemory",
    "ExportDBTemp",
]

OSXPHOTOS_EXPORTDB_VERSION = "11.0"
OSXPHOTOS_ABOUT_STRING = f"Created by osxphotos version {__version__} (https://github.com/RhetTbull/osxphotos) on {datetime.datetime.now()}"

# max retry attempts for methods which use tenacity.retry
MAX_RETRY_ATTEMPTS = int(os.environ.get("OSXPHOTOS_MAX_RETRY_ATTEMPTS", 3))

# maximum number of export results rows to save
MAX_EXPORT_RESULTS_DATA_ROWS = int(
    os.environ.get("OSXPHOTOS_MAX_EXPORT_RESULTS_DATA_ROWS", 10)
)

# batch size for history writes to reduce commit overhead
HISTORY_BATCH_SIZE = int(os.environ.get("OSXPHOTOS_HISTORY_BATCH_SIZE", 100))


logger = logging.getLogger("osxphotos")


def retry_log_error_no_raise(retry_state):
    """Log error for retry but don't raise exception"""
    logger.debug(
        f"Error {retry_state.outcome} for {retry_state.fn.__name__}({retry_state.args}, {retry_state.kwargs}); retrying...",
    )


def pickle_and_zip(data: Any) -> bytes:
    """
    Pickle and gzip data.

    Args:
        data: data to pickle and gzip (must be pickle-able)

    Returns:
        bytes of gzipped pickled data
    """
    pickled = pickle.dumps(data)
    return gzip.compress(pickled)


def unzip_and_unpickle(data: bytes) -> Any:
    """
    Unzip and unpickle data.

    Args:
        data: data to unzip and unpickle

    Returns:
        unpickled data
    """
    return pickle.loads(gzip.decompress(data))
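
# Illustrative sketch (not part of the module): pickle_and_zip() and
# unzip_and_unpickle() are inverses, so any pickle-able object round-trips
# through the gzipped-pickle BLOB representation used by export_results_data:
#
#     payload = {"uuid": "ABC-123", "exported": ["IMG_0001.jpeg"]}  # hypothetical data
#     blob = pickle_and_zip(payload)        # bytes, suitable for a BLOB column
#     assert unzip_and_unpickle(blob) == payload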


class BatchContext:
    """Context manager for batching database operations in ExportDB.

    When active, commits are deferred until the batch completes.
    Supports nesting - only the outermost batch commits.
    """

    def __init__(self, export_db: "ExportDB"):
        self._export_db = export_db

    def __enter__(self):
        self._export_db._batch_mode += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._export_db._batch_mode -= 1
        if self._export_db._batch_mode == 0:
            # Outermost batch is complete, flush and commit
            self._export_db.flush_history()
            with self._export_db.lock:
                if exc_type and self._export_db._conn.in_transaction:
                    self._export_db._conn.rollback()
                elif self._export_db._conn.in_transaction:
                    self._export_db._conn.commit()


class ExportDB:
    """Interface to sqlite3 database used to store state information for osxphotos export command

    Args:
        dbfile: path to osxphotos export database file
        export_dir: path to directory where exported files are stored; if None will read value from database
        version: if supplied, creates database with this version; otherwise uses current version;
            (must be >= "4.3" and <= OSXPHOTOS_EXPORTDB_VERSION)
            For testing only; in normal usage, omit this argument

    Note: export_dir = None must only be used when opening an existing database to read and not to export
    """

    def __init__(
        self,
        dbfile: str | os.PathLike,
        export_dir: str | os.PathLike | None,
        version: str | None = None,
    ):
        """Create a new ExportDB object

        Args:
            dbfile: path to osxphotos export database file
            export_dir: path to directory where exported files are stored; if None will read value from database
            version: if supplied, creates database with this version; otherwise uses current version;
                (must be >= "4.3" and <= OSXPHOTOS_EXPORTDB_VERSION)
                For testing only; in normal usage, omit this argument

        Note: export_dir = None must only be used when opening an existing database to read and not to export
        """
        self._dbfile: str = str(dbfile)
        # export_dir allows the database to be located in a different path than the export;
        # all paths are stored as relative paths to this path
        # this allows the entire export tree to be moved to a new disk/location
        # whilst preserving the UUID to filename mapping
        self._path: str | None = str(export_dir) if export_dir else None
        self.was_upgraded: tuple[str, str] | tuple = ()
        self.was_created = False
        self.lock = threading.Lock()

        # Cache for batch-loaded ExportRecord data, keyed by directory path
        # Each directory maps to a dict of filepath_normalized -> record data
        self._record_cache: dict[str, dict[str, dict[str, Any]]] = {}

        # Buffer for batched history writes to reduce commit overhead
        # Each entry is (filename_normalized, uuid, action, diff)
        self._history_buffer: list[tuple[str, str, str, str | None]] = []

        # Batch mode counter - when > 0, commits are deferred until batch completes
        self._batch_mode: int = 0

        self._conn = self._open_export_db(self._dbfile, version)
        self._perform_db_maintenance(self._conn)
        self._insert_run_info()
        self._insert_export_dir()

    @property
    def path(self) -> str:
        """returns path to export database"""
        return self._dbfile

    @property
    def export_dir(self) -> str:
        """returns path to export directory"""
        return self._path

    @property
    def connection(self) -> sqlite3.Connection:
        """returns sqlite3 connection"""
        return self._conn or self._get_db_connection(self._dbfile)

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_file_record(self, filename: str | os.PathLike) -> "ExportRecord" | None:
        """get info for filename

        Returns: an ExportRecord object or None if filename not found
        """
        filename = self._relative_filepath(filename)
        filename_normalized = self._normalize_filepath(filename)

        # Check cache first
        dir_path = str(pathlib.Path(filename).parent)
        dir_normalized = self._normalize_filepath(dir_path)
        if dir_normalized in self._record_cache:
            cached_data = self._record_cache[dir_normalized].get(filename_normalized)
            if cached_data is not None:
                return ExportRecord(
                    self.connection,
                    self.lock,
                    filename_normalized,
                    cached_data,
                    export_db=self,
                )
            # Directory is cached but file not found
            return None

        with self.lock:
            conn = self.connection
            c = conn.cursor()
            result = c.execute(
                "SELECT uuid FROM export_data WHERE filepath_normalized = ?;",
                (filename_normalized,),
            ).fetchone()
            return (
                ExportRecord(conn, self.lock, filename_normalized, export_db=self)
                if result
                else None
            )

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def prefetch_directory_records(self, directory: str | os.PathLike) -> int:
        """Batch-load all ExportRecord data for files in a directory.

        This loads all records with a single SQL query and caches them for
        subsequent get_file_record() calls. This is significantly faster than
        individual queries when processing many files in the same directory.

        Args:
            directory: Directory path to prefetch records for.

        Returns: Number of records loaded.
        """
        dir_relative = self._relative_filepath(directory)
        dir_normalized = self._normalize_filepath(dir_relative)

        # Already cached
        if dir_normalized in self._record_cache:
            return len(self._record_cache[dir_normalized])

        with self.lock:
            conn = self.connection
            c = conn.cursor()
            # Query all records in directory with LIKE prefix match
            # The pattern matches files in the directory (not subdirectories)
            # Special case for root directory (dir_normalized is ".")
            if dir_normalized == ".":
                # Root directory: match files without "/" (directly in root)
                results = c.execute(
                    """SELECT filepath_normalized, uuid, digest, exifdata,
                              export_options, dest_mode, dest_size, dest_mtime,
                              error, date_modified
                       FROM export_data
                       WHERE filepath_normalized NOT LIKE '%/%';""",
                ).fetchall()
            else:
                results = c.execute(
                    """SELECT filepath_normalized, uuid, digest, exifdata,
                              export_options, dest_mode, dest_size, dest_mtime,
                              error, date_modified
                       FROM export_data
                       WHERE filepath_normalized LIKE ?
                       AND filepath_normalized NOT LIKE ?;""",
                    (f"{dir_normalized}/%", f"{dir_normalized}/%/%"),
                ).fetchall()

        # Build cache for this directory
        dir_cache: dict[str, dict[str, Any]] = {}
        for row in results:
            filepath_normalized = row[0]
            # Parse error JSON
            error_val = json.loads(row[8]) if row[8] else None
            # Parse date_modified
            date_modified_val = None
            if row[9]:
                try:
                    date_modified_val = datetime.datetime.fromisoformat(row[9])
                except Exception:
                    pass
            # Build dest_sig tuple
            mtime = int(row[7]) if row[7] is not None else None
            dest_sig = (row[5], row[6], mtime)
            dir_cache[filepath_normalized] = {
                "uuid": row[1],
                "digest": row[2],
                "exifdata": row[3],
                "export_options": row[4],
                "dest_sig": dest_sig,
                "error": error_val,
                "date_modified": date_modified_val,
            }

        self._record_cache[dir_normalized] = dir_cache
        logger.debug(
            f"Prefetched {len(dir_cache)} export records for directory {directory}"
        )
        return len(dir_cache)

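    # Usage sketch (paths are assumed, not from the module): prefetching a
    # directory before calling get_file_record() on each file replaces N
    # SELECTs with one query plus dictionary lookups:
    #
    #     export_db.prefetch_directory_records("/exports/2023")
    #     for path in paths_in_2023:                    # all under /exports/2023
    #         record = export_db.get_file_record(path)  # served from cache
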
    def invalidate_record_cache(self, directory: str | os.PathLike | None = None):
        """Invalidate the record cache for a directory or all directories.

        Args:
            directory: Directory to invalidate, or None to clear entire cache.
        """
        if directory is None:
            self._record_cache.clear()
        else:
            dir_relative = self._relative_filepath(directory)
            dir_normalized = self._normalize_filepath(dir_relative)
            self._record_cache.pop(dir_normalized, None)

    @retry(
        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
        retry=retry_if_not_exception_type(sqlite3.IntegrityError),
    )
    def create_file_record(
        self, filename: str | os.PathLike, uuid: str
    ) -> "ExportRecord":
        """create a new record for filename and uuid

        Returns: an ExportRecord object
        """
        filename = self._relative_filepath(filename)
        filename_normalized = self._normalize_filepath(filename)
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute(
                "INSERT INTO export_data (filepath, filepath_normalized, uuid) VALUES (?, ?, ?);",
                (filename, filename_normalized, uuid),
            )
            if not self.in_batch_mode:
                conn.commit()
            return ExportRecord(conn, self.lock, filename_normalized, export_db=self)

    @retry(
        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
        retry=retry_if_not_exception_type(sqlite3.IntegrityError),
    )
    def create_or_get_file_record(
        self, filename: str | os.PathLike, uuid: str
    ) -> "ExportRecord":
        """create a new record for filename and uuid or return existing record

        Returns: an ExportRecord object
        """
        filename = self._relative_filepath(filename)
        filename_normalized = self._normalize_filepath(filename)

        # Check cache first - if record exists, skip the database query entirely
        dir_path = str(pathlib.Path(filename).parent)
        dir_normalized = self._normalize_filepath(dir_path)
        if dir_normalized in self._record_cache:
            cached_data = self._record_cache[dir_normalized].get(filename_normalized)
            if cached_data is not None:
                return ExportRecord(
                    self.connection,
                    self.lock,
                    filename_normalized,
                    cached_data,
                    export_db=self,
                )

        with self.lock:
            conn = self.connection
            c = conn.cursor()
            # Check if record already exists in database to avoid unnecessary INSERT + commit
            existing = c.execute(
                "SELECT 1 FROM export_data WHERE filepath_normalized = ? LIMIT 1;",
                (filename_normalized,),
            ).fetchone()
            if existing:
                # Record exists, no need to INSERT or commit
                return ExportRecord(
                    conn, self.lock, filename_normalized, export_db=self
                )
            # Record doesn't exist, INSERT and commit (unless in batch mode)
            c.execute(
                "INSERT OR IGNORE INTO export_data (filepath, filepath_normalized, uuid) VALUES (?, ?, ?);",
                (filename, filename_normalized, uuid),
            )
            if not self.in_batch_mode:
                conn.commit()
            return ExportRecord(conn, self.lock, filename_normalized, export_db=self)

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_uuid_for_file(self, filename: str) -> str | None:
        """query database for filename and return UUID

        returns None if filename not found in database
        """
        filepath_normalized = self._normalize_filepath_relative(filename)
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute(
                "SELECT uuid FROM export_data WHERE filepath_normalized = ?",
                (filepath_normalized,),
            )
            results = c.fetchone()
            return results[0] if results else None

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_files_for_uuid(self, uuid: str) -> list[str]:
        """query database for UUID and return list of files associated with UUID or empty list"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute(
                "SELECT filepath FROM export_data WHERE uuid = ?",
                (uuid,),
            )
            results = c.fetchall()
            return [os.path.join(self.export_dir, r[0]) for r in results]

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_photoinfo_for_uuid(self, uuid: str) -> str | None:
        """returns the photoinfo JSON string for a UUID or None if not found"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute("SELECT photoinfo FROM photoinfo WHERE uuid = ?", (uuid,))
            results = c.fetchone()
            return results[0] if results else None

    @retry(
        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
        retry=retry_if_not_exception_type(sqlite3.IntegrityError),
    )
    def set_photoinfo_for_uuid(self, uuid: str, info: str):
        """sets the photoinfo JSON string for a UUID"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute(
                "INSERT OR REPLACE INTO photoinfo(uuid, photoinfo) VALUES (?, ?);",
                (uuid, info),
            )
            conn.commit()

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_target_for_file(
        self, uuid: str, filename: str | os.PathLike
    ) -> str | None:
        """query database for file matching file name and return the matching filename
        if there is one; otherwise return None; looks for file.ext, file (1).ext,
        file (2).ext and so on to find the actual target name that was used to
        export filename

        Returns: the matching filename or None if no match found
        """
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            filepath_normalized = self._normalize_filepath_relative(filename)
            filepath_stem, filepath_ext = os.path.splitext(filepath_normalized)
            c.execute(
                "SELECT uuid, filepath, filepath_normalized FROM export_data WHERE uuid = ? AND filepath_normalized LIKE ?",
                (
                    uuid,
                    f"{filepath_stem}%",
                ),
            )
            results = c.fetchall()
            for result in results:
                result_stem, result_ext = os.path.splitext(result[2])
                if result_ext != filepath_ext:
                    continue
                if re.match(re.escape(filepath_stem) + r"(\s\(\d+\))?$", result_stem):
                    return os.path.join(self.export_dir, result[1])
            return None

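    # Example of the "(N)" matching above (illustrative values): if this UUID
    # was previously exported as "img (1).jpeg" because "img.jpeg" already
    # existed, the regex  re.escape("img") + r"(\s\(\d+\))?$"  matches the
    # stored stem "img (1)" but not, say, "img_edited", so:
    #
    #     export_db.get_target_for_file(uuid, "/exports/img.jpeg")
    #     # -> "/exports/img (1).jpeg", or None if no prior export matches
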
    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_previous_uuids(self):
        """returns list of UUIDs of previously exported photos found in export database"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute("SELECT DISTINCT uuid FROM export_data")
            results = c.fetchall()
            return [row[0] for row in results]

    @retry(
        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
        retry=retry_if_not_exception_type(sqlite3.IntegrityError),
    )
    def set_config(self, config_data: str):
        """set config in the database"""
        with self.lock:
            conn = self.connection
            dt = datetime.datetime.now().isoformat()
            c = conn.cursor()
            c.execute(
                "INSERT OR REPLACE INTO config(datetime, config) VALUES (?, ?);",
                (dt, config_data),
            )
            conn.commit()

    @retry(
        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
        retry=retry_if_not_exception_type(sqlite3.IntegrityError),
    )
    def set_export_results(self, results: "osxphotos.photoexporter.ExportResults"):
        """Store export results in database; data is pickled and gzipped for storage"""
        results_data = pickle_and_zip(results)
        with self.lock:
            conn = self.connection
            dt = datetime.datetime.now().isoformat()
            c = conn.cursor()
            c.execute(
                """
                UPDATE export_results_data
                SET datetime = ?, export_results = ?
                WHERE datetime = (SELECT MIN(datetime) FROM export_results_data);
                """,
                (dt, results_data),
            )
            conn.commit()

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_export_results(
        self, run: int = 0
    ) -> "osxphotos.photoexporter.ExportResults" | None:
        """Retrieve export results from database

        Args:
            run: which run to retrieve results for;
                0 = most recent run, -1 = previous run, -2 = run prior to that, etc.

        Returns: ExportResults object or None if no results found
        """
        if run > 0:
            raise ValueError("run must be 0 or negative")
        run = -run
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute(
                """
                SELECT export_results
                FROM export_results_data
                ORDER BY datetime DESC
                """,
            )
            rows = c.fetchall()
            try:
                data = rows[run][0]
                results = unzip_and_unpickle(data) if data else None
            except IndexError:
                results = None
            return results

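    # Run indexing sketch: rows are ordered newest-first, so run=0 reads the
    # most recent run and negative values walk backwards in time:
    #
    #     latest = export_db.get_export_results(0)     # most recent run
    #     previous = export_db.get_export_results(-1)  # run before that
    #     # runs older than the MAX_EXPORT_RESULTS_DATA_ROWS circular buffer
    #     # (or rows still holding the blank b"" placeholder) come back as None
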
    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_exported_files(self):
        """Yields (uuid, filepath) for all paths of all exported files tracked in the database"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute("SELECT uuid, filepath FROM export_data")
            while row := c.fetchone():
                yield row[0], os.path.join(self.export_dir, row[1])
        return

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def delete_data_for_uuid(self, uuid: str):
        """Delete all exportdb data for given UUID"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            count = 0
            c.execute("DELETE FROM export_data WHERE uuid = ?;", (uuid,))
            count += c.execute("SELECT CHANGES();").fetchone()[0]
            c.execute("DELETE FROM photoinfo WHERE uuid = ?;", (uuid,))
            count += c.execute("SELECT CHANGES();").fetchone()[0]
            conn.commit()
            return count

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def delete_data_for_filepath(self, filepath: str | os.PathLike):
        """Delete all exportdb data for given filepath"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            filepath_normalized = self._normalize_filepath_relative(filepath)
            results = c.execute(
                "SELECT uuid FROM export_data WHERE filepath_normalized = ?;",
                (filepath_normalized,),
            ).fetchall()
        # delete_data_for_uuid() acquires self.lock itself, so it must be called
        # after releasing the lock above (threading.Lock is not reentrant)
        return sum(self.delete_data_for_uuid(row[0]) for row in results)

    def set_history(
        self,
        filename: str | os.PathLike,
        uuid: str,
        action: str,
        diff: str | None,
    ):
        """Set the history record for the filepath

        Args:
            filename: path to file
            uuid: UUID of the photo the file belongs to
            action: action taken on file, e.g. "exported", "skipped", "missing", "updated"
            diff: diff for file as a serialized JSON str

        Note: Records are buffered and committed in batches for better performance.
        Call flush_history() to force immediate commit of buffered records.
        """
        filename_normalized = self._normalize_filepath_relative(filename)
        self._history_buffer.append((filename_normalized, uuid, action, diff))
        if len(self._history_buffer) >= HISTORY_BATCH_SIZE:
            self.flush_history()

    def flush_history(self):
        """Flush buffered history records to the database.

        This is called automatically when the buffer reaches HISTORY_BATCH_SIZE,
        and also called by close() to ensure all records are written.
        """
        if not self._history_buffer:
            return
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            # First, ensure all paths exist in history_path table
            # Use INSERT OR IGNORE to handle existing paths efficiently
            unique_paths = {(fn, uuid) for fn, uuid, _, _ in self._history_buffer}
            c.executemany(
                "INSERT OR IGNORE INTO history_path (filepath_normalized, uuid) VALUES (?, ?);",
                unique_paths,
            )
            # Build a mapping of filepath_normalized -> id
            # Query all paths we need in one go
            path_list = [fn for fn, _, _, _ in self._history_buffer]
            placeholders = ",".join("?" * len(path_list))
            rows = c.execute(
                f"SELECT filepath_normalized, id FROM history_path WHERE filepath_normalized IN ({placeholders});",
                path_list,
            ).fetchall()
            path_to_id = {row[0]: row[1] for row in rows}
            # Now batch insert all history records
            history_records = [
                (path_to_id[fn], action, diff)
                for fn, _, action, diff in self._history_buffer
            ]
            c.executemany(
                "INSERT INTO history (filepath_id, action, diff) VALUES (?, ?, ?);",
                history_records,
            )
            conn.commit()
            self._history_buffer.clear()

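    # Write-path sketch (variables are hypothetical): set_history() only
    # appends to an in-memory buffer; nothing hits sqlite until
    # HISTORY_BATCH_SIZE records accumulate or flush_history()/close() runs:
    #
    #     export_db.set_history(exported_path, uuid, "exported", None)  # buffered
    #     export_db.flush_history()  # two executemany() calls + one commit
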
    # @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_history(
        self, filepath: str | os.PathLike | None = None, uuid: str | None = None
    ):
        """Get history for a filepath or uuid

        Args:
            filepath: path to file
            uuid: UUID of file

        Returns: list of history records
        """
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            if filepath:
                filepath_normalized = self._normalize_filepath_relative(filepath)
                results = c.execute(
                    """
                    SELECT datetime, action, diff
                    FROM history_path
                    JOIN history ON history_path.id = history.filepath_id
                    WHERE filepath_normalized = ?
                    ORDER BY datetime ASC
                    """,
                    (filepath_normalized,),
                ).fetchall()
            elif uuid:
                results = c.execute(
                    """
                    SELECT datetime, action, diff
                    FROM history_path
                    JOIN history ON history_path.id = history.filepath_id
                    WHERE uuid = ?
                    ORDER BY datetime ASC
                    """,
                    (uuid,),
                ).fetchall()
            else:
                raise ValueError("Must specify filepath or uuid")
            return results

    # @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def get_last_export_directory(self) -> str | None:
        """Return the last export directory from the database"""
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            results = c.execute(
                """
                SELECT export_directory
                FROM export_directory
                ORDER BY id DESC
                LIMIT 1;
                """
            ).fetchone()
            if not results:
                return None
            return results[0]

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def close(self):
        """Flush any buffered data and close the database connection"""
        if self._conn:
            self.flush_history()
            self._conn.close()
            self._conn = None

    @property
    def in_batch_mode(self) -> bool:
        """Return True if database is in batch mode (commits are deferred)"""
        return self._batch_mode > 0

    def batch_operations(self) -> "BatchContext":
        """Context manager for batching database operations.

        When in batch mode, commits are deferred until the batch completes.
        This significantly improves performance when performing many database
        operations in sequence.

        Example::

            with export_db.batch_operations():
                for photo in photos:
                    record = export_db.create_or_get_file_record(...)
                    with record:
                        record.photoinfo = ...
                        record.dest_sig = ...
            # Single commit happens here when exiting batch_operations

        """
        return BatchContext(self)

    def _open_export_db(
        self, dbfile: str, version: str | None = None
    ) -> sqlite3.Connection:
        """open export database and return a db connection

        if dbfile does not exist, will create and initialize the database
        if dbfile needs to be upgraded, will perform needed migrations

        returns: connection to the database
        """
        if not os.path.isfile(dbfile):
            conn = self._get_db_connection(dbfile)
            self._create_or_migrate_db_tables(conn, version=version)
            self.was_created = True
            self.was_upgraded = ()
            self.version = version or OSXPHOTOS_EXPORTDB_VERSION
        else:
            conn = self._get_db_connection(dbfile)
            self.was_created = False
            version_info = self._get_database_version(conn)
            if float(version_info[1]) < float(OSXPHOTOS_EXPORTDB_VERSION):
                self._create_or_migrate_db_tables(conn)
                self.was_upgraded = (version_info[1], OSXPHOTOS_EXPORTDB_VERSION)
            else:
                self.was_upgraded = ()
            self.version = OSXPHOTOS_EXPORTDB_VERSION

        # turn on performance optimizations
        with self.lock:
            c = conn.cursor()
            c.execute("PRAGMA journal_mode=WAL;")
            c.execute("PRAGMA synchronous=NORMAL;")
            c.execute("PRAGMA cache_size=-100000;")
            c.execute("PRAGMA temp_store=MEMORY;")

        return conn

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def _get_db_connection(self, dbfile: str) -> sqlite3.Connection:
        """return db connection to dbname"""
        return sqlite3.connect(dbfile, check_same_thread=SQLITE_CHECK_SAME_THREAD)

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def _get_database_version(self, conn: sqlite3.Connection) -> tuple[str, str]:
        """return tuple of (osxphotos, exportdb) versions for database connection conn"""
        with self.lock:
            version_info = conn.execute(
                "SELECT osxphotos, exportdb, max(id) FROM version"
            ).fetchone()
            version = (version_info[0], version_info[1])
        if not version[1]:
            # database may not have been initialized, #1435
            version = (version[0], "4.3")
        return version

    def _create_or_migrate_db_tables(
        self, conn: sqlite3.Connection, version: str | None = None
    ):
        """create (if not already created) the necessary db tables for the export database
        and apply any needed migrations

        Args:
            conn: sqlite3 db connection
            version: if supplied, creates database with this version; otherwise uses current version;
                (must be >= "4.3" and <= OSXPHOTOS_EXPORTDB_VERSION)
                For testing only; in normal usage, omit this argument
        """
        try:
            version_info = self._get_database_version(conn)
        except Exception:
            version_info = (__version__, "4.3")

        max_version = float(OSXPHOTOS_EXPORTDB_VERSION)
        version = float(version) if version else max_version
        current_version = float(version_info[1])
        if version < float("4.3"):
            raise ValueError(
                f"Requested database version {version} is older than minimum supported version 4.3"
            )
        if version > max_version:
            raise ValueError(
                f"Requested database version {version} is newer than maximum supported version {OSXPHOTOS_EXPORTDB_VERSION}"
            )
        if current_version > version:
            raise ValueError(
                f"Database version {version_info[1]} is newer than requested version {version}"
            )

        # Current for version 4.3, for anything greater, do a migration after creation
        sql_commands = [
            """
            CREATE TABLE IF NOT EXISTS version (
                id INTEGER PRIMARY KEY,
                osxphotos TEXT,
                exportdb TEXT
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS about (
                id INTEGER PRIMARY KEY,
                about TEXT
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS files (
                id INTEGER PRIMARY KEY,
                filepath TEXT NOT NULL,
                filepath_normalized TEXT NOT NULL,
                uuid TEXT,
                orig_mode INTEGER,
                orig_size INTEGER,
                orig_mtime REAL,
                exif_mode INTEGER,
                exif_size INTEGER,
                exif_mtime REAL
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS runs (
                id INTEGER PRIMARY KEY,
                datetime TEXT,
                python_path TEXT,
                script_name TEXT,
                args TEXT,
                cwd TEXT
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS info (
                id INTEGER PRIMARY KEY,
                uuid text NOT NULL,
                json_info JSON
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS exifdata (
                id INTEGER PRIMARY KEY,
                filepath_normalized TEXT NOT NULL,
                json_exifdata JSON
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS edited (
                id INTEGER PRIMARY KEY,
                filepath_normalized TEXT NOT NULL,
                mode INTEGER,
                size INTEGER,
                mtime REAL
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS converted (
                id INTEGER PRIMARY KEY,
                filepath_normalized TEXT NOT NULL,
                mode INTEGER,
                size INTEGER,
                mtime REAL
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS sidecar (
                id INTEGER PRIMARY KEY,
                filepath_normalized TEXT NOT NULL,
                sidecar_data TEXT,
                mode INTEGER,
                size INTEGER,
                mtime REAL
            );
            """,
            """
            CREATE TABLE IF NOT EXISTS detected_text (
                id INTEGER PRIMARY KEY,
                uuid TEXT NOT NULL,
                text_data JSON
            );
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_files_filepath_normalized on files (filepath_normalized);
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_info_uuid on info (uuid);
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_exifdata_filename on exifdata (filepath_normalized);
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_edited_filename on edited (filepath_normalized);
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_converted_filename on converted (filepath_normalized);
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_sidecar_filename on sidecar (filepath_normalized);
            """,
            """
            CREATE UNIQUE INDEX IF NOT EXISTS idx_detected_text on detected_text (uuid);
            """,
        ]

        # create the tables if needed
        with self.lock:
            c = conn.cursor()
            for cmd in sql_commands:
                c.execute(cmd)
            c.execute(
                "INSERT INTO version(osxphotos, exportdb) VALUES (?, ?);",
                (__version__, version),
            )
            c.execute("INSERT INTO about(about) VALUES (?);", (OSXPHOTOS_ABOUT_STRING,))
            conn.commit()

        # perform needed migrations
        # compare float(str) to avoid any issues with comparing float(str) to float
        if current_version < float("4.3"):
            self._migrate_normalized_filepath(conn)
        if current_version < float("5.0") and version >= float("5.0"):
            self._migrate_4_3_to_5_0(conn)
        if current_version < float("6.0") and version >= float("6.0"):
            # create export_data table
            self._migrate_5_0_to_6_0(conn)
        if current_version < float("7.0") and version >= float("7.0"):
            # create report_data table
            self._migrate_6_0_to_7_0(conn)
        if current_version < float("7.1") and version >= float("7.1"):
            # add timestamp to export_data
            self._migrate_7_0_to_7_1(conn)
        if current_version < float("8.0") and version >= float("8.0"):
            # add error to export_data
            self._migrate_7_1_to_8_0(conn)
        if current_version < float("9.0") and version >= float("9.0"):
            # add history table
            self._migrate_8_0_to_9_0(conn)
        if current_version < float("9.1") and version >= float("9.1"):
            # Add history index
            self._migrate_9_0_to_9_1(conn)
        if current_version < float("10.0") and version >= float("10.0"):
            self._migrate_9_1_to_10_0(conn)
        if current_version < float("10.1") and version >= float("10.1"):
            self._migrate_10_0_to_10_1(conn)
        if current_version < float("11.0") and version >= float("11.0"):
            self._migrate_10_1_to_11_0(conn)

        with self.lock:
            conn.execute("VACUUM;")
            conn.commit()

    def __del__(self):
        """ensure the database connection is closed"""
        with suppress(Exception):
            self._conn.close()

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def _insert_run_info(self):
        """Insert run info into runs table"""
        dt = datetime.datetime.now(datetime.timezone.utc).isoformat()
        python_path = sys.executable
        cmd = sys.argv[0]
        args = " ".join(sys.argv[1:]) if len(sys.argv) > 1 else ""
        cwd = os.getcwd()
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute(
                "INSERT INTO runs (datetime, python_path, script_name, args, cwd) VALUES (?, ?, ?, ?, ?)",
                (dt, python_path, cmd, args, cwd),
            )
            conn.commit()

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def _insert_export_dir(self):
        """Insert export_directory info into export_directory table"""
        if float(self.version) < float("10.0"):
            return
        if self._path is None:
            # database opened without a path specified, do not update
            return
        with self.lock:
            conn = self.connection
            c = conn.cursor()
            c.execute(
                """
                INSERT INTO export_directory (export_directory)
                SELECT ?
                WHERE ? <> (
                    SELECT export_directory
                    FROM export_directory
                    ORDER BY rowid DESC
                    LIMIT 1
                )
                OR NOT EXISTS (
                    SELECT 1 FROM export_directory
                )
                """,
                (self._path, self._path),
            )
            conn.commit()

    def _relative_filepath(self, filepath: str | os.PathLike) -> str:
        """return filepath relative to self.export_dir"""
        return str(pathlib.Path(filepath).relative_to(self.export_dir))

    def _normalize_filepath(self, filepath: str | os.PathLike) -> str:
        """normalize filepath for unicode, lower case"""
        return normalize_fs_path(str(filepath)).lower()

    def _normalize_filepath_relative(self, filepath: str | os.PathLike) -> str:
        """normalize filepath for unicode, relative path (to export dir), lower case"""
        filepath = self._relative_filepath(filepath)
        return normalize_fs_path(str(filepath)).lower()

    def _migrate_normalized_filepath(self, conn: sqlite3.Connection):
        """Fix all filepath_normalized columns for unicode normalization"""
        # Prior to database version 4.3, filepath_normalized was not normalized for unicode
        migration_sql = [
            """
            CREATE TABLE IF NOT EXISTS files_migrate (
                id INTEGER PRIMARY KEY,
                filepath TEXT NOT NULL,
                filepath_normalized TEXT NOT NULL,
                uuid TEXT,
                orig_mode INTEGER,
                orig_size INTEGER,
                orig_mtime REAL,
                exif_mode INTEGER,
                exif_size INTEGER,
                exif_mtime REAL,
                UNIQUE(filepath_normalized)
            );
            """,
            """INSERT INTO files_migrate SELECT * FROM files;""",
            """DROP TABLE files;""",
            """ALTER TABLE files_migrate RENAME TO files;""",
        ]
        with self.lock:
            c = conn.cursor()
            for sql in migration_sql:
                c.execute(sql)
            conn.commit()

            for table in ["converted", "edited", "exifdata", "files", "sidecar"]:
                old_values = c.execute(
                    f"SELECT filepath_normalized, id FROM {table}"
                ).fetchall()
                new_values = [
                    (self._normalize_filepath(filepath_normalized), id_)
                    for filepath_normalized, id_ in old_values
                ]
                c.executemany(
                    f"UPDATE {table} SET filepath_normalized=? WHERE id=?", new_values
                )
            conn.commit()

    def _migrate_4_3_to_5_0(self, conn: sqlite3.Connection):
        """Migrate database from version 4.3 to 5.0"""
        with self.lock:
            c = conn.cursor()
            # add metadata column to files to support --force-update
            c.execute("ALTER TABLE files ADD COLUMN metadata TEXT;")
            conn.commit()

    def _migrate_5_0_to_6_0(self, conn: sqlite3.Connection):
        with self.lock:
            c = conn.cursor()
            # add export_data table
            c.execute(
                """
                CREATE TABLE IF NOT EXISTS export_data(
                    id INTEGER PRIMARY KEY,
                    filepath_normalized TEXT NOT NULL,
                    filepath TEXT NOT NULL,
                    uuid TEXT NOT NULL,
                    src_mode INTEGER,
                    src_size INTEGER,
                    src_mtime REAL,
                    dest_mode INTEGER,
                    dest_size INTEGER,
                    dest_mtime REAL,
                    digest TEXT,
                    exifdata JSON,
                    export_options INTEGER,
                    UNIQUE(filepath_normalized)
                );
                """,
            )
            c.execute(
                """
                CREATE UNIQUE INDEX IF NOT EXISTS idx_export_data_filepath_normalized on export_data (filepath_normalized);
                """,
            )
            # migrate data
            c.execute(
                """
                INSERT INTO export_data (filepath_normalized, filepath, uuid)
                SELECT filepath_normalized, filepath, uuid FROM files;
                """,
            )
            c.execute(
                """
                UPDATE export_data
                SET (src_mode, src_size, src_mtime) =
                    (SELECT mode, size, mtime FROM edited
                     WHERE export_data.filepath_normalized = edited.filepath_normalized);
                """,
            )
            c.execute(
                """
                UPDATE export_data
                SET (dest_mode, dest_size, dest_mtime) =
                    (SELECT orig_mode, orig_size, orig_mtime FROM files
                     WHERE export_data.filepath_normalized = files.filepath_normalized);
                """,
            )
            c.execute(
                """
                UPDATE export_data
                SET digest =
                    (SELECT metadata FROM files
                     WHERE files.filepath_normalized = export_data.filepath_normalized);
                """
            )
            c.execute(
                """
                UPDATE export_data
                SET exifdata =
                    (SELECT json_exifdata FROM exifdata
                     WHERE exifdata.filepath_normalized = export_data.filepath_normalized);
                """
            )
            # create config table
            c.execute(
                """
                CREATE TABLE IF NOT EXISTS config (
                    id INTEGER PRIMARY KEY,
                    datetime TEXT,
                    config TEXT
                );
                """
            )
            # create photoinfo table
            c.execute(
                """
                CREATE TABLE IF NOT EXISTS photoinfo (
                    id INTEGER PRIMARY KEY,
                    uuid TEXT NOT NULL,
                    photoinfo JSON,
                    UNIQUE(uuid)
                );
                """
            )
            c.execute(
                """CREATE UNIQUE INDEX IF NOT EXISTS idx_photoinfo_uuid on photoinfo (uuid);"""
            )
            c.execute(
                """
                INSERT INTO photoinfo (uuid, photoinfo)
                SELECT uuid, json_info FROM info;
                """
            )
            # drop indexes no longer needed
            c.execute("DROP INDEX IF EXISTS idx_files_filepath_normalized;")
            c.execute("DROP INDEX IF EXISTS idx_exifdata_filename;")
            c.execute("DROP INDEX IF EXISTS idx_edited_filename;")
            c.execute("DROP INDEX IF EXISTS idx_converted_filename;")
            c.execute("DROP INDEX IF EXISTS idx_sidecar_filename;")
            c.execute("DROP INDEX IF EXISTS idx_detected_text;")
            # drop tables no longer needed
            c.execute("DROP TABLE IF EXISTS files;")
            c.execute("DROP TABLE IF EXISTS info;")
            c.execute("DROP TABLE IF EXISTS exifdata;")
            c.execute("DROP TABLE IF EXISTS edited;")
            c.execute("DROP TABLE IF EXISTS converted;")
            c.execute("DROP TABLE IF EXISTS sidecar;")
            c.execute("DROP TABLE IF EXISTS detected_text;")
            conn.commit()

    def _migrate_6_0_to_7_0(self, conn: sqlite3.Connection):
        with self.lock:
            c = conn.cursor()
            c.execute(
                """CREATE TABLE IF NOT EXISTS export_results_data (
                    id INTEGER PRIMARY KEY,
                    datetime TEXT,
                    export_results BLOB
                );"""
            )
            # pre-populate report_data table with blank fields
            # ExportDB will use these as circular buffer always writing to the oldest record
            for _ in range(MAX_EXPORT_RESULTS_DATA_ROWS):
                c.execute(
                    """INSERT INTO export_results_data (datetime, export_results) VALUES (?, ?);""",
                    (datetime.datetime.now().isoformat(), b""),
                )
                # sleep a tiny bit just to ensure time stamps increment
                time.sleep(0.001)
            conn.commit()

    def _migrate_7_0_to_7_1(self, conn: sqlite3.Connection):
        """Add timestamp column to export_data table and triggers to update it on insert and update."""
        with self.lock:
            c = conn.cursor()
            # timestamp column should not exist but this prevents error if migration
            # is run on an already migrated database, reference #794
            results = c.execute(
                "SELECT COUNT(*) FROM pragma_table_info('export_data') WHERE name='timestamp';"
            ).fetchone()
            if results[0] == 0:
                c.execute("""ALTER TABLE export_data ADD COLUMN timestamp DATETIME;""")
            c.execute(
                """
                CREATE TRIGGER IF NOT EXISTS insert_timestamp_trigger
                AFTER INSERT ON export_data
                BEGIN
                    UPDATE export_data
                    SET timestamp = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')
                    WHERE id = NEW.id;
                END;
                """
            )
            c.execute(
                """
                CREATE TRIGGER IF NOT EXISTS update_timestamp_trigger
                AFTER UPDATE ON export_data
                BEGIN
                    UPDATE export_data
                    SET timestamp = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')
                    WHERE id = NEW.id;
                END;
                """
            )
            conn.commit()

    def _migrate_7_1_to_8_0(self, conn: sqlite3.Connection):
        """Add error column to export_data table"""
        with self.lock:
            c = conn.cursor()
            results = c.execute(
                "SELECT COUNT(*) FROM pragma_table_info('export_data') WHERE name='error';"
            ).fetchone()
            if results[0] == 0:
                c.execute("""ALTER TABLE export_data ADD COLUMN error JSON;""")
            conn.commit()

    def _migrate_8_0_to_9_0(self, conn: sqlite3.Connection):
        """Add history table"""
        with self.lock:
            c = conn.cursor()
            # the export_data table does not contain data on missing files
            # so create a history_path table to track every path that has ever
            # been exported or attempted to export (but was missing)
            # so this can be used by history table to find the associated file path
            c.execute(
                """
                CREATE TABLE IF NOT EXISTS history_path (
                    id INTEGER PRIMARY KEY,
                    filepath_normalized TEXT NOT NULL,
                    uuid TEXT NOT NULL
                );
                """
            )
            # filepath_id is foreign key to history_path table
            c.execute(
                """
                CREATE TABLE IF NOT EXISTS history (
                    id INTEGER PRIMARY KEY,
                    filepath_id INTEGER,
                    datetime DATETIME,
                    action TEXT,
                    diff JSON
                );
                """
            )
            c.execute(
                """
                CREATE TRIGGER IF NOT EXISTS insert_history_datetime_trigger
                AFTER INSERT ON history
                BEGIN
                    UPDATE history
                    SET datetime = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')
                    WHERE id = NEW.id;
                END;
                """
            )
            conn.commit()

    def _migrate_9_0_to_9_1(self, conn: sqlite3.Connection):
        """Add history index"""
        with self.lock:
            c = conn.cursor()
            # add index to the history_path table
            c.execute(
                """
                CREATE UNIQUE INDEX IF NOT EXISTS idx_history_path_filepath_normalized
                ON history_path (filepath_normalized);
                """
            )
            conn.commit()

    def _migrate_9_1_to_10_0(self, conn: sqlite3.Connection):
        """Add export_directory table"""
        with self.lock:
            c = conn.cursor()
            c.execute(
                """
                CREATE TABLE IF NOT EXISTS export_directory (
                    id INTEGER PRIMARY KEY,
                    export_directory TEXT
                );
                """
            )
            conn.commit()

    def _migrate_10_0_to_10_1(self, conn: sqlite3.Connection):
        """Add history related indexes"""
        with self.lock:
            c = conn.cursor()
            c.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_history_filepath_id
                ON history (filepath_id);
                """
            )
            c.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_history_path_uuid
                ON history_path (uuid);
                """
            )
            conn.commit()

    def _migrate_10_1_to_11_0(self, conn: sqlite3.Connection):
        """Add modified date"""
        with self.lock:
            c = conn.cursor()
            results = c.execute(
                "SELECT COUNT(*) FROM pragma_table_info('export_data') WHERE name='date_modified';"
            ).fetchone()
            if results[0] == 0:
                c.execute(
                    """ALTER TABLE export_data ADD COLUMN date_modified DATETIME;"""
                )
            conn.commit()

    def _perform_db_maintenance(self, conn: sqlite3.Connection):
        """Perform database maintenance"""
        if float(self.version) < float("6.0"):
            return
        with self.lock:
            c = conn.cursor()
            c.execute(
                """DELETE FROM config
                WHERE id < (
                    SELECT MIN(id) FROM (
                        SELECT id FROM config ORDER BY id DESC LIMIT 9
                    )
                );
                """
            )
            conn.commit()

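# Read-only usage sketch (file name assumed): an existing export database can
# be opened with export_dir=None to inspect prior runs; per the class
# docstring this must not be used when exporting:
#
#     db = ExportDB("/exports/.osxphotos_export.db", export_dir=None)
#     if db.was_upgraded:
#         print(f"migrated from {db.was_upgraded[0]} to {db.was_upgraded[1]}")
#     print(len(db.get_previous_uuids()), "photos previously exported")
#     db.close()
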
class ExportDBInMemory(ExportDB):
    """In memory version of ExportDB

    Args:
        dbfile: path to osxphotos export database file
        export_dir: path to directory where exported files are stored; if None will read value from database
        version: if supplied, creates database with this version; otherwise uses current version;
            (must be >= "4.3" and <= OSXPHOTOS_EXPORTDB_VERSION)
            For testing only; in normal usage, omit this argument

    Note: Copies the on-disk database into memory so it may be operated on without
    modifying the on-disk version

    export_dir = None must only be used when opening an existing database to read and not to export
    """

    def __init__(
        self,
        dbfile: str | os.PathLike,
        export_dir: str | os.PathLike | None,
        version: str | None = None,
    ):
        """Initialize ExportDBInMemory

        Args:
            dbfile: path to osxphotos export database file
            export_dir: path to directory where exported files are stored; if None will read value from database
            version: if supplied, creates database with this version; otherwise uses current version;
                (must be >= "4.3" and <= OSXPHOTOS_EXPORTDB_VERSION)
                For testing only; in normal usage, omit this argument

        Note: export_dir = None must only be used when opening an existing database to read and not to export
        """
        self._dbfile: str = str(dbfile) or f"./{OSXPHOTOS_EXPORT_DB}"
        # export_dir allows the database to be located in a different path than the export;
        # all paths are stored as relative paths to this path
        # this allows the entire export tree to be moved to a new disk/location
        # whilst preserving the UUID to filename mapping
        self._path: str | None = str(export_dir) if export_dir else None
        self.was_upgraded: tuple[str, str] | tuple = ()
        self.was_created = False
        self.lock = threading.Lock()

        # Cache for batch-loaded ExportRecord data
        self._record_cache: dict[str, dict[str, dict[str, Any]]] = {}

        # Buffer for batched history writes to reduce commit overhead
        self._history_buffer: list[tuple[str, str, str, str | None]] = []

        # Batch mode counter - when > 0, commits are deferred until batch completes
        self._batch_mode: int = 0

        self._conn = self._open_export_db(self._dbfile, version=version)
        self._insert_run_info()
        self._insert_export_dir()

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def write_to_disk(self):
        """Write changes from in-memory database back to disk"""
        # dump the database
        with self.lock:
            conn = self.connection
            conn.commit()

            # backup the old database to disk
            # this is just in case the write fails
            # so the user can recover the database
            try:
                sqlite_backup_dbfiles(self._dbfile)
            except Exception as e:
                logger.warning(f"Error backing up export db: {e}")

            # delete the old database if it exists
            if os.path.isfile(self._dbfile):
                sqlite_delete_dbfiles(self._dbfile)

            # write the new database to disk
            conn_on_disk = sqlite3.connect(
                str(self._dbfile), check_same_thread=SQLITE_CHECK_SAME_THREAD
            )
            conn.backup(conn_on_disk, pages=1)
            conn_on_disk.commit()
            conn_on_disk.close()

            # delete the backup
            try:
                sqlite_delete_backup_files(self._dbfile)
            except Exception as e:
                logger.warning(f"Error deleting export database backup: {e}")

    @retry(
        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
        retry_error_callback=retry_log_error_no_raise,  # #999
    )
    def close(self):
        """close the database connection"""
        if self._conn:
            self._conn.close()
            self._conn = None

    @retry(stop=stop_after_attempt(MAX_RETRY_ATTEMPTS))
    def _open_export_db(
        self, dbfile: str, version: str | None = None
    ):  # sourcery skip: raise-specific-error
        """Open export database and return a db connection

        Args:
            dbfile: path to database file
            version: database version to create if database doesn't exist

        Returns: connection to the database
        """
        if not os.path.isfile(dbfile):
            # database doesn't exist so create it in-memory
            src = self._get_db_connection()
            if not src:
                raise Exception("Error getting connection to in-memory database")
            self._create_or_migrate_db_tables(src, version=version)
            self.was_created = True
            self.was_upgraded = ()
            self.version = OSXPHOTOS_EXPORTDB_VERSION
            return src

        if version:
            raise ValueError("Cannot specify version when opening an existing database")

        # database exists so copy it to memory
        src = sqlite3.connect(dbfile, check_same_thread=SQLITE_CHECK_SAME_THREAD)
        # Create a database in memory by backing up the on-disk database
        dst = sqlite3.connect(":memory:", check_same_thread=SQLITE_CHECK_SAME_THREAD)
        with dst:
            src.backup(dst, pages=1)
        src.close()

        self.was_created = False
        version_info = self._get_database_version(dst)
        if float(version_info[1]) < float(OSXPHOTOS_EXPORTDB_VERSION):
            self._create_or_migrate_db_tables(dst)
            self.was_upgraded = (version_info[1], OSXPHOTOS_EXPORTDB_VERSION)
        else:
            self.was_upgraded = ()
        self.version = OSXPHOTOS_EXPORTDB_VERSION

        return dst

    def _get_db_connection(self):
        """return db connection to in memory database"""
        return sqlite3.connect(":memory:", check_same_thread=SQLITE_CHECK_SAME_THREAD)

    def _dump_db(self, conn: sqlite3.Connection) -> StringIO:
        """dump sqlite db to a string buffer"""
        dbdump = StringIO()
        for line in conn.iterdump():
            dbdump.write("%s\n" % line)
        dbdump.seek(0)
        return dbdump

    def __del__(self):
        """close the database connection"""
        with suppress(Error):
            self.close()

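# Usage sketch for the in-memory variant (paths are hypothetical): useful for
# dry-run style exports where the on-disk database must stay untouched unless
# the caller explicitly persists the changes:
#
#     db = ExportDBInMemory("/exports/.osxphotos_export.db", "/exports")
#     ...  # perform export bookkeeping against the in-memory copy
#     db.write_to_disk()  # only now is the on-disk database replaced
#     db.close()
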
class ExportDBTemp(ExportDBInMemory):
    """Temporary in-memory version of ExportDB

    Args:
        version: if supplied, creates database with this version; otherwise uses current version;
            (must be >= "4.3" and <= OSXPHOTOS_EXPORTDB_VERSION)
            For testing only; in normal usage, omit this argument
    """

    def __init__(self, version: str | None = None):
        self._dbfile = ":memory:"
        self._path = "./"
        self.lock = threading.Lock()

        # Cache for batch-loaded ExportRecord data
        self._record_cache: dict[str, dict[str, dict[str, Any]]] = {}

        # Buffer for batched history writes to reduce commit overhead
        self._history_buffer: list[tuple[str, str, str, str | None]] = []

        # Batch mode counter - when > 0, commits are deferred until batch completes
        self._batch_mode: int = 0

        self.was_upgraded = ()
        self.was_created = False

        self._conn = self._open_export_db(self._dbfile, version)
        self._insert_run_info()
        self._insert_export_dir()

    def _relative_filepath(self, filepath: str | os.PathLike) -> str:
        """Overrides _relative_filepath to return a path for use in the temp db"""
        filepath = str(filepath)
        return filepath[1:] if filepath[0] == "/" else filepath

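# ExportDBTemp takes no required arguments and never touches disk, so it can
# serve as a throwaway stand-in wherever an ExportDB is expected (a sketch
# with hypothetical values; note the override above strips the leading "/"):
#
#     tmp_db = ExportDBTemp()
#     record = tmp_db.create_file_record("/photos/IMG_0001.jpeg", "ABC-123")
#     assert record.uuid == "ABC-123"
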
class ExportRecord:
    """ExportRecord class"""

    # Implementation note: all properties and setters must be aware of whether or not running
    # as a context manager. If running as a context manager, the lock is not acquired by the
    # getter/setter as the lock is acquired by the context manager. If not running as a
    # context manager, the lock is acquired by the getter/setter.

    __slots__ = [
        "_conn",
        "_context_manager",
        "_filepath_normalized",
        "_cached_data",
        "_export_db",
        "lock",
    ]

    def __init__(
        self,
        conn: sqlite3.Connection,
        lock: threading.Lock,
        filepath_normalized: str,
        cached_data: dict[str, Any] | None = None,
        export_db: "ExportDB | None" = None,
    ):
        self._conn = conn
        self.lock = lock
        self._filepath_normalized = filepath_normalized
        self._cached_data = cached_data
        self._export_db = export_db
        self._context_manager = False

    @property
    def connection(self) -> sqlite3.Connection:
        """return connection"""
        return self._conn

    @property
    def filepath(self) -> str:
        """return filepath"""
        if self._context_manager:
            return self._filepath()
        with self.lock:
            return self._filepath()

    def _filepath(self) -> str:
        """return filepath"""
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT filepath FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            return row[0]
        raise ValueError(
            f"No filepath found in database for {self._filepath_normalized}"
        )

    @property
    def filepath_normalized(self) -> str:
        """return filepath_normalized"""
        return self._filepath_normalized

    @property
    def uuid(self) -> str:
        """return uuid"""
        if self._context_manager:
            return self._uuid()
        with self.lock:
            return self._uuid()

    def _uuid(self) -> str:
        """return uuid"""
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT uuid FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            return row[0]
        raise ValueError(f"No uuid found in database for {self._filepath_normalized}")

    @property
    def digest(self) -> str:
        """returns the digest value"""
        if self._context_manager:
            return self._digest()
        with self.lock:
            return self._digest()

    @digest.setter
    def digest(self, value: str):
        """set digest value"""
        if self._context_manager:
            self._digest_setter(value)
        else:
            with self.lock:
                self._digest_setter(value)

    def _digest(self) -> str:
        """returns the digest value"""
        if self._cached_data is not None:
            return self._cached_data.get("digest")
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT digest FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            return row[0]
        raise ValueError(
            f"No digest found in database for {self._filepath_normalized}"
        )

    def _digest_setter(self, value: str):
        """set digest value"""
        conn = self.connection
        c = conn.cursor()
        c.execute(
            "UPDATE export_data SET digest = ? WHERE filepath_normalized = ?;",
            (value, self._filepath_normalized),
        )
        if not self._context_manager:
            conn.commit()

    @property
    def exifdata(self) -> str:
        """returns exifdata value for record"""
        if self._context_manager:
            return self._exifdata()
        with self.lock:
            return self._exifdata()

    @exifdata.setter
    def exifdata(self, value: str):
        """set exifdata value"""
        if self._context_manager:
            self._exifdata_setter(value)
        else:
            with self.lock:
                self._exifdata_setter(value)

    def _exifdata(self) -> str:
        """returns exifdata value for record"""
        if self._cached_data is not None:
            return self._cached_data.get("exifdata")
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT exifdata FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            return row[0]
        raise ValueError(
            f"No exifdata found in database for {self._filepath_normalized}"
        )

    def _exifdata_setter(self, value: str):
        """set exifdata value"""
        conn = self.connection
        c = conn.cursor()
        c.execute(
            "UPDATE export_data SET exifdata = ? WHERE filepath_normalized = ?;",
            (
                value,
                self._filepath_normalized,
            ),
        )
        if not self._context_manager:
            conn.commit()

    @property
    def src_sig(self) -> tuple[int, int, int | None]:
        """return source file signature value"""
        if self._context_manager:
            return self._src_sig()
        with self.lock:
            return self._src_sig()

    @src_sig.setter
    def src_sig(self, value: tuple[int, int, int | None]):
        """set source file signature value"""
        if self._context_manager:
            self._src_sig_setter(value)
        else:
            with self.lock:
                self._src_sig_setter(value)

    def _src_sig(self) -> tuple[int, int, int | None]:
        """return source file signature value"""
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT src_mode, src_size, src_mtime FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            mtime = int(row[2]) if row[2] is not None else None
            return (row[0], row[1], mtime)
        raise ValueError(
            f"No src_sig found in database for {self._filepath_normalized}"
        )

    def _src_sig_setter(self, value: tuple[int, int, int | None]):
        """set source file signature value"""
        conn = self.connection
        c = conn.cursor()
        c.execute(
            "UPDATE export_data SET src_mode = ?, src_size = ?, src_mtime = ? WHERE filepath_normalized = ?;",
            (
                value[0],
                value[1],
                value[2],
                self._filepath_normalized,
            ),
        )
        if not self._context_manager:
            conn.commit()

    @property
    def dest_sig(self) -> tuple[int, int, int | None]:
        """return destination file signature"""
        if self._context_manager:
            return self._dest_sig()
        with self.lock:
            return self._dest_sig()

    @dest_sig.setter
    def dest_sig(self, value: tuple[int, int, int | None]):
        """set destination file signature"""
        if self._context_manager:
            self._dest_sig_setter(value)
        else:
            with self.lock:
                self._dest_sig_setter(value)

    def _dest_sig(self) -> tuple[int, int, int | None]:
        """return destination file signature"""
        if self._cached_data is not None:
            return self._cached_data.get("dest_sig")
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT dest_mode, dest_size, dest_mtime FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            mtime = int(row[2]) if row[2] is not None else None
            return (row[0], row[1], mtime)
        raise ValueError(
            f"No dest_sig found in database for {self._filepath_normalized}"
        )

    def _dest_sig_setter(self, value: tuple[int, int, int | None]):
        """set destination file signature"""
        conn = self.connection
        c = conn.cursor()
        c.execute(
            "UPDATE export_data SET dest_mode = ?, dest_size = ?, dest_mtime = ? WHERE filepath_normalized = ?;",
            (
                value[0],
                value[1],
                value[2],
                self._filepath_normalized,
            ),
        )
        if not self._context_manager:
            conn.commit()

    @property
    def photoinfo(self) -> str:
        """Returns info value"""
        if self._context_manager:
            return self._photoinfo()
        with self.lock:
            return self._photoinfo()

    @photoinfo.setter
    def photoinfo(self, value: str):
        """Sets info value"""
        if self._context_manager:
            self._photoinfo_setter(value)
        else:
            with self.lock:
                self._photoinfo_setter(value)

    def _photoinfo(self) -> str:
        """Returns info value"""
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT uuid FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            uuid = row[0]
        else:
            raise ValueError(
                f"No uuid found in database for {self._filepath_normalized}"
            )
        row = c.execute(
            "SELECT photoinfo from photoinfo where uuid = ?;",
            (uuid,),
        ).fetchone()
        return row[0] if row else None

    def _photoinfo_setter(self, value: str):
        """Sets info value"""
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT uuid FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            uuid = row[0]
        else:
            raise ValueError(
                f"No uuid found in database for {self._filepath_normalized}"
            )
        c.execute(
            "INSERT OR REPLACE INTO photoinfo (uuid, photoinfo) VALUES (?, ?);",
            (uuid, value),
        )
        if not self._context_manager:
            conn.commit()

    @property
    def export_options(self) -> str:
        """Get export_options value"""
        if self._context_manager:
            return self._export_options()
        with self.lock:
            return self._export_options()

    @export_options.setter
    def export_options(self, value: str):
        """Set export_options value"""
        if self._context_manager:
            self._export_options_setter(value)
        else:
            with self.lock:
                self._export_options_setter(value)

    def _export_options(self) -> str:
        """Get export_options value"""
        if self._cached_data is not None:
            return self._cached_data.get("export_options")
        conn = self.connection
        c = conn.cursor()
        row = c.execute(
            "SELECT export_options from export_data where filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone()
        return row[0] if row else None

    def _export_options_setter(self, value: str):
        """Set export_options value"""
        conn = self.connection
        c = conn.cursor()
        c.execute(
            "UPDATE export_data SET export_options = ? WHERE filepath_normalized = ?;",
            (value, self._filepath_normalized),
        )
        if not self._context_manager:
            conn.commit()

    @property
    def timestamp(self) -> str:
        """returns the timestamp value"""
        if self._context_manager:
            return self._timestamp()
        with self.lock:
            return self._timestamp()

    def _timestamp(self) -> str:
        """returns the timestamp value"""
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT timestamp FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            return row[0]
        raise ValueError(
            f"No timestamp found in database for {self._filepath_normalized}"
        )

    @property
    def error(self) -> dict[str, Any] | None:
        """Return error value"""
        if self._context_manager:
            return self._error()
        with self.lock:
            return self._error()

    @error.setter
    def error(self, value: dict[str, str] | None):
        """Set error value"""
        if self._context_manager:
            self._error_setter(value)
        else:
            with self.lock:
                self._error_setter(value)

    def _error(self) -> dict[str, Any] | None:
        """Return error value"""
        if self._cached_data is not None:
            return self._cached_data.get("error")
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT error FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            return json.loads(row[0]) if row[0] else None
        raise ValueError(f"No error found in database for {self._filepath_normalized}")

    def _error_setter(self, value: dict[str, str] | None):
        """Set error value"""
        value = value or None
        conn = self.connection
        c = conn.cursor()
        # use default=str because some of the values are Path objects
        error = json.dumps(value, default=str) if value else None
        c.execute(
            "UPDATE export_data SET error = ? WHERE filepath_normalized = ?;",
            (error, self._filepath_normalized),
        )
        if not self._context_manager:
            conn.commit()

    @property
    def date_modified(self) -> datetime.datetime | None:
        """return asset date modified (date modified in Photos)"""
        if self._context_manager:
            return self._date_modified()
        with self.lock:
            return self._date_modified()

    @date_modified.setter
    def date_modified(self, value: datetime.datetime | None):
        """set asset date modified (date modified in Photos)"""
        if self._context_manager:
            self._date_modified_setter(value)
        else:
            with self.lock:
                self._date_modified_setter(value)

    def _date_modified(self) -> datetime.datetime | None:
        """return date modified"""
        if self._cached_data is not None:
            return self._cached_data.get("date_modified")
        conn = self.connection
        c = conn.cursor()
        if row := c.execute(
            "SELECT date_modified FROM export_data WHERE filepath_normalized = ?;",
            (self._filepath_normalized,),
        ).fetchone():
            try:
                return datetime.datetime.fromisoformat(row[0]) if row[0] else None
            except Exception:
                return None
        raise ValueError(
            f"No date_modified found in database for {self._filepath_normalized}"
        )

    def _date_modified_setter(self, value: datetime.datetime | None):
        """set date_modified"""
        conn = self.connection
        c = conn.cursor()
        c.execute(
            "UPDATE export_data SET date_modified = ? WHERE filepath_normalized = ?;",
            (
                value.isoformat() if value else None,
                self._filepath_normalized,
            ),
        )
        if not self._context_manager:
            conn.commit()

    def asdict(self) -> dict[str, Any]:
        """Return dict of self"""
        exifdata = json.loads(self.exifdata) if self.exifdata else None
        photoinfo = json.loads(self.photoinfo) if self.photoinfo else None
        return {
            "filepath": self.filepath,
            "filepath_normalized": self.filepath_normalized,
            "uuid": self.uuid,
            "timestamp": self.timestamp,
            "digest": self.digest,
            "src_sig": self.src_sig,
            "dest_sig": self.dest_sig,
            "export_options": self.export_options,
            "exifdata": exifdata,
            "error": self.error,
            "photoinfo": photoinfo,
            "date_modified": self.date_modified,
        }

    def json(self, indent=None) -> str:
        """Return json string of self"""

        def datetime_handler(obj):
            if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
                return obj.isoformat()
            raise TypeError(
                "Object of type %s is not JSON serializable" % type(obj).__name__
            )

        return json.dumps(self.asdict(), indent=indent, default=datetime_handler)

    def __enter__(self):
        self._context_manager = True
        self.lock.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Check if we're in batch mode - if so, defer the commit
        in_batch = self._export_db is not None and self._export_db.in_batch_mode
        if exc_type and self._conn.in_transaction:
            self._conn.rollback()
        elif self._conn.in_transaction and not in_batch:
            self._conn.commit()
        self._context_manager = False
        self.lock.release()
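
# Context-manager sketch for ExportRecord (variables are hypothetical):
# entering the record takes the shared lock once so each property access
# skips per-call locking, and a single commit happens on exit (deferred
# further if the parent ExportDB is in batch mode):
#
#     record = export_db.create_or_get_file_record(dest_path, photo.uuid)
#     with record:
#         record.digest = new_digest
#         record.dest_sig = (mode, size, mtime)
#     # lock released; commit issued here unless inside batch_operations()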