"""Helpers for cropping the first page of a PDF, automatically or manually."""

import io
import sys
from dataclasses import dataclass, fields

import pdfplumber
from pypdf import PdfReader, PdfWriter, Transformation


@dataclass
class Setting:
    """Target page size shared by every crop mode.

    The inline arithmetic on the defaults suggests PDF points (72 per inch).
    NOTE(review): manual_crop multiplies width/height by 72, which implies
    inches there -- confirm which unit callers are expected to pass.
    """

    width: float = 288  # 4 * 72
    height: float = 432  # 6 * 72

    def _declared_type_name(self, field):
        """Return the declared type name of dataclass field *field*, or None.

        Uses ``dataclasses.fields`` so that fields inherited from a base
        settings class are found too (``__annotations__`` on a subclass only
        lists the fields that subclass declares itself).
        """
        for spec in fields(self):
            if spec.name == field:
                declared = spec.type
                # Annotations are strings under postponed evaluation.
                if isinstance(declared, str):
                    return declared
                return getattr(declared, "__name__", None)
        return None

    def event_argument(self, event, index, field=None):
        """Copy ``event[index]`` onto attribute *field*, cast to its declared type.

        Args:
            event: Container holding the raw argument values.
            index: Key (or position) to read from *event*.
            field: Attribute to set; defaults to *index*.

        Nothing happens when *index* is missing from *event* or *field* is not
        a declared dataclass field. ``float()``/``int()`` conversion errors
        propagate to the caller.
        """
        if field is None:
            field = index
        try:
            raw = event[index]
        except (KeyError, IndexError):
            return

        type_name = self._declared_type_name(field)
        # Accept the real Python type names as well as the legacy spellings
        # ('boolean' / 'integer') this method originally compared against.
        if type_name in ("bool", "boolean"):
            setattr(self, field, raw in (True, "true"))
        elif type_name == "float":
            setattr(self, field, float(raw))
        elif type_name in ("int", "integer"):
            setattr(self, field, int(raw))


@dataclass
class AutoSettings(Setting):
    """Settings for auto_crop: the target size plus a margin around the content."""

    margin: float = 7.2  # 0.1 * 72


@dataclass
class ManualSettings(Setting):
    """Settings for manual_crop: crop origin, per-axis scale and rotation."""

    start_x: float = 90
    start_y: float = 90
    scale_x: float = 1
    scale_y: float = 1
    rotate: int = 90


# Module-level log of human-readable processing notes appended by the crop
# functions below. It is never cleared in this module.
messages: list[str] = []


def argument_default(event, index, default, cast):
    """Return ``event[index]`` converted according to *cast*, or *default*.

    Args:
        event: Container holding the raw argument values.
        index: Key (or position) to read from *event*.
        default: Value returned when *index* is missing.
        cast: One of 'float', 'integer' or 'boolean'; any other value returns
            the raw entry unchanged.

    ``float()``/``int()`` conversion errors propagate to the caller.
    """
    try:
        val = event[index]
    except (KeyError, IndexError):
        return default

    if cast == 'float':
        return float(val)
    if cast == 'integer':
        return int(val)
    if cast == 'boolean':
        # Accept a real boolean as well as the string form, matching
        # Setting.event_argument.
        return val in (True, 'true')
    return val


def extents_by_rectangle(page) -> tuple[float | None, float | None, float | None, float | None]:
    """Return (x0, y0, x1, y1) of the largest-area rect on *page*.

    Each entry of ``page.rects`` is read by its "width", "height", "x0", "y0",
    "x1" and "y1" keys. When several rects share the largest area the first
    one wins. Returns four ``None`` values when the page has no rects.
    """
    largest_rect = max(
        page.rects,
        key=lambda rect: rect["width"] * rect["height"],
        default=None,
    )
    if largest_rect is None:
        return None, None, None, None
    return largest_rect["x0"], largest_rect["y0"], largest_rect["x1"], largest_rect["y1"]


def extents_by_image(page) -> tuple[float | None, float | None, float | None, float | None]:
    """Return the bounding box (min_x, min_y, max_x, max_y) of all page images.

    Each entry of ``page.images`` is read by its "x0", "y0", "x1" and "y1"
    keys. Returns four ``None`` values when the page has no images.
    """
    images = list(page.images)
    if not images:
        return None, None, None, None
    return (
        min(img["x0"] for img in images),
        min(img["y0"] for img in images),
        max(img["x1"] for img in images),
        max(img["y1"] for img in images),
    )


# Disabled: combined extents over every object type. Kept for reference only;
# nothing in this file calls it.
#
# def extents_by_all(page) -> tuple[int|None, int|None, int|None, int|None]:
#     min_x = None
#     min_y = None
#     max_x = None
#     max_y = None
#
#     for obj in page.objects:
#         if obj == 'image':
#             x0, y0, x1, y1 = extents_by_image(page)
#             if x0 is None or y0 is None or x1 is None or y1 is None:
#                 continue
#             if min_x is None or x0 < min_x:
#                 min_x = x0
#             if min_y is None or y0 < min_y:
#                 min_y = y0
#             if max_x is None or x1 > max_x:
#                 max_x = x1
#             if max_y is None or y1 > max_y:
#                 max_y = y1
#
#     return min_x, min_y, max_x, max_y


def _find_extents(page):
    """Return (method_name, extents) from the first strategy that succeeds, else None."""
    strategies = (
        ('rectangle', extents_by_rectangle),
        ('image', extents_by_image),
    )
    for method, finder in strategies:
        extents = finder(page)
        if all(value is not None for value in extents):
            return method, extents
    return None


def _page_to_buffer(page) -> io.BytesIO:
    """Write *page* as a one-page PDF into a fresh buffer rewound to the start."""
    writer = PdfWriter()
    writer.add_page(page)
    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)
    return buffer


def auto_crop(content: io.BytesIO, settings: AutoSettings) -> io.BytesIO:
    """Crop the first page of *content* to its detected content extents.

    The extents come from the largest rectangle on the page, falling back to
    the bounding box of all images. They are padded by ``settings.margin``,
    the page is rotated 90 degrees when the padded area is wider than tall,
    and scaled down uniformly when it exceeds ``settings.width`` x
    ``settings.height``. Progress notes are appended to ``messages``.

    Args:
        content: The source PDF.
        settings: Target size and margin.

    Returns:
        A new buffer, positioned at 0, holding the single cropped page.

    Raises:
        Exception: If neither a rectangle nor an image is found on the page.
    """
    messages.append("Using auto-crop mode")

    with pdfplumber.open(content) as pdf:
        found = _find_extents(pdf.pages[0])
    if found is None:
        raise Exception("Unable to find the extents of the document")
    extents_method, (min_x, min_y, max_x, max_y) = found
    messages.append(f"Used {extents_method} to find extents")

    margin = settings.margin
    min_x = round(min_x - margin)
    min_y = round(min_y - margin)
    max_x = round(max_x + margin)
    max_y = round(max_y + margin)

    width = max_x - min_x
    height = max_y - min_y
    messages.append(f"Document extents: {width}x{height}")

    # Landscape content is rotated, so compare it against the target size
    # with its sides swapped.
    rotate = width > height
    if rotate:
        width, height = height, width

    scale = 1
    if width > settings.width or height > settings.height:
        scale = min(settings.width / width, settings.height / height)
        # NOTE(review): the margin was already added above, so it is applied
        # a second time here (once scaled, once unscaled) -- confirm intent.
        min_x = round((min_x * scale) - margin)
        min_y = round((min_y * scale) - margin)
        max_x = round((max_x * scale) + margin)
        max_y = round((max_y * scale) + margin)

    page = PdfReader(content).pages[0]
    # NOTE(review): the "upper" corners receive the smaller y value and the
    # "lower" corners the larger one; the assignment order is kept as-is
    # because the resulting box depends on it -- confirm the convention.
    page.cropbox.upper_left = (min_x, min_y)
    page.cropbox.upper_right = (max_x, min_y)
    page.cropbox.lower_left = (min_x, max_y)
    page.cropbox.lower_right = (max_x, max_y)

    if rotate:
        page.rotate(90)
        messages.append("Rotated 90 degrees")

    if scale != 1:
        page.add_transformation(Transformation().scale(sx=scale, sy=scale))
        messages.append(f"Scaled by {scale}")

    return _page_to_buffer(page)


def manual_crop(content: io.BytesIO, settings: ManualSettings) -> io.BytesIO:
    """Crop the first page of *content* using explicit offsets and size.

    The media box is set to a ``width * 72`` by ``height * 72`` region whose
    left edge is ``start_x`` and whose top edge is ``start_y`` below the
    original media box top. The page is then scaled and rotated as requested.
    Progress notes are appended to ``messages``. *settings* is not modified.

    Args:
        content: The source PDF.
        settings: Crop origin, size, per-axis scale and rotation.

    Returns:
        A new buffer, positioned at 0, holding the single cropped page.
    """
    messages.append("Using manual-crop mode")

    # Work on local copies: swapping the values on ``settings`` itself would
    # leak back to the caller and flip them again on a second call.
    start_x, start_y = settings.start_x, settings.start_y
    width, height = settings.width, settings.height
    scale_x, scale_y = settings.scale_x, settings.scale_y

    # Swap directions to be less confusing for inputs
    # NOTE(review): other quarter turns such as 270 are not swapped -- confirm.
    if settings.rotate in (90, -90):
        start_x, start_y = start_y, start_x
        width, height = height, width
        scale_x, scale_y = scale_y, scale_x

    page = PdfReader(content).pages[0]

    top = page.mediabox.top - start_y
    bottom = top - (height * 72)
    left = start_x
    right = left + (width * 72)

    page.mediabox.upper_left = (left, top)
    page.mediabox.upper_right = (right, top)
    page.mediabox.lower_left = (left, bottom)
    page.mediabox.lower_right = (right, bottom)

    if scale_x != 1 or scale_y != 1:
        page.add_transformation(Transformation().scale(sx=scale_x, sy=scale_y))
        messages.append(f"Scaled by {scale_x}x{scale_y}")

    if settings.rotate != 0:
        page = page.rotate(settings.rotate)
        messages.append(f"Rotated {settings.rotate} degrees")

    return _page_to_buffer(page)