commit 762b72bec830e72a8c5830af9386f44879fc4e23 Author: david-fairbanks42 Date: Tue Oct 7 14:17:39 2025 -0400 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6f0c92c --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +_deploy +.venv +lambda_env +__pycache__ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..153ef2a --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..a3bb2e3 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/pdf-lambda-crop.iml b/.idea/pdf-lambda-crop.iml new file mode 100644 index 0000000..53d71c5 --- /dev/null +++ b/.idea/pdf-lambda-crop.iml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ace6954 --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +# PDF Label Cropper + +## Deployment + +[Documentation](https://docs.aws.amazon.com/lambda/latest/dg/python-package.html) + +This creates a zip file structured for AWS Lambda deployment. 
The virtual environment steps are only required if the +packages are updated. + +- `python -m venv lambda_env` +- `source ./lambda_env/bin/activate` +- `pip install pypdf pdfplumber` + +Note the location of installed packages with `pip show pypdf` and modify the following commands if necessary. + +- `deactivate` +- `cd lambda_env/lib/python3.13/site-packages/` +- `zip -r ../../../../_deploy/pdf-crop.zip .` +- `cd ../../../../` + +At this point, the zip file only contains the packages. To add the custom code and the lambda function, run the +following command from the project root. This is the same command used to update the custom code. + +- `zip _deploy/pdf-crop.zip cropper.py lambda_function.py` + +Inspect zip contents with `unzip -l _deploy/pdf-crop.zip` + +Update the lambda function with (make sure specify the credentials and region): + +`aws lambda update-function-code --function-name pdf-label-crop --zip-file fileb://_deploy/pdf-crop.zip` diff --git a/auto-crop.py b/auto-crop.py new file mode 100644 index 0000000..0b7a6fa --- /dev/null +++ b/auto-crop.py @@ -0,0 +1,16 @@ +import io +import sys +from cropper import AutoSettings, auto_crop + +if len(sys.argv) < 2: + print("Usage: python test.py ") + sys.exit(1) + +pdf_path = sys.argv[1] +with open(pdf_path, "rb") as fh: + content = io.BytesIO(fh.read()) + +buffer = auto_crop(content, AutoSettings()) + +with open("cropped.pdf", "wb") as fh: + fh.write(buffer.read()) diff --git a/cropper.py b/cropper.py new file mode 100644 index 0000000..86cbf0c --- /dev/null +++ b/cropper.py @@ -0,0 +1,231 @@ +import sys + +from pypdf import PdfWriter, PdfReader, Transformation +import pdfplumber +import io +from dataclasses import dataclass + + +@dataclass +class Setting: + width: float = 288 # 4 * 72 + height: float = 432 # 6 * 72 + + def event_argument(self, event, index, field=None): + if field is None: + field = index + + try: + raw = event[index] + p_type = self.__annotations__[field] + except KeyError: + return + + if 
p_type.__name__ == 'boolean': + setattr(self, field, raw == True or raw == 'true') + if p_type.__name__ == 'float': + setattr(self, field, float(raw)) + if p_type.__name__ == 'integer': + setattr(self, field, int(raw)) + + +@dataclass +class AutoSettings(Setting): + margin: float = 7.2 # 0.1 * 72 + + +@dataclass +class ManualSettings(Setting): + start_x: float = 90 + start_y: float = 90 + scale_x: float = 1 + scale_y: float = 1 + rotate: int = 90 + + +messages = list() + +def argument_default(event, index, default, cast): + try: + val = event[index] + if cast == 'float': + return float(val) + if cast == 'integer': + return int(val) + if cast == 'boolean': + return val == 'true' + return val + except KeyError: + return default + + +def extents_by_rectangle(page) -> tuple[int|None, int|None, int|None, int|None]: + largest_area = None + largest_rect = None + + for rect in page.rects: + area = rect["width"] * rect["height"] + if largest_area is None or area > largest_area: + largest_area = area + largest_rect = rect + + if largest_rect is None: + return None, None, None, None + + return largest_rect["x0"], largest_rect["y0"], largest_rect["x1"], largest_rect["y1"] + + +def extents_by_image(page) -> tuple[int|None, int|None, int|None, int|None]: + min_x = None + min_y = None + max_x = None + max_y = None + + for obj in page.images: + if min_x is None or obj["x0"] < min_x: + min_x = obj["x0"] + if min_y is None or obj["y0"] < min_y: + min_y = obj["y0"] + if max_x is None or obj["x1"] > max_x: + max_x = obj["x1"] + if max_y is None or obj["y1"] > max_y: + max_y = obj["y1"] + + return min_x, min_y, max_x, max_y + + +# def extents_by_all(page) -> tuple[int|None, int|None, int|None, int|None]: +# min_x = None +# min_y = None +# max_x = None +# max_y = None +# +# for obj in page.objects: +# if obj == 'image': +# x0, y0, x1, y1 = extents_by_image(page) +# if x0 is None or y0 is None or x1 is None or y1 is None: +# continue +# if min_x is None or x0 < min_x: +# min_x = x0 +# if 
def auto_crop(content: io.BytesIO, settings: AutoSettings) -> io.BytesIO:
    """Crop the first page of a PDF to its detected content extents.

    The extents are taken from the largest rectangle on the page, falling back
    to the bounding box of all images. The page is rotated 90 degrees when the
    extents are wider than tall, and scaled down when they exceed
    ``settings.width`` x ``settings.height``.

    Args:
        content: The source PDF.
        settings: Target size and margin.

    Returns:
        A new buffer, positioned at 0, holding the single cropped page.

    Raises:
        ValueError: If neither rectangles nor images yield usable extents.
    """
    messages.append("Using auto-crop mode")

    extents_method = 'rectangle'
    with pdfplumber.open(content) as pdf:
        page = pdf.pages[0]
        extents = extents_by_rectangle(page)
        if any(value is None for value in extents):
            extents_method = 'image'
            extents = extents_by_image(page)

    if any(value is None for value in extents):
        raise ValueError("Unable to find the extents of the document")
    min_x, min_y, max_x, max_y = extents

    messages.append(f"Used {extents_method} to find extents")

    min_x = round(min_x - settings.margin)
    min_y = round(min_y - settings.margin)
    max_x = round(max_x + settings.margin)
    max_y = round(max_y + settings.margin)

    width = max_x - min_x
    height = max_y - min_y
    messages.append(f"Document extents: {width}x{height}")

    # Compare against the target in portrait orientation.
    rotate = width > height
    if rotate:
        width, height = height, width

    scale = 1
    if width > settings.width or height > settings.height:
        scale = min(settings.width / width, settings.height / height)
        # NOTE(review): the margin was already applied above, so it is applied
        # a second time here on top of the scaled extents -- confirm this
        # double padding is intended.
        min_x = round((min_x * scale) - settings.margin)
        min_y = round((min_y * scale) - settings.margin)
        max_x = round((max_x * scale) + settings.margin)
        max_y = round((max_y * scale) + settings.margin)

    reader = PdfReader(content)
    writer = PdfWriter()
    p1 = reader.pages[0]

    # min_y is the lower edge and max_y the upper edge in PDF coordinates
    # (origin at the bottom-left). Two opposite corners define the whole box;
    # the original assigned min_y to the upper corners, inverting it.
    p1.cropbox.lower_left = (min_x, min_y)
    p1.cropbox.upper_right = (max_x, max_y)

    if rotate:
        p1.rotate(90)
        messages.append("Rotated 90 degrees")

    if scale != 1:
        op = Transformation().scale(sx=scale, sy=scale)
        p1.add_transformation(op)
        messages.append(f"Scaled by {scale}")

    writer.add_page(p1)
    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    return buffer


def manual_crop(content: io.BytesIO, settings: ManualSettings) -> io.BytesIO:
    """Crop the first page of a PDF to an explicitly specified window.

    The window starts ``start_x`` from the left and ``start_y`` from the top
    of the media box. ``settings`` is not modified.

    NOTE(review): width and height are multiplied by 72 here, whereas the
    ``Setting`` defaults are commented as already being multiples of 72 --
    confirm which unit callers are expected to pass.

    Args:
        content: The source PDF.
        settings: Window position and size, scale factors and rotation.

    Returns:
        A new buffer, positioned at 0, holding the single cropped page.
    """
    messages.append("Using manual-crop mode")

    # Work on local copies so the caller's settings object is left untouched;
    # swapping the fields in place would un-swap them on a second call.
    start_x, start_y = settings.start_x, settings.start_y
    width, height = settings.width, settings.height
    scale_x, scale_y = settings.scale_x, settings.scale_y

    # Swap directions to be less confusing for inputs
    if settings.rotate in (90, -90):
        start_x, start_y = start_y, start_x
        width, height = height, width
        scale_x, scale_y = scale_y, scale_x

    reader = PdfReader(content)
    writer = PdfWriter()
    p1 = reader.pages[0]

    top = p1.mediabox.top - start_y
    bottom = top - (height * 72)
    left = start_x
    right = left + (width * 72)

    # Two opposite corners fully define the box.
    p1.mediabox.lower_left = (left, bottom)
    p1.mediabox.upper_right = (right, top)

    if scale_x != 1 or scale_y != 1:
        op = Transformation().scale(sx=scale_x, sy=scale_y)
        p1.add_transformation(op)
        messages.append(f"Scaled by {scale_x}x{scale_y}")

    if settings.rotate != 0:
        writer.add_page(p1.rotate(settings.rotate))
        messages.append(f"Rotated {settings.rotate} degrees")
    else:
        writer.add_page(p1)

    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    return buffer
def _failure(error: str) -> Dict[str, Any]:
    """Build the 400 response returned for every request-level error."""
    return {
        'statusCode': 400,
        'error': error,
        'messages': messages,
    }


def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    """Crop the base64-encoded PDF carried in ``event``.

    Event keys:
        b64_content: Base64-encoded source PDF (required).
        mode: ``'manual'`` (default) or ``'auto'``.
        width, height, margin: Optional overrides for auto mode.
        start_x, start_y, width, height, scale_x, scale_y, rotate: Optional
            overrides for manual mode.

    Args:
        event: The invocation payload.
        context: The Lambda context object; may be ``None`` when invoked
            locally.

    Returns:
        On success, ``statusCode`` 200 with the cropped PDF base64-encoded as
        text in ``body``. On failure, ``statusCode`` 400 with ``error``. Both
        include the collected ``messages``.
    """
    messages.clear()

    try:
        content = io.BytesIO(base64.b64decode(event['b64_content']))
    except KeyError:
        return _failure('Missing required argument: b64_content')
    except (TypeError, ValueError) as e:
        # binascii.Error, raised for malformed base64, is a ValueError.
        return _failure(f'Invalid b64_content: {e}')

    mode = argument_default(event, 'mode', 'manual', 'string')

    # The local runner passes None as the context, so do not assume the
    # attribute exists.
    log_stream_name = getattr(context, 'log_stream_name', None)
    if log_stream_name is not None:
        messages.append(f"Log stream name is {log_stream_name}")

    if mode == 'auto':
        settings = AutoSettings()
        arguments = ('width', 'height', 'margin')
        crop = auto_crop
    elif mode == 'manual':
        settings = ManualSettings()
        arguments = ('start_x', 'start_y', 'width', 'height', 'scale_x', 'scale_y', 'rotate')
        crop = manual_crop
    else:
        return _failure('Invalid mode: (manual|auto)')

    try:
        for name in arguments:
            settings.event_argument(event, name)
    except (TypeError, ValueError) as e:
        return _failure(f'Invalid argument: {e}')

    # Top-level boundary: report any cropping failure to the caller in both
    # modes rather than only in auto mode.
    try:
        buffer = crop(content, settings)
    except Exception as e:
        return _failure(str(e))

    # Decode to text: bytes are not JSON serialisable by the standard library.
    data = base64.b64encode(buffer.getvalue()).decode('ascii')

    return {
        'statusCode': 200,
        'body': data,
        'messages': messages,
    }