Initial commit

2025-10-07 14:17:39 -04:00 · 2025-10-07 14:17:39 -04:00 · 762b72bec8
commit 762b72bec8
12 changed files with 402 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
+_deploy
+.venv
+lambda_env
+__pycache__
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.7" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (pdf-lambda-crop)" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" filepath="$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" />
+    </modules>
+  </component>
+</project>
--- a/.idea/pdf-lambda-crop.iml
+++ b/.idea/pdf-lambda-crop.iml
@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/.venv" />
+      <excludeFolder url="file://$MODULE_DIR$/_deploy" />
+      <excludeFolder url="file://$MODULE_DIR$/_package_downloads" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.13 (pdf-lambda-crop)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="GOOGLE" />
+    <option name="myDocStringFormat" value="Google" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="py.test" />
+  </component>
+</module>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
--- a/README.md
+++ b/README.md
@ -0,0 +1,30 @@
+# PDF Label Cropper
+
+## Deployment
+
+[Documentation](https://docs.aws.amazon.com/lambda/latest/dg/python-package.html)
+
+This creates a zip file structured for AWS Lambda deployment. The virtual environment steps are only required if the
+packages are updated.
+
+- `python -m venv lambda_env`
+- `source ./lambda_env/bin/activate`
+- `pip install pypdf pdfplumber`
+
+Note the location of installed packages with `pip show pypdf` and modify the following commands if necessary.
+
+- `deactivate`
+- `cd lambda_env/lib/python3.13/site-packages/`
+- `zip -r ../../../../_deploy/pdf-crop.zip .`
+- `cd ../../../../`
+
+At this point, the zip file only contains the packages. To add the custom code and the lambda function, run the
+following command from the project root. This is the same command used to update the custom code.
+
+- `zip _deploy/pdf-crop.zip cropper.py lambda_function.py`
+
+Inspect zip contents with `unzip -l _deploy/pdf-crop.zip`
+
+Update the lambda function with (make sure to specify the credentials and region):
+
+`aws lambda update-function-code --function-name pdf-label-crop --zip-file fileb://_deploy/pdf-crop.zip`
--- a/auto-crop.py
+++ b/auto-crop.py
@ -0,0 +1,16 @@
+import io
+import sys
+from cropper import AutoSettings, auto_crop
+
+if len(sys.argv) < 2:
+    print("Usage: python test.py <pdf_path>")
+    sys.exit(1)
+
+pdf_path = sys.argv[1]
+with open(pdf_path, "rb") as fh:
+    content = io.BytesIO(fh.read())
+
+buffer = auto_crop(content, AutoSettings())
+
+with open("cropped.pdf", "wb") as fh:
+    fh.write(buffer.read())
--- a/cropper.py
+++ b/cropper.py
@ -0,0 +1,231 @@
+import sys
+
+from pypdf import PdfWriter, PdfReader, Transformation
+import pdfplumber
+import io
+from dataclasses import dataclass
+
+
+@dataclass
+class Setting:
+    width: float = 288  # 4 * 72
+    height: float = 432  # 6 * 72
+
+    def event_argument(self, event, index, field=None):
+        if field is None:
+            field = index
+
+        try:
+            raw = event[index]
+            p_type = self.__annotations__[field]
+        except KeyError:
+            return
+
+        if p_type.__name__ == 'boolean':
+            setattr(self, field, raw == True or raw == 'true')
+        if p_type.__name__ == 'float':
+            setattr(self, field, float(raw))
+        if p_type.__name__ == 'integer':
+            setattr(self, field, int(raw))
+
+
+@dataclass
+class AutoSettings(Setting):
+    """Settings for auto-crop mode: the base size fields plus a margin."""
+    # Padding added around the detected extents by auto_crop.
+    margin: float = 7.2  # 0.1 * 72
+
+
+@dataclass
+class ManualSettings(Setting):
+    """Settings for manual-crop mode: start offsets, per-axis scale, rotation."""
+    # Left edge of the crop box (manual_crop uses it as the box's left x).
+    start_x: float = 90
+    # Distance subtracted from the page's mediabox top to get the box's top.
+    start_y: float = 90
+    # Scale factors handed to pypdf's Transformation().scale(); 1 means no scaling.
+    scale_x: float = 1
+    scale_y: float = 1
+    # Rotation passed to the page's rotate(); 0 disables rotation.
+    rotate: int = 90
+
+
+messages = list()
+
+def argument_default(event, index, default, cast):
+    try:
+        val = event[index]
+        if cast == 'float':
+            return float(val)
+        if cast == 'integer':
+            return int(val)
+        if cast == 'boolean':
+            return val == 'true'
+        return val
+    except KeyError:
+        return default
+
+
+def extents_by_rectangle(page) -> tuple[int|None, int|None, int|None, int|None]:
+    largest_area = None
+    largest_rect = None
+
+    for rect in page.rects:
+        area = rect["width"] * rect["height"]
+        if largest_area is None or area > largest_area:
+            largest_area = area
+            largest_rect = rect
+
+    if largest_rect is None:
+        return None, None, None, None
+
+    return largest_rect["x0"], largest_rect["y0"], largest_rect["x1"], largest_rect["y1"]
+
+
+def extents_by_image(page) -> tuple[int|None, int|None, int|None, int|None]:
+    min_x = None
+    min_y = None
+    max_x = None
+    max_y = None
+
+    for obj in page.images:
+        if min_x is None or obj["x0"] < min_x:
+            min_x = obj["x0"]
+        if min_y is None or obj["y0"] < min_y:
+            min_y = obj["y0"]
+        if max_x is None or obj["x1"] > max_x:
+            max_x = obj["x1"]
+        if max_y is None or obj["y1"] > max_y:
+            max_y = obj["y1"]
+
+    return min_x, min_y, max_x, max_y
+
+
+# def extents_by_all(page) -> tuple[int|None, int|None, int|None, int|None]:
+#     min_x = None
+#     min_y = None
+#     max_x = None
+#     max_y = None
+#
+#     for obj in page.objects:
+#         if obj == 'image':
+#             x0, y0, x1, y1 = extents_by_image(page)
+#             if x0 is None or y0 is None or x1 is None or y1 is None:
+#                 continue
+#             if min_x is None or x0 < min_x:
+#                 min_x = x0
+#             if min_y is None or y0 < min_y:
+#                 min_y = y0
+#             if max_x is None or x1 > max_x:
+#                 max_x = x1
+#             if max_y is None or y1 > max_y:
+#                 max_y = y1
+#
+#     return min_x, min_y, max_x, max_y
+
+
+def auto_crop(content: io.BytesIO, settings: AutoSettings) -> io.BytesIO:
+    """Crop the first page of a PDF to its detected content extents.
+
+    The extents come from the largest rectangle on the page, falling back to
+    the bounding box of the page's images. They are padded by
+    ``settings.margin``; a region wider than it is tall is rotated 90 degrees,
+    and a region larger than ``settings.width`` x ``settings.height`` is
+    scaled down uniformly. Progress notes are appended to the module-level
+    ``messages`` list.
+
+    Args:
+        content: The source PDF bytes.
+        settings: Target size and margin.
+
+    Returns:
+        A new buffer holding the single cropped page, positioned at offset 0.
+
+    Raises:
+        Exception: If neither rectangles nor images yield extents.
+    """
+    messages.append("Using auto-crop mode")
+
+    # First pass (pdfplumber): locate the region of interest on page 1.
+    extents_method = 'rectangle'
+    with pdfplumber.open(content) as pdf:
+        page = pdf.pages[0]
+        min_x, min_y, max_x, max_y = extents_by_rectangle(page)
+        if min_x is None or min_y is None or max_x is None or max_y is None:
+            # No rectangles found; fall back to the images' bounding box.
+            extents_method = 'image'
+            min_x, min_y, max_x, max_y = extents_by_image(page)
+
+    if min_x is None or min_y is None or max_x is None or max_y is None:
+        raise Exception("Unable to find the extents of the document")
+
+    messages.append(f"Used {extents_method} to find extents")
+
+    # Pad the extents by the margin and snap to whole units.
+    min_x = round(min_x - settings.margin)
+    min_y = round(min_y - settings.margin)
+    max_x = round(max_x + settings.margin)
+    max_y = round(max_y + settings.margin)
+
+    width = max_x - min_x
+    height = max_y - min_y
+    messages.append(f"Document extents: {width}x{height}")
+
+    # Treat the region as portrait for the size comparison below; a landscape
+    # region is rotated later.
+    rotate = False
+    if width > height:
+        h = width
+        width = height
+        height = h
+        rotate = True
+
+    scale = 1
+    if width > settings.width or height > settings.height:
+        # Use the smaller ratio so both dimensions fit the target size.
+        x_scale = settings.width / width
+        y_scale = settings.height / height
+        scale = min(x_scale, y_scale)
+        # NOTE(review): the margin was already applied above and is applied
+        # again here after scaling -- confirm this double padding is intended.
+        min_x = round((min_x * scale) - settings.margin)
+        min_y = round((min_y * scale) - settings.margin)
+        max_x = round((max_x * scale) + settings.margin)
+        max_y = round((max_y * scale) + settings.margin)
+
+    # Second pass (pypdf): re-read the same buffer and rewrite page 1.
+    reader = PdfReader(content)
+    writer = PdfWriter()
+    p1 = reader.pages[0]
+
+    # NOTE(review): the "upper" corners receive min_y and the "lower" corners
+    # max_y -- confirm this matches the coordinate convention pypdf expects.
+    p1.cropbox.upper_left = (min_x, min_y)
+    p1.cropbox.upper_right = (max_x, min_y)
+    p1.cropbox.lower_left = (min_x, max_y)
+    p1.cropbox.lower_right = (max_x, max_y)
+
+    if rotate:
+        p1.rotate(90)
+        messages.append("Rotated 90 degrees")
+
+    if scale != 1:
+        op = Transformation().scale(sx=scale, sy=scale)
+        p1.add_transformation(op)
+        messages.append(f"Scaled by {scale}")
+
+    writer.add_page(p1)
+    buffer = io.BytesIO()
+    writer.write(buffer)
+    # Rewind so callers can read the result from the start.
+    buffer.seek(0)
+
+    return buffer
+
+
+def manual_crop(content: io.BytesIO, settings: ManualSettings) -> io.BytesIO:
+    messages.append("Using manual-crop mode")
+    # Swap directions to be less confusing for inputs
+    if settings.rotate == 90 or settings.rotate == -90:
+        sx = settings.start_x
+        settings.start_x = settings.start_y
+        settings.start_y = sx
+        h = settings.height
+        settings.height = settings.width
+        settings.width = h
+        sy = settings.scale_y
+        settings.scale_y = settings.scale_x
+        settings.scale_x = sy
+
+    reader = PdfReader(content)
+    writer = PdfWriter()
+    p1 = reader.pages[0]
+
+    top = p1.mediabox.top - settings.start_y
+    bottom = top - (settings.height * 72)
+    left = settings.start_x
+    right = left + (settings.width * 72)
+
+    p1.mediabox.upper_left = (left, top)
+    p1.mediabox.upper_right = (right, top)
+    p1.mediabox.lower_left = (left, bottom)
+    p1.mediabox.lower_right = (right, bottom)
+
+    if settings.scale_x != 1 or settings.scale_y != 1:
+        op = Transformation().scale(sx=settings.scale_x, sy=settings.scale_y)
+        p1.add_transformation(op)
+        messages.append(f"Scaled by {settings.scale_x}x{settings.scale_y}")
+
+    if settings.rotate != 0:
+        writer.add_page(p1.rotate(settings.rotate))
+        messages.append(f"Rotated {settings.rotate} degrees")
+    else:
+        writer.add_page(p1)
+
+    buffer = io.BytesIO()
+    writer.write(buffer)
+    buffer.seek(0)
+
+    return buffer
--- a/lambda_function.py
+++ b/lambda_function.py
@ -0,0 +1,54 @@
+# Crop a single page PDF document.
+# This is specifically designed for the letter size UPS shipping label that needs to be cropped for printing.
+
+import base64
+import io
+from cropper import AutoSettings, ManualSettings, argument_default, auto_crop, manual_crop, messages
+from typing import Dict, Any
+
+
+def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
+    messages.clear()
+    content = io.BytesIO(base64.b64decode(event['b64_content']))
+    mode = argument_default(event, 'mode', 'manual', 'string')
+    messages.append(f"Log stream name is {context.log_stream_name}")
+
+    if mode == 'auto':
+        settings = AutoSettings()
+        settings.event_argument(event, 'width')
+        settings.event_argument(event, 'height')
+        settings.event_argument(event, 'margin')
+
+        try:
+            buffer = auto_crop(content, settings)
+        except Exception as e:
+            return {
+                'statusCode': 400,
+                'error': str(e),
+                'messages': messages,
+            }
+    elif mode == 'manual':
+        settings = ManualSettings()
+        settings.event_argument(event, 'start_x')
+        settings.event_argument(event, 'start_y')
+        settings.event_argument(event, 'width')
+        settings.event_argument(event, 'height')
+        settings.event_argument(event, 'scale_x')
+        settings.event_argument(event, 'scale_y')
+        settings.event_argument(event, 'rotate')
+
+        buffer = manual_crop(content, settings)
+    else:
+        return {
+            'statusCode': 400,
+            'error': 'Invalid mode: (manual|auto)',
+            'messages': messages,
+        }
+
+    data = base64.b64encode(buffer.getvalue())
+
+    return {
+        'statusCode': 200,
+        'body': data,
+        'messages': messages,
+    }
--- a/local-invoke.py
+++ b/local-invoke.py
@ -0,0 +1,13 @@
+from lambda_function import lambda_handler
+import sys
+import json
+
+if len(sys.argv) < 2:
+    print("Usage: python test.py <payload>")
+    sys.exit(1)
+
+payload = sys.argv[1]
+event = json.loads(payload)
+
+response = lambda_handler(event, None)
+print(json.dumps(response))