Initial commit

This commit is contained in:
David Fairbanks 2025-10-07 14:17:39 -04:00
commit 762b72bec8
Signed by: david-fairbanks42
GPG Key ID: 23A5FB8E1952978F
12 changed files with 402 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
_deploy
.venv
lambda_env
__pycache__

8
.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml generated Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.7" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (pdf-lambda-crop)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" filepath="$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" />
</modules>
</component>
</project>

19
.idea/pdf-lambda-crop.iml generated Normal file
View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
<excludeFolder url="file://$MODULE_DIR$/_deploy" />
<excludeFolder url="file://$MODULE_DIR$/_package_downloads" />
</content>
<orderEntry type="jdk" jdkName="Python 3.13 (pdf-lambda-crop)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="GOOGLE" />
<option name="myDocStringFormat" value="Google" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="py.test" />
</component>
</module>

6
.idea/vcs.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

30
README.md Normal file
View File

@ -0,0 +1,30 @@
# PDF Label Cropper
## Deployment
[Documentation](https://docs.aws.amazon.com/lambda/latest/dg/python-package.html)
This creates a zip file structured for AWS Lambda deployment. The virtual environment steps are only required if the
packages are updated.
- `python -m venv lambda_env`
- `source ./lambda_env/bin/activate`
- `pip install pypdf pdfplumber`
Note the location of installed packages with `pip show pypdf` and modify the following commands if necessary.
- `deactivate`
- `cd lambda_env/lib/python3.13/site-packages/`
- `zip -r ../../../../_deploy/pdf-crop.zip .`
- `cd ../../../../`
At this point, the zip file only contains the packages. To add the custom code and the lambda function, run the
following command from the project root. This is the same command used to update the custom code.
- `zip _deploy/pdf-crop.zip cropper.py lambda_function.py`
Inspect zip contents with `unzip -l _deploy/pdf-crop.zip`
Update the lambda function with (make sure to specify the credentials and region):
`aws lambda update-function-code --function-name pdf-label-crop --zip-file fileb://_deploy/pdf-crop.zip`

16
auto-crop.py Normal file
View File

@ -0,0 +1,16 @@
import io
import sys
from cropper import AutoSettings, auto_crop
def main() -> None:
    """Auto-crop the PDF named on the command line and write ``cropped.pdf``.

    The input file is read fully into memory, passed to ``auto_crop`` with
    default ``AutoSettings``, and the returned buffer is written to
    ``cropped.pdf`` in the current working directory.
    """
    if len(sys.argv) < 2:
        # The usage text previously named a non-existent "test.py".
        print("Usage: python auto-crop.py <pdf_path>")
        sys.exit(1)

    pdf_path = sys.argv[1]
    with open(pdf_path, "rb") as fh:
        content = io.BytesIO(fh.read())

    buffer = auto_crop(content, AutoSettings())

    with open("cropped.pdf", "wb") as fh:
        fh.write(buffer.read())


if __name__ == "__main__":
    main()

231
cropper.py Normal file
View File

@ -0,0 +1,231 @@
import sys
from pypdf import PdfWriter, PdfReader, Transformation
import pdfplumber
import io
from dataclasses import dataclass
@dataclass
class Setting:
    """Base settings shared by the crop modes: the target width and height."""

    width: float = 288  # 4 * 72
    height: float = 432  # 6 * 72

    def event_argument(self, event, index, field=None):
        """Copy ``event[index]`` onto this instance, coerced to the field's type.

        Args:
            event: Mapping of request arguments.
            index: Key to look up in ``event``.
            field: Name of the dataclass field to assign; defaults to ``index``.

        A missing key or an unknown field name leaves the instance unchanged.
        Errors raised by ``float()`` / ``int()`` propagate to the caller.
        """
        # Local import: the file only imports ``dataclass`` at module level.
        from dataclasses import fields

        if field is None:
            field = index

        # ``fields()`` includes inherited fields. The class ``__annotations__``
        # mapping only lists fields declared directly on the (sub)class, which
        # made ``width``/``height`` unreachable from subclasses.
        declared = {f.name: f.type for f in fields(self)}
        try:
            raw = event[index]
            p_type = declared[field]
        except KeyError:
            return

        # Field types are plain strings when annotations are postponed.
        if isinstance(p_type, str):
            type_name = p_type
        else:
            type_name = getattr(p_type, "__name__", "")

        # Builtin type names are 'bool' / 'float' / 'int' (not 'boolean' or
        # 'integer', which never matched).
        if type_name == "bool":
            setattr(self, field, raw in (True, "true"))
        elif type_name == "float":
            setattr(self, field, float(raw))
        elif type_name == "int":
            setattr(self, field, int(raw))
@dataclass
class AutoSettings(Setting):
    """Settings for auto-crop mode: the base size plus a margin."""

    # Padding that ``auto_crop`` subtracts from the minimum and adds to the
    # maximum detected extents.
    margin: float = 7.2  # 0.1 * 72
@dataclass
class ManualSettings(Setting):
    """Settings for manual-crop mode: explicit offsets, scale and rotation."""

    # Left edge of the crop region, as used by ``manual_crop``.
    start_x: float = 90
    # Offset of the crop region's top edge below the top of the media box.
    start_y: float = 90
    # Scale factors handed to ``Transformation().scale``; 1 means no scaling.
    scale_x: float = 1
    scale_y: float = 1
    # Rotation in degrees passed to the page's ``rotate``; a value of 90 or
    # -90 also makes ``manual_crop`` swap the x/y inputs.
    rotate: int = 90
# Module-level log of human-readable progress messages appended to by the
# crop functions in this file.
messages = []
def argument_default(event, index, default, cast):
    """Return ``event[index]`` converted according to ``cast``, or ``default``.

    Args:
        event: Mapping of request arguments.
        index: Key to look up in ``event``.
        default: Value returned (unconverted) when the key is missing.
        cast: One of ``'float'``, ``'integer'`` or ``'boolean'``; any other
            value returns the raw entry unchanged.

    Errors raised by ``float()`` / ``int()`` on a present but malformed value
    propagate to the caller.
    """
    # Only the lookup can raise KeyError, so keep the try body to that line.
    try:
        val = event[index]
    except KeyError:
        return default

    if cast == 'float':
        return float(val)
    if cast == 'integer':
        return int(val)
    if cast == 'boolean':
        # Accept a real boolean as well as the string form, matching
        # Setting.event_argument's handling of boolean inputs.
        return val in (True, 'true')
    return val
def extents_by_rectangle(page) -> tuple[int|None, int|None, int|None, int|None]:
largest_area = None
largest_rect = None
for rect in page.rects:
area = rect["width"] * rect["height"]
if largest_area is None or area > largest_area:
largest_area = area
largest_rect = rect
if largest_rect is None:
return None, None, None, None
return largest_rect["x0"], largest_rect["y0"], largest_rect["x1"], largest_rect["y1"]
def extents_by_image(page) -> tuple[int|None, int|None, int|None, int|None]:
min_x = None
min_y = None
max_x = None
max_y = None
for obj in page.images:
if min_x is None or obj["x0"] < min_x:
min_x = obj["x0"]
if min_y is None or obj["y0"] < min_y:
min_y = obj["y0"]
if max_x is None or obj["x1"] > max_x:
max_x = obj["x1"]
if max_y is None or obj["y1"] > max_y:
max_y = obj["y1"]
return min_x, min_y, max_x, max_y
# def extents_by_all(page) -> tuple[int|None, int|None, int|None, int|None]:
# min_x = None
# min_y = None
# max_x = None
# max_y = None
#
# for obj in page.objects:
# if obj == 'image':
# x0, y0, x1, y1 = extents_by_image(page)
# if x0 is None or y0 is None or x1 is None or y1 is None:
# continue
# if min_x is None or x0 < min_x:
# min_x = x0
# if min_y is None or y0 < min_y:
# min_y = y0
# if max_x is None or x1 > max_x:
# max_x = x1
# if max_y is None or y1 > max_y:
# max_y = y1
#
# return min_x, min_y, max_x, max_y
def auto_crop(content: io.BytesIO, settings: AutoSettings) -> io.BytesIO:
    """Crop the first page of a PDF to its detected content extents.

    The extents come from the largest rectangle on the page, falling back to
    the bounding box of all images. They are padded by ``settings.margin``,
    the page is rotated 90 degrees when the region is wider than tall, and
    the page is scaled down when the region exceeds ``settings.width`` x
    ``settings.height``. Progress notes are appended to ``messages``.

    Args:
        content: Stream holding the source PDF.
        settings: Target size and margin.

    Returns:
        A new stream, positioned at 0, holding a one-page PDF.

    Raises:
        ValueError: If neither a rectangle nor an image is found on the page.
    """
    messages.append("Using auto-crop mode")

    extents_method = 'rectangle'
    with pdfplumber.open(content) as pdf:
        page = pdf.pages[0]
        extents = extents_by_rectangle(page)
        if any(value is None for value in extents):
            extents_method = 'image'
            extents = extents_by_image(page)

    if any(value is None for value in extents):
        raise ValueError("Unable to find the extents of the document")
    min_x, min_y, max_x, max_y = extents

    messages.append(f"Used {extents_method} to find extents")

    min_x = round(min_x - settings.margin)
    min_y = round(min_y - settings.margin)
    max_x = round(max_x + settings.margin)
    max_y = round(max_y + settings.margin)

    width = max_x - min_x
    height = max_y - min_y
    messages.append(f"Document extents: {width}x{height}")

    # Treat the region as portrait for the size check; the page itself is
    # rotated further down.
    rotate = False
    if width > height:
        width, height = height, width
        rotate = True

    scale = 1
    if width > settings.width or height > settings.height:
        x_scale = settings.width / width
        y_scale = settings.height / height
        scale = min(x_scale, y_scale)
        # The crop box has to follow the content, which is scaled about the
        # origin by the transformation applied below.
        # NOTE(review): the margin is applied a second time here, so the final
        # box is larger than settings.width x settings.height -- confirm this
        # is intended.
        min_x = round((min_x * scale) - settings.margin)
        min_y = round((min_y * scale) - settings.margin)
        max_x = round((max_x * scale) + settings.margin)
        max_y = round((max_y * scale) + settings.margin)

    reader = PdfReader(content)
    writer = PdfWriter()

    p1 = reader.pages[0]
    # Write the box in conventional lower-left / upper-right order. The
    # previous corner assignments stored the same region with the two y
    # values exchanged (bottom above top).
    p1.cropbox.lower_left = (min_x, min_y)
    p1.cropbox.upper_right = (max_x, max_y)

    if rotate:
        p1.rotate(90)
        messages.append("Rotated 90 degrees")

    if scale != 1:
        op = Transformation().scale(sx=scale, sy=scale)
        p1.add_transformation(op)
        messages.append(f"Scaled by {scale}")

    writer.add_page(p1)

    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    return buffer
def manual_crop(content: io.BytesIO, settings: ManualSettings) -> io.BytesIO:
    """Crop the first page of a PDF to an explicitly specified region.

    The region's top edge sits ``start_y`` below the top of the media box and
    its left edge at ``start_x``. The page is optionally scaled and rotated.
    Progress notes are appended to ``messages``. ``settings`` is not modified.

    Args:
        content: Stream holding the source PDF.
        settings: Offsets, size, scale factors and rotation.

    Returns:
        A new stream, positioned at 0, holding a one-page PDF.
    """
    messages.append("Using manual-crop mode")

    # Work on local copies so the caller's settings object is left untouched;
    # swapping in place made a second call with the same object swap back.
    start_x, start_y = settings.start_x, settings.start_y
    width, height = settings.width, settings.height
    scale_x, scale_y = settings.scale_x, settings.scale_y

    # Swap directions to be less confusing for inputs
    if settings.rotate in (90, -90):
        start_x, start_y = start_y, start_x
        width, height = height, width
        scale_x, scale_y = scale_y, scale_x

    reader = PdfReader(content)
    writer = PdfWriter()

    p1 = reader.pages[0]

    # NOTE(review): width/height are multiplied by 72 here, while the Setting
    # defaults (288 / 432) are annotated as already being 4 * 72 and 6 * 72 --
    # confirm which unit manual mode expects.
    top = p1.mediabox.top - start_y
    bottom = top - (height * 72)
    left = start_x
    right = left + (width * 72)

    # Two opposite corners set all four coordinates of the box.
    p1.mediabox.lower_left = (left, bottom)
    p1.mediabox.upper_right = (right, top)

    if scale_x != 1 or scale_y != 1:
        op = Transformation().scale(sx=scale_x, sy=scale_y)
        p1.add_transformation(op)
        messages.append(f"Scaled by {scale_x}x{scale_y}")

    if settings.rotate != 0:
        writer.add_page(p1.rotate(settings.rotate))
        messages.append(f"Rotated {settings.rotate} degrees")
    else:
        writer.add_page(p1)

    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    return buffer

54
lambda_function.py Normal file
View File

@ -0,0 +1,54 @@
# Crop a single page PDF document.
# This is specifically designed for the letter size UPS shipping label that needs to be cropped for printing.
import base64
import io
from cropper import AutoSettings, ManualSettings, argument_default, auto_crop, manual_crop, messages
from typing import Dict, Any
def _error_response(status_code: int, error: str) -> Dict[str, Any]:
    """Build an error payload carrying a copy of the messages collected so far."""
    return {
        'statusCode': status_code,
        'error': error,
        'messages': list(messages),
    }


def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    """Crop the base64-encoded PDF in ``event`` and return it base64-encoded.

    Args:
        event: Request arguments. ``b64_content`` is required; ``mode`` is
            ``'manual'`` (default) or ``'auto'``; the remaining keys override
            the matching settings fields for the chosen mode.
        context: Invocation context; may be ``None`` when invoked locally.

    Returns:
        On success, ``statusCode`` 200 with ``body`` (base64 text of the
        cropped PDF) and ``messages``. On failure, ``statusCode`` 400 with
        ``error`` and ``messages``.
    """
    messages.clear()

    try:
        content = io.BytesIO(base64.b64decode(event['b64_content']))
    except KeyError:
        return _error_response(400, 'Missing required argument: b64_content')
    except (TypeError, ValueError) as e:
        # binascii.Error (malformed base64) is a ValueError subclass.
        return _error_response(400, f'Invalid b64_content: {e}')

    mode = argument_default(event, 'mode', 'manual', 'string')

    # ``context`` is None when the handler is called outside Lambda.
    messages.append(f"Log stream name is {getattr(context, 'log_stream_name', None)}")

    # Top-level boundary: any failure while parsing arguments or cropping is
    # reported to the caller as a 400 for both modes.
    try:
        if mode == 'auto':
            settings = AutoSettings()
            for name in ('width', 'height', 'margin'):
                settings.event_argument(event, name)
            buffer = auto_crop(content, settings)
        elif mode == 'manual':
            settings = ManualSettings()
            for name in ('start_x', 'start_y', 'width', 'height',
                         'scale_x', 'scale_y', 'rotate'):
                settings.event_argument(event, name)
            buffer = manual_crop(content, settings)
        else:
            return _error_response(400, 'Invalid mode: (manual|auto)')
    except Exception as e:
        return _error_response(400, str(e))

    # b64encode returns bytes, which cannot be JSON-serialized; decode to text.
    data = base64.b64encode(buffer.getvalue()).decode('ascii')

    return {
        'statusCode': 200,
        'body': data,
        'messages': list(messages),
    }

13
local-invoke.py Normal file
View File

@ -0,0 +1,13 @@
from lambda_function import lambda_handler
import sys
import json
def main() -> None:
    """Invoke ``lambda_handler`` locally with a JSON payload from the command line.

    The first argument is parsed as JSON and passed as the event, with
    ``None`` as the context. The handler's response is printed as JSON.
    """
    if len(sys.argv) < 2:
        # The usage text previously named a non-existent "test.py".
        print("Usage: python local-invoke.py <payload>")
        sys.exit(1)

    payload = sys.argv[1]
    event = json.loads(payload)
    response = lambda_handler(event, None)
    print(json.dumps(response))


if __name__ == "__main__":
    main()