Initial commit
This commit is contained in:
commit
762b72bec8
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
_deploy
|
||||||
|
.venv
|
||||||
|
lambda_env
|
||||||
|
__pycache__
|
||||||
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
||||||
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.7" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (pdf-lambda-crop)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
||||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" filepath="$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
19
.idea/pdf-lambda-crop.iml
generated
Normal file
19
.idea/pdf-lambda-crop.iml
generated
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/_deploy" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/_package_downloads" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="jdk" jdkName="Python 3.13 (pdf-lambda-crop)" jdkType="Python SDK" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="PyDocumentationSettings">
|
||||||
|
<option name="format" value="GOOGLE" />
|
||||||
|
<option name="myDocStringFormat" value="Google" />
|
||||||
|
</component>
|
||||||
|
<component name="TestRunnerService">
|
||||||
|
<option name="PROJECT_TEST_RUNNER" value="py.test" />
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
30
README.md
Normal file
30
README.md
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# PDF Label Cropper
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
[Documentation](https://docs.aws.amazon.com/lambda/latest/dg/python-package.html)
|
||||||
|
|
||||||
|
This creates a zip file structured for AWS Lambda deployment. The virtual environment steps are only required if the
|
||||||
|
packages are updated.
|
||||||
|
|
||||||
|
- `python -m venv lambda_env`
|
||||||
|
- `source ./lambda_env/bin/activate`
|
||||||
|
- `pip install pypdf pdfplumber`
|
||||||
|
|
||||||
|
Note the location of installed packages with `pip show pypdf` and modify the following commands if necessary.
|
||||||
|
|
||||||
|
- `deactivate`
|
||||||
|
- `cd lambda_env/lib/python3.13/site-packages/`
|
||||||
|
- `zip -r ../../../../_deploy/pdf-crop.zip .`
|
||||||
|
- `cd ../../../../`
|
||||||
|
|
||||||
|
At this point, the zip file only contains the packages. To add the custom code and the lambda function, run the
|
||||||
|
following command from the project root. This is the same command used to update the custom code.
|
||||||
|
|
||||||
|
- `zip _deploy/pdf-crop.zip cropper.py lambda_function.py`
|
||||||
|
|
||||||
|
Inspect zip contents with `unzip -l _deploy/pdf-crop.zip`
|
||||||
|
|
||||||
|
Update the lambda function with the following command (make sure to specify the credentials and region):
|
||||||
|
|
||||||
|
`aws lambda update-function-code --function-name pdf-label-crop --zip-file fileb://_deploy/pdf-crop.zip`
|
||||||
16
auto-crop.py
Normal file
16
auto-crop.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
"""Command-line helper: auto-crop the first page of a PDF and save the result."""
import io
import sys

from cropper import AutoSettings, auto_crop


def main(argv):
    """Run the auto-cropper on ``argv[1]`` and write the cropped PDF.

    Args:
        argv: Command-line arguments, ``argv[0]`` being the script name.
            ``argv[1]`` is the input PDF path; the optional ``argv[2]`` is the
            output path and defaults to ``cropped.pdf``.

    Returns:
        Process exit status: 0 on success, 1 when the input path is missing.
    """
    if len(argv) < 2:
        print("Usage: python auto-crop.py <pdf_path> [output_path]")
        return 1

    pdf_path = argv[1]
    output_path = argv[2] if len(argv) > 2 else "cropped.pdf"

    # auto_crop works on an in-memory stream, so load the whole file first.
    with open(pdf_path, "rb") as fh:
        content = io.BytesIO(fh.read())

    buffer = auto_crop(content, AutoSettings())

    with open(output_path, "wb") as fh:
        # getvalue() returns the full buffer regardless of the stream position.
        fh.write(buffer.getvalue())
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||||
231
cropper.py
Normal file
231
cropper.py
Normal file
@ -0,0 +1,231 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
from pypdf import PdfWriter, PdfReader, Transformation
|
||||||
|
import pdfplumber
|
||||||
|
import io
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Setting:
    """Target page size shared by the crop modes, in PDF points (1/72 inch)."""

    width: float = 288  # 4 * 72
    height: float = 432  # 6 * 72

    def event_argument(self, event, index, field=None):
        """Copy ``event[index]`` onto a dataclass field, coercing it to the field's type.

        Args:
            event: Mapping of request parameters (for example a Lambda event).
            index: Key to read from ``event``.
            field: Name of the dataclass field to assign. Defaults to ``index``.

        Returns:
            None. The instance is left untouched when ``event`` has no
            ``index`` key or when ``field`` is not a declared dataclass field.

        Raises:
            ValueError, TypeError: Propagated from ``float()`` / ``int()`` when
                the raw value cannot be converted.
        """
        from dataclasses import fields

        if field is None:
            field = index

        # fields() also reports fields inherited from base dataclasses. A
        # class's own __annotations__ does not, which silently dropped
        # inherited settings such as width/height on subclasses.
        declared = {f.name: f.type for f in fields(self)}

        try:
            raw = event[index]
            p_type = declared[field]
        except KeyError:
            return

        # Annotations are normally the type objects themselves, but they are
        # plain strings under `from __future__ import annotations`.
        type_name = getattr(p_type, '__name__', p_type)

        if type_name == 'bool':
            # Accept a real boolean (JSON) as well as the string 'true'.
            setattr(self, field, raw in (True, 'true'))
        elif type_name == 'float':
            setattr(self, field, float(raw))
        elif type_name == 'int':
            setattr(self, field, int(raw))
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AutoSettings(Setting):
    """Settings for auto-crop mode: the base page size plus a margin."""

    # Padding applied around the detected extents (0.1 * 72).
    margin: float = 7.2
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ManualSettings(Setting):
    """Settings for manual-crop mode: crop origin, per-axis scale and rotation."""

    # Offsets of the crop box from the page's left edge and top edge.
    start_x: float = 90
    start_y: float = 90

    # Content scale factors; 1 leaves the content unscaled.
    scale_x: float = 1
    scale_y: float = 1

    # Page rotation in degrees; 0 disables rotation.
    rotate: int = 90
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level log of human-readable processing notes; the crop functions
# append to it and callers read or clear it.
messages = []
|
||||||
|
|
||||||
|
def argument_default(event, index, default, cast):
    """Return ``event[index]`` converted according to ``cast``, or ``default`` if absent.

    Args:
        event: Mapping of request parameters.
        index: Key to look up in ``event``.
        default: Value returned as-is when ``index`` is missing.
        cast: ``'float'``, ``'integer'`` or ``'boolean'``; any other value
            returns the raw entry unchanged.

    Returns:
        The converted value, or ``default`` when the key is missing.

    Raises:
        ValueError, TypeError: Propagated from ``float()`` / ``int()`` when the
            raw value cannot be converted.
    """
    # Only the lookup can legitimately raise KeyError, so keep the try minimal.
    try:
        val = event[index]
    except KeyError:
        return default

    if cast == 'float':
        return float(val)
    if cast == 'integer':
        return int(val)
    if cast == 'boolean':
        # JSON payloads carry real booleans while string payloads carry
        # 'true'; accept both, matching Setting.event_argument.
        return val in (True, 'true')
    return val
|
||||||
|
|
||||||
|
|
||||||
|
def extents_by_rectangle(page) -> tuple[int|None, int|None, int|None, int|None]:
|
||||||
|
largest_area = None
|
||||||
|
largest_rect = None
|
||||||
|
|
||||||
|
for rect in page.rects:
|
||||||
|
area = rect["width"] * rect["height"]
|
||||||
|
if largest_area is None or area > largest_area:
|
||||||
|
largest_area = area
|
||||||
|
largest_rect = rect
|
||||||
|
|
||||||
|
if largest_rect is None:
|
||||||
|
return None, None, None, None
|
||||||
|
|
||||||
|
return largest_rect["x0"], largest_rect["y0"], largest_rect["x1"], largest_rect["y1"]
|
||||||
|
|
||||||
|
|
||||||
|
def extents_by_image(page) -> tuple[int|None, int|None, int|None, int|None]:
|
||||||
|
min_x = None
|
||||||
|
min_y = None
|
||||||
|
max_x = None
|
||||||
|
max_y = None
|
||||||
|
|
||||||
|
for obj in page.images:
|
||||||
|
if min_x is None or obj["x0"] < min_x:
|
||||||
|
min_x = obj["x0"]
|
||||||
|
if min_y is None or obj["y0"] < min_y:
|
||||||
|
min_y = obj["y0"]
|
||||||
|
if max_x is None or obj["x1"] > max_x:
|
||||||
|
max_x = obj["x1"]
|
||||||
|
if max_y is None or obj["y1"] > max_y:
|
||||||
|
max_y = obj["y1"]
|
||||||
|
|
||||||
|
return min_x, min_y, max_x, max_y
|
||||||
|
|
||||||
|
|
||||||
|
# def extents_by_all(page) -> tuple[int|None, int|None, int|None, int|None]:
|
||||||
|
# min_x = None
|
||||||
|
# min_y = None
|
||||||
|
# max_x = None
|
||||||
|
# max_y = None
|
||||||
|
#
|
||||||
|
# for obj in page.objects:
|
||||||
|
# if obj == 'image':
|
||||||
|
# x0, y0, x1, y1 = extents_by_image(page)
|
||||||
|
# if x0 is None or y0 is None or x1 is None or y1 is None:
|
||||||
|
# continue
|
||||||
|
# if min_x is None or x0 < min_x:
|
||||||
|
# min_x = x0
|
||||||
|
# if min_y is None or y0 < min_y:
|
||||||
|
# min_y = y0
|
||||||
|
# if max_x is None or x1 > max_x:
|
||||||
|
# max_x = x1
|
||||||
|
# if max_y is None or y1 > max_y:
|
||||||
|
# max_y = y1
|
||||||
|
#
|
||||||
|
# return min_x, min_y, max_x, max_y
|
||||||
|
|
||||||
|
|
||||||
|
def auto_crop(content: io.BytesIO, settings: AutoSettings) -> io.BytesIO:
    """Crop the first page of a PDF to its detected content extents.

    The extents come from the largest rectangle on the page, falling back to
    the bounding box of the page's images. The crop box is padded by
    ``settings.margin``, the page is rotated 90 degrees when the extents are
    wider than tall, and the content is scaled down when the extents exceed
    ``settings.width`` x ``settings.height``. Progress notes are appended to
    the module-level ``messages`` list.

    Args:
        content: Stream holding the source PDF.
        settings: Target size and margin.

    Returns:
        A new stream, positioned at the start, holding the one-page result.

    Raises:
        Exception: If neither rectangles nor images yield extents.
    """
    messages.append("Using auto-crop mode")

    with pdfplumber.open(content) as pdf:
        first_page = pdf.pages[0]
        extents_method = 'rectangle'
        extents = extents_by_rectangle(first_page)
        if any(value is None for value in extents):
            # No usable rectangle on the page: fall back to the images.
            extents_method = 'image'
            extents = extents_by_image(first_page)

    if any(value is None for value in extents):
        raise Exception("Unable to find the extents of the document")

    messages.append(f"Used {extents_method} to find extents")

    pad = settings.margin
    x_lo = round(extents[0] - pad)
    y_lo = round(extents[1] - pad)
    x_hi = round(extents[2] + pad)
    y_hi = round(extents[3] + pad)

    width, height = x_hi - x_lo, y_hi - y_lo
    messages.append(f"Document extents: {width}x{height}")

    # A landscape region is rotated, so compare it to the target as portrait.
    rotate = width > height
    if rotate:
        width, height = height, width

    scale = 1
    if width > settings.width or height > settings.height:
        scale = min(settings.width / width, settings.height / height)
        # NOTE(review): the margin was already included above and is applied
        # again here after scaling, so the crop box ends up larger than the
        # target size - confirm this is intended.
        x_lo = round((x_lo * scale) - pad)
        y_lo = round((y_lo * scale) - pad)
        x_hi = round((x_hi * scale) + pad)
        y_hi = round((y_hi * scale) + pad)

    reader = PdfReader(content)
    writer = PdfWriter()
    p1 = reader.pages[0]

    corners = (
        ("upper_left", (x_lo, y_lo)),
        ("upper_right", (x_hi, y_lo)),
        ("lower_left", (x_lo, y_hi)),
        ("lower_right", (x_hi, y_hi)),
    )
    for corner, point in corners:
        setattr(p1.cropbox, corner, point)

    if rotate:
        p1.rotate(90)
        messages.append("Rotated 90 degrees")

    if scale != 1:
        p1.add_transformation(Transformation().scale(sx=scale, sy=scale))
        messages.append(f"Scaled by {scale}")

    writer.add_page(p1)

    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)
    return buffer
|
||||||
|
|
||||||
|
|
||||||
|
def manual_crop(content: io.BytesIO, settings: ManualSettings) -> io.BytesIO:
    """Crop the first page of a PDF to an explicitly specified box.

    The box starts ``settings.start_x`` from the left edge and
    ``settings.start_y`` from the top edge of the page and spans
    ``settings.width`` x ``settings.height``. All four values are in PDF
    points, matching the ``Setting`` defaults (288 x 432, i.e. 4 x 6 inches).
    Earlier code multiplied width and height by 72 a second time, which made
    the default box 72 times too large. For a +/-90 degree rotation the x/y
    inputs are swapped so callers can give them in the rotated orientation.
    ``settings`` itself is never modified. Progress notes are appended to the
    module-level ``messages`` list.

    Args:
        content: Stream holding the source PDF.
        settings: Crop origin, size, per-axis scale and rotation.

    Returns:
        A new stream, positioned at the start, holding the one-page result.
    """
    messages.append("Using manual-crop mode")

    # Work on local copies so the caller's settings object stays intact and
    # repeated calls with the same settings give the same result.
    start_x, start_y = settings.start_x, settings.start_y
    width, height = settings.width, settings.height
    scale_x, scale_y = settings.scale_x, settings.scale_y

    # Swap directions to be less confusing for inputs
    if settings.rotate in (90, -90):
        start_x, start_y = start_y, start_x
        width, height = height, width
        scale_x, scale_y = scale_y, scale_x

    reader = PdfReader(content)
    writer = PdfWriter()
    p1 = reader.pages[0]

    # start_y is measured down from the top edge of the media box.
    top = p1.mediabox.top - start_y
    bottom = top - height
    left = start_x
    right = left + width

    p1.mediabox.upper_left = (left, top)
    p1.mediabox.upper_right = (right, top)
    p1.mediabox.lower_left = (left, bottom)
    p1.mediabox.lower_right = (right, bottom)

    if scale_x != 1 or scale_y != 1:
        op = Transformation().scale(sx=scale_x, sy=scale_y)
        p1.add_transformation(op)
        messages.append(f"Scaled by {scale_x}x{scale_y}")

    if settings.rotate != 0:
        writer.add_page(p1.rotate(settings.rotate))
        messages.append(f"Rotated {settings.rotate} degrees")
    else:
        writer.add_page(p1)

    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    return buffer
|
||||||
54
lambda_function.py
Normal file
54
lambda_function.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# Crop a single page PDF document.
|
||||||
|
# This is specifically designed for the letter size UPS shipping label that needs to be cropped for printing.
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
from cropper import AutoSettings, ManualSettings, argument_default, auto_crop, manual_crop, messages
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    """Crop the PDF carried in ``event`` and return it base64-encoded.

    Args:
        event: Request parameters. ``b64_content`` (required) is the
            base64-encoded source PDF and ``mode`` is ``'manual'`` (default)
            or ``'auto'``. The remaining keys override the fields of the
            selected settings class.
        context: Lambda context object. Only ``log_stream_name`` is read, and
            a missing attribute (for example ``None`` when invoked locally)
            is tolerated.

    Returns:
        On success, ``{'statusCode': 200, 'body': <base64 str>, 'messages': [...]}``.
        On any failure, ``{'statusCode': 400, 'error': <str>, 'messages': [...]}``.
    """
    messages.clear()

    try:
        content = io.BytesIO(base64.b64decode(event['b64_content']))
    except KeyError:
        return _error_response('Missing required field: b64_content')
    except ValueError as e:
        # binascii.Error, raised for malformed base64, subclasses ValueError.
        return _error_response(f'Invalid b64_content: {e}')

    mode = argument_default(event, 'mode', 'manual', 'string')
    # getattr keeps local invocations, which have no real context, working.
    messages.append(f"Log stream name is {getattr(context, 'log_stream_name', None)}")

    if mode == 'auto':
        settings = AutoSettings()
        crop = auto_crop
        overrides = ('width', 'height', 'margin')
    elif mode == 'manual':
        settings = ManualSettings()
        crop = manual_crop
        overrides = ('start_x', 'start_y', 'width', 'height', 'scale_x', 'scale_y', 'rotate')
    else:
        return _error_response('Invalid mode: (manual|auto)')

    # Top-level boundary: report any failure (bad parameter values, an
    # unreadable PDF, no extents found) as a 400 in both modes.
    try:
        for name in overrides:
            settings.event_argument(event, name)
        buffer = crop(content, settings)
    except Exception as e:
        return _error_response(str(e))

    # b64encode returns bytes, which the JSON response cannot carry.
    data = base64.b64encode(buffer.getvalue()).decode('ascii')

    return {
        'statusCode': 200,
        'body': data,
        'messages': messages,
    }


def _error_response(error: str) -> Dict[str, Any]:
    """Build the 400 response shared by every failure path."""
    return {
        'statusCode': 400,
        'error': error,
        'messages': messages,
    }
|
||||||
13
local-invoke.py
Normal file
13
local-invoke.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
"""Command-line helper: invoke the Lambda handler locally with a JSON payload."""
import json
import sys
from types import SimpleNamespace

from lambda_function import lambda_handler


def _jsonable(value):
    """``json.dumps`` fallback that renders bytes (e.g. a base64 body) as text."""
    if isinstance(value, (bytes, bytearray)):
        return value.decode('ascii')
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def main(argv):
    """Call ``lambda_handler`` with the JSON event in ``argv[1]`` and print the response.

    Args:
        argv: Command-line arguments, ``argv[0]`` being the script name.

    Returns:
        Process exit status: 0 on success, 1 when the payload is missing.
    """
    if len(argv) < 2:
        print("Usage: python local-invoke.py <payload>", file=sys.stderr)
        return 1

    event = json.loads(argv[1])

    # The handler reads context.log_stream_name, so give it a stand-in
    # context instead of None.
    context = SimpleNamespace(log_stream_name='local-invoke')

    response = lambda_handler(event, context)
    print(json.dumps(response, default=_jsonable))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||||
Loading…
x
Reference in New Issue
Block a user