Initial commit
This commit is contained in:
commit
762b72bec8
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
_deploy
|
||||
.venv
|
||||
lambda_env
|
||||
__pycache__
|
||||
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.7" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (pdf-lambda-crop)" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" filepath="$PROJECT_DIR$/.idea/pdf-lambda-crop.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
19
.idea/pdf-lambda-crop.iml
generated
Normal file
19
.idea/pdf-lambda-crop.iml
generated
Normal file
@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/_deploy" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/_package_downloads" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.13 (pdf-lambda-crop)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyDocumentationSettings">
|
||||
<option name="format" value="GOOGLE" />
|
||||
<option name="myDocStringFormat" value="Google" />
|
||||
</component>
|
||||
<component name="TestRunnerService">
|
||||
<option name="PROJECT_TEST_RUNNER" value="py.test" />
|
||||
</component>
|
||||
</module>
|
||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
30
README.md
Normal file
30
README.md
Normal file
@ -0,0 +1,30 @@
|
||||
# PDF Label Cropper
|
||||
|
||||
## Deployment
|
||||
|
||||
[Documentation](https://docs.aws.amazon.com/lambda/latest/dg/python-package.html)
|
||||
|
||||
This creates a zip file structured for AWS Lambda deployment. The virtual environment steps are only required if the
|
||||
packages are updated.
|
||||
|
||||
- `python -m venv lambda_env`
|
||||
- `source ./lambda_env/bin/activate`
|
||||
- `pip install pypdf pdfplumber`
|
||||
|
||||
Note the location of installed packages with `pip show pypdf` and modify the following commands if necessary.
|
||||
|
||||
- `deactivate`
|
||||
- `cd lambda_env/lib/python3.13/site-packages/`
|
||||
- `zip -r ../../../../_deploy/pdf-crop.zip .`
|
||||
- `cd ../../../../`
|
||||
|
||||
At this point, the zip file only contains the packages. To add the custom code and the lambda function, run the
|
||||
following command from the project root. This is the same command used to update the custom code.
|
||||
|
||||
- `zip _deploy/pdf-crop.zip cropper.py lambda_function.py`
|
||||
|
||||
Inspect zip contents with `unzip -l _deploy/pdf-crop.zip`
|
||||
|
||||
Update the Lambda function with the following command (make sure to specify the credentials and region):
|
||||
|
||||
`aws lambda update-function-code --function-name pdf-label-crop --zip-file fileb://_deploy/pdf-crop.zip`
|
||||
16
auto-crop.py
Normal file
16
auto-crop.py
Normal file
@ -0,0 +1,16 @@
|
||||
# Command-line helper: auto-crop the first page of a PDF into ./cropped.pdf.
import io
import sys

from cropper import AutoSettings, auto_crop

if len(sys.argv) < 2:
    # The usage text names this script (it previously referred to "test.py").
    print("Usage: python auto-crop.py <pdf_path>")
    sys.exit(1)

pdf_path = sys.argv[1]

# Load the whole document into memory; auto_crop works on a BytesIO.
with open(pdf_path, "rb") as fh:
    content = io.BytesIO(fh.read())

# Default AutoSettings are used; nothing is read from the command line
# besides the input path.
buffer = auto_crop(content, AutoSettings())

# The output path is fixed and relative to the current working directory.
with open("cropped.pdf", "wb") as fh:
    fh.write(buffer.read())
|
||||
231
cropper.py
Normal file
231
cropper.py
Normal file
@ -0,0 +1,231 @@
|
||||
import sys
|
||||
|
||||
from pypdf import PdfWriter, PdfReader, Transformation
|
||||
import pdfplumber
|
||||
import io
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class Setting:
    """Base crop settings shared by the automatic and manual modes.

    Attributes:
        width: Target width; defaults to 288 (4 * 72).
        height: Target height; defaults to 432 (6 * 72).
    """

    width: float = 288  # 4 * 72
    height: float = 432  # 6 * 72

    def event_argument(self, event, index, field=None):
        """Copy one value from ``event`` onto this object, cast to the field type.

        The declared type of the target dataclass field decides the cast:
        ``float`` and ``int`` fields are converted with ``float()`` / ``int()``;
        ``bool`` fields become ``True`` for ``True`` or the string ``'true'``.
        Fields of any other type are left untouched. Nothing happens when
        ``index`` is missing from ``event`` or ``field`` is not a dataclass
        field of this object.

        Args:
            event: Mapping that may contain the raw value.
            index: Key to look up in ``event``.
            field: Name of the attribute to set; defaults to ``index``.

        Raises:
            ValueError: If the raw value cannot be converted to a number.
        """
        # Local import: the module only imports ``dataclass`` at file level.
        from dataclasses import fields

        if field is None:
            field = index

        # ``fields()`` includes inherited fields, whereas ``__annotations__``
        # on a subclass only lists the fields that subclass declares itself
        # (which made width/height overrides silently disappear).
        declared = {f.name: f.type for f in fields(self)}

        try:
            raw = event[index]
            p_type = declared[field]
        except KeyError:
            return

        # Field types may be real classes or, with postponed annotations,
        # plain strings; normalise both to a name.
        type_name = getattr(p_type, '__name__', p_type)

        # Python's type names are 'bool' / 'int'; the longer spellings are
        # kept so string annotations written that way still work.
        if type_name in ('bool', 'boolean'):
            setattr(self, field, raw in (True, 'true'))
        elif type_name == 'float':
            setattr(self, field, float(raw))
        elif type_name in ('int', 'integer'):
            setattr(self, field, int(raw))
|
||||
|
||||
|
||||
@dataclass
class AutoSettings(Setting):
    """Settings for the automatic crop mode.

    Attributes:
        margin: Padding added around the detected extents by ``auto_crop``;
            defaults to 7.2 (0.1 * 72).
    """

    margin: float = 7.2  # 0.1 * 72
|
||||
|
||||
|
||||
@dataclass
class ManualSettings(Setting):
    """Settings for the manual crop mode.

    Attributes:
        start_x: Left edge of the crop region (used as the media box left).
        start_y: Offset subtracted from the top of the page's media box.
        scale_x: Horizontal scale factor; 1 means no scaling.
        scale_y: Vertical scale factor; 1 means no scaling.
        rotate: Rotation in degrees applied to the page; 0 disables it.
    """

    # Crop origin.
    start_x: float = 90
    start_y: float = 90

    # Scale factors applied after cropping.
    scale_x: float = 1
    scale_y: float = 1

    # Page rotation in degrees.
    rotate: int = 90
|
||||
|
||||
|
||||
# Module-level log of human-readable processing notes; the crop functions
# append to it and the Lambda handler clears it at the start of each call.
messages: list[str] = []
|
||||
|
||||
def argument_default(event, index, default, cast):
    """Return ``event[index]`` converted according to ``cast``, or ``default``.

    Args:
        event: Mapping holding the raw request values.
        index: Key to look up in ``event``.
        default: Value returned when ``index`` is not in ``event``.
        cast: One of ``'float'``, ``'integer'`` or ``'boolean'``; any other
            value returns the raw entry unchanged.

    Returns:
        The converted value, or ``default`` when the key is missing.

    Raises:
        ValueError: If the raw value cannot be converted to a number.
    """
    # Only the lookup can legitimately raise KeyError; keep the try minimal.
    try:
        val = event[index]
    except KeyError:
        return default

    if cast == 'float':
        return float(val)
    if cast == 'integer':
        return int(val)
    if cast == 'boolean':
        # Accept a real boolean (as produced by a JSON payload) as well as
        # the string form, matching Setting.event_argument.
        return val in (True, 'true')
    return val
|
||||
|
||||
|
||||
def extents_by_rectangle(page) -> tuple[int|None, int|None, int|None, int|None]:
|
||||
largest_area = None
|
||||
largest_rect = None
|
||||
|
||||
for rect in page.rects:
|
||||
area = rect["width"] * rect["height"]
|
||||
if largest_area is None or area > largest_area:
|
||||
largest_area = area
|
||||
largest_rect = rect
|
||||
|
||||
if largest_rect is None:
|
||||
return None, None, None, None
|
||||
|
||||
return largest_rect["x0"], largest_rect["y0"], largest_rect["x1"], largest_rect["y1"]
|
||||
|
||||
|
||||
def extents_by_image(page) -> tuple[int|None, int|None, int|None, int|None]:
|
||||
min_x = None
|
||||
min_y = None
|
||||
max_x = None
|
||||
max_y = None
|
||||
|
||||
for obj in page.images:
|
||||
if min_x is None or obj["x0"] < min_x:
|
||||
min_x = obj["x0"]
|
||||
if min_y is None or obj["y0"] < min_y:
|
||||
min_y = obj["y0"]
|
||||
if max_x is None or obj["x1"] > max_x:
|
||||
max_x = obj["x1"]
|
||||
if max_y is None or obj["y1"] > max_y:
|
||||
max_y = obj["y1"]
|
||||
|
||||
return min_x, min_y, max_x, max_y
|
||||
|
||||
|
||||
# def extents_by_all(page) -> tuple[int|None, int|None, int|None, int|None]:
|
||||
# min_x = None
|
||||
# min_y = None
|
||||
# max_x = None
|
||||
# max_y = None
|
||||
#
|
||||
# for obj in page.objects:
|
||||
# if obj == 'image':
|
||||
# x0, y0, x1, y1 = extents_by_image(page)
|
||||
# if x0 is None or y0 is None or x1 is None or y1 is None:
|
||||
# continue
|
||||
# if min_x is None or x0 < min_x:
|
||||
# min_x = x0
|
||||
# if min_y is None or y0 < min_y:
|
||||
# min_y = y0
|
||||
# if max_x is None or x1 > max_x:
|
||||
# max_x = x1
|
||||
# if max_y is None or y1 > max_y:
|
||||
# max_y = y1
|
||||
#
|
||||
# return min_x, min_y, max_x, max_y
|
||||
|
||||
|
||||
def auto_crop(content: io.BytesIO, settings: AutoSettings) -> io.BytesIO:
    """Crop the first page of a PDF to its detected content extents.

    The extents come from the largest rectangle on the page, falling back to
    the bounding box of all images. The margin from ``settings`` is added on
    every side. The page is rotated by 90 degrees when the extents are wider
    than tall, and scaled down when they exceed ``settings.width`` /
    ``settings.height``. Progress notes are appended to the module-level
    ``messages`` list.

    Args:
        content: In-memory PDF document; only its first page is used.
        settings: Target size and margin.

    Returns:
        A new buffer, positioned at offset 0, holding a PDF with the single
        processed page.

    Raises:
        Exception: If neither rectangles nor images yield extents.
    """
    messages.append("Using auto-crop mode")

    with pdfplumber.open(content) as pdf:
        first_page = pdf.pages[0]
        extents_method = 'rectangle'
        extents = extents_by_rectangle(first_page)
        if any(value is None for value in extents):
            # No usable rectangle: fall back to the image bounding box.
            extents_method = 'image'
            extents = extents_by_image(first_page)

    if any(value is None for value in extents):
        raise Exception("Unable to find the extents of the document")

    messages.append(f"Used {extents_method} to find extents")

    margin = settings.margin
    min_x, min_y, max_x, max_y = extents
    min_x = round(min_x - margin)
    min_y = round(min_y - margin)
    max_x = round(max_x + margin)
    max_y = round(max_y + margin)

    width = max_x - min_x
    height = max_y - min_y
    messages.append(f"Document extents: {width}x{height}")

    # Compare in portrait orientation; remember whether a rotation is needed.
    rotate = width > height
    if rotate:
        width, height = height, width

    scale = 1
    if width > settings.width or height > settings.height:
        # Uniform scale so that both dimensions fit the target size.
        scale = min(settings.width / width, settings.height / height)
        # NOTE(review): the margin is applied a second time here, on top of
        # the padding added above -- confirm this is intended.
        min_x = round((min_x * scale) - margin)
        min_y = round((min_y * scale) - margin)
        max_x = round((max_x * scale) + margin)
        max_y = round((max_y * scale) + margin)

    reader = PdfReader(content)
    writer = PdfWriter()
    p1 = reader.pages[0]

    # Corner assignments are kept in this exact order; each one overwrites
    # part of the crop box.
    crop_box = p1.cropbox
    crop_box.upper_left = (min_x, min_y)
    crop_box.upper_right = (max_x, min_y)
    crop_box.lower_left = (min_x, max_y)
    crop_box.lower_right = (max_x, max_y)

    if rotate:
        p1.rotate(90)
        messages.append("Rotated 90 degrees")

    if scale != 1:
        p1.add_transformation(Transformation().scale(sx=scale, sy=scale))
        messages.append(f"Scaled by {scale}")

    writer.add_page(p1)

    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)
    return buffer
|
||||
|
||||
|
||||
def manual_crop(content: io.BytesIO, settings: ManualSettings) -> io.BytesIO:
    """Crop the first page of a PDF using explicitly supplied coordinates.

    The page's media box is replaced by a box starting at
    ``(start_x, mediabox.top - start_y)`` and spanning ``width * 72`` by
    ``height * 72``. Optional scaling and rotation are applied afterwards.
    Progress notes are appended to the module-level ``messages`` list.

    ``settings`` is read but never modified, so the same object can safely
    be reused for several calls.

    Args:
        content: In-memory PDF document; only its first page is used.
        settings: Origin, size, scale factors and rotation.

    Returns:
        A new buffer, positioned at offset 0, holding a PDF with the single
        processed page.
    """
    messages.append("Using manual-crop mode")

    # Work on local copies: swapping the attributes on ``settings`` itself
    # would undo the swap if the caller passed the same object again.
    start_x, start_y = settings.start_x, settings.start_y
    width, height = settings.width, settings.height
    scale_x, scale_y = settings.scale_x, settings.scale_y

    # Swap directions to be less confusing for inputs
    if settings.rotate == 90 or settings.rotate == -90:
        start_x, start_y = start_y, start_x
        width, height = height, width
        scale_x, scale_y = scale_y, scale_x

    reader = PdfReader(content)
    writer = PdfWriter()
    p1 = reader.pages[0]

    # NOTE(review): width/height are multiplied by 72 here, yet the defaults
    # on Setting (288 = 4 * 72, 432 = 6 * 72) look like they are already in
    # points -- confirm which unit callers are expected to supply.
    top = p1.mediabox.top - start_y
    bottom = top - (height * 72)
    left = start_x
    right = left + (width * 72)

    p1.mediabox.upper_left = (left, top)
    p1.mediabox.upper_right = (right, top)
    p1.mediabox.lower_left = (left, bottom)
    p1.mediabox.lower_right = (right, bottom)

    if scale_x != 1 or scale_y != 1:
        op = Transformation().scale(sx=scale_x, sy=scale_y)
        p1.add_transformation(op)
        messages.append(f"Scaled by {scale_x}x{scale_y}")

    if settings.rotate != 0:
        writer.add_page(p1.rotate(settings.rotate))
        messages.append(f"Rotated {settings.rotate} degrees")
    else:
        writer.add_page(p1)

    buffer = io.BytesIO()
    writer.write(buffer)
    buffer.seek(0)

    return buffer
|
||||
54
lambda_function.py
Normal file
54
lambda_function.py
Normal file
@ -0,0 +1,54 @@
|
||||
# Crop a single page PDF document.
|
||||
# This is specifically designed for the letter size UPS shipping label that needs to be cropped for printing.
|
||||
|
||||
import base64
|
||||
import io
|
||||
from cropper import AutoSettings, ManualSettings, argument_default, auto_crop, manual_crop, messages
|
||||
from typing import Dict, Any
|
||||
|
||||
|
||||
def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    """Crop the PDF carried in ``event`` and return it base64-encoded.

    Args:
        event: Request payload. ``b64_content`` holds the base64-encoded PDF
            and ``mode`` selects ``'manual'`` (the default) or ``'auto'``.
            The remaining keys override the settings of the chosen mode:
            ``width``, ``height`` and ``margin`` for auto; ``start_x``,
            ``start_y``, ``width``, ``height``, ``scale_x``, ``scale_y`` and
            ``rotate`` for manual.
        context: Invocation context. Its ``log_stream_name`` is recorded
            when present; ``None`` is accepted for local invocation.

    Returns:
        On success ``statusCode`` 200 with ``body`` (the cropped PDF as a
        base64 ``str``) and ``messages``. On failure ``statusCode`` 400 with
        ``error`` and ``messages``.
    """
    messages.clear()

    def failure(error: str) -> Dict[str, Any]:
        # Every error response has the same shape.
        return {
            'statusCode': 400,
            'error': error,
            'messages': list(messages),
        }

    # A missing or malformed payload is a client error, not a crash.
    try:
        content = io.BytesIO(base64.b64decode(event['b64_content']))
    except KeyError:
        return failure('Missing required field: b64_content')
    except (TypeError, ValueError) as e:
        return failure(f'Invalid b64_content: {e}')

    mode = argument_default(event, 'mode', 'manual', 'string')

    # ``context`` is None when the handler is invoked locally.
    log_stream_name = getattr(context, 'log_stream_name', None)
    if log_stream_name is not None:
        messages.append(f"Log stream name is {log_stream_name}")

    if mode == 'auto':
        settings = AutoSettings()
        overrides = ('width', 'height', 'margin')
        crop = auto_crop
    elif mode == 'manual':
        settings = ManualSettings()
        overrides = (
            'start_x', 'start_y', 'width', 'height',
            'scale_x', 'scale_y', 'rotate',
        )
        crop = manual_crop
    else:
        return failure('Invalid mode: (manual|auto)')

    # Conversion and cropping failures are reported the same way in both
    # modes instead of only in auto mode.
    try:
        for name in overrides:
            settings.event_argument(event, name)
        buffer = crop(content, settings)
    except Exception as e:
        return failure(str(e))

    # b64encode returns bytes, which cannot be serialized to JSON; decode to
    # text so the response can be marshalled.
    data = base64.b64encode(buffer.getvalue()).decode('ascii')

    return {
        'statusCode': 200,
        'body': data,
        'messages': list(messages),
    }
|
||||
13
local-invoke.py
Normal file
13
local-invoke.py
Normal file
@ -0,0 +1,13 @@
|
||||
# Command-line helper: run the Lambda handler locally with a JSON payload
# and print its response as JSON.
from lambda_function import lambda_handler
import sys
import json
import types


def _encode_bytes(value):
    """Let ``json.dumps`` emit bytes values (e.g. a base64 body) as text."""
    if isinstance(value, (bytes, bytearray)):
        return value.decode("ascii")
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


if len(sys.argv) < 2:
    # The usage text names this script (it previously referred to "test.py").
    print("Usage: python local-invoke.py <payload>")
    sys.exit(1)

payload = sys.argv[1]
event = json.loads(payload)

# The handler reads ``context.log_stream_name``, so hand it a minimal
# stand-in rather than None.
context = types.SimpleNamespace(log_stream_name="local-invoke")

response = lambda_handler(event, context)
print(json.dumps(response, default=_encode_bytes))
|
||||
Loading…
x
Reference in New Issue
Block a user