Spaces:
Sleeping
Sleeping
| import cv2 | |
| import re | |
| import os | |
| import glob | |
| import sys | |
| from fpdf import FPDF | |
| from pdf2image import convert_from_path | |
| from PIL import Image as PILImage | |
class Converter:
    """Round-trip a PDF through per-page JPEG images, inverting chosen pages.

    The workflow is: rasterise every PDF page to ``<n>.jpeg``
    (:meth:`pdf_to_img_all`), then rebuild a PDF from those images
    (:meth:`img_to_pdf`), colour-inverting the pages whose numbers are listed
    by the caller. Each image is scaled to fit an A4 page and centred on it.
    """

    def __init__(self):
        # A4 page size in millimetres: short edge and long edge.
        self.a4_w_mm = 210
        self.a4_h_mm = 297
        # Resolution passed to pdf2image when rasterising PDF pages.
        self.dpi = 200.0

    def invert_image(self, i_input, i_output):
        """Write a colour-inverted copy of the image at *i_input* to *i_output*.

        Failures are reported on stdout rather than raised, so a single bad
        page does not abort a whole conversion run.

        Args:
            i_input: Path of the image to read.
            i_output: Path to write the inverted image to (may equal
                *i_input* to invert in place).
        """
        image = cv2.imread(i_input)
        print("Inverting image: {}".format(i_input))
        # cv2.imread signals an unreadable file by returning None.
        if image is None:
            print("Error reading image: {}".format(i_input))
            return
        # Bitwise NOT on the pixel array flips every channel value.
        if not cv2.imwrite(i_output, ~image):
            print("Error writing image: {}".format(i_output))

    def pdf_to_img_all(self, file_path, o_dir):
        """Rasterise every page of a PDF to ``<page number>.jpeg`` in *o_dir*.

        Args:
            file_path: Path of the PDF to convert.
            o_dir: Directory to write the images to; created if missing.
        """
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(o_dir, exist_ok=True)
        pages = convert_from_path(file_path, dpi=self.dpi)
        # Files are numbered from 1 so names match human page numbers.
        for page_number, image in enumerate(pages, start=1):
            output_path = os.path.join(o_dir, f"{page_number}.jpeg")
            image.save(output_path, 'JPEG', quality=95)
            print("Saved image: {}".format(output_path))

    def _page_size_mm(self, orientation):
        """Return the A4 page ``(width, height)`` in mm for 'P' or 'L'."""
        if orientation == 'L':
            return self.a4_h_mm, self.a4_w_mm
        return self.a4_w_mm, self.a4_h_mm

    def _centered_origin(self, orientation, w, h):
        """Return the top-left ``(x, y)`` in mm that centres a w×h box on the page."""
        page_w, page_h = self._page_size_mm(orientation)
        return (page_w - w) / 2, (page_h - h) / 2

    def _numbered_images(self, i_dir):
        """Return ``(page_number, path)`` pairs for ``*.jpeg`` files, sorted by number."""
        numbered = []
        for path in glob.iglob(os.path.join(i_dir, '*.jpeg')):
            match = re.search(r'(\d+)\.jpeg', os.path.basename(path))
            # Files whose name carries no page number are ignored.
            if match:
                numbered.append((int(match.group(1)), path))
        # Sort numerically; a plain string sort would put "10" before "2".
        numbered.sort(key=lambda item: item[0])
        return numbered

    def get_scaled_dimensions(self, width_pixels, height_pixels):
        """Pick a page orientation and scale the image to fit the A4 page.

        Images wider than tall go on a landscape page, everything else on a
        portrait page. The image is scaled uniformly (aspect ratio preserved)
        to the largest size that fits inside the page on *both* axes, so
        images squarer than A4 no longer overflow the short edge.

        Args:
            width_pixels: Image width in pixels (expected to be positive).
            height_pixels: Image height in pixels (expected to be positive).

        Returns:
            A tuple ``(orientation, w, h)`` where *orientation* is ``'L'`` or
            ``'P'`` and *w*, *h* are the scaled size in millimetres.
        """
        orientation = 'L' if width_pixels > height_pixels else 'P'
        page_w, page_h = self._page_size_mm(orientation)
        # The tighter of the two axis constraints decides the scale factor.
        scale = min(page_w / width_pixels, page_h / height_pixels)
        return orientation, width_pixels * scale, height_pixels * scale

    def img_to_pdf(self, i_dir, o_dir, filename, invert_pages):
        """Combine the numbered JPEGs in *i_dir* into one PDF.

        Pages whose number appears in *invert_pages* are colour-inverted in
        place (the image file is overwritten) before being added.

        Args:
            i_dir: Directory containing ``<n>.jpeg`` page images.
            o_dir: Directory to write the PDF to; created if missing.
            filename: File name of the generated PDF inside *o_dir*.
            invert_pages: Iterable of page numbers to invert.
        """
        pdf = FPDF(unit="mm", format='A4')
        pdf.set_auto_page_break(auto=False, margin=0)
        pdf.set_margins(0, 0, 0)

        # Build the lookup once; membership tests on a set are O(1).
        pages_to_invert = set(invert_pages)

        for page_num, img_path in self._numbered_images(i_dir):
            if page_num in pages_to_invert:
                self.invert_image(img_path, img_path)
            else:
                print("Keeping original image: {}".format(img_path))

            with PILImage.open(img_path) as img:
                width_pixels, height_pixels = img.size

            orientation, w, h = self.get_scaled_dimensions(
                width_pixels, height_pixels
            )
            pdf.add_page(orientation=orientation)

            # Centre on the page. The previous inline expression mis-grouped
            # the subtraction and pushed portrait images to the page centre.
            x, y = self._centered_origin(orientation, w, h)
            pdf.image(img_path, x=x, y=y, w=w, h=h)
            print(f"Added {img_path} to PDF ({orientation})")

        os.makedirs(o_dir, exist_ok=True)
        output_pdf = os.path.join(o_dir, filename)
        pdf.output(output_pdf, "F")
        print("Generated PDF: {}".format(output_pdf))
def parse_page_ranges(range_str):
    """Convert a page-range string such as "1-3,7" into a sorted list of ints.

    The string is a comma-separated list of single page numbers and inclusive
    ``start-end`` ranges. Whitespace around items is ignored, duplicates are
    collapsed, and empty items (an empty string, or a trailing or doubled
    comma) are skipped instead of raising. A range whose end is smaller than
    its start contributes no pages.

    Args:
        range_str: The range specification, e.g. ``"1-12,14-20,56"``.

    Returns:
        The selected page numbers as a sorted list without duplicates.

    Raises:
        ValueError: If an item is not an integer or an ``int-int`` range.
    """
    pages = set()
    for part in range_str.split(','):
        part = part.strip()
        # "" and stray commas yield empty items; skip them rather than crash.
        if not part:
            continue
        if '-' in part:
            # Split on the first dash only; anything malformed after it is
            # rejected by int() with a ValueError.
            start, _, end = part.partition('-')
            pages.update(range(int(start), int(end) + 1))
        else:
            pages.add(int(part))
    return sorted(pages)
if __name__ == "__main__":
    # Script entry point: rasterise input.pdf, invert the listed pages and
    # reassemble everything into output/result.pdf.
    pages_to_invert = parse_page_ranges("1-12,14-20,22-32,56,66-78,82-97")

    # Intermediate JPEGs are written here and read back when building the PDF.
    img_dir = 'images'

    converter = Converter()
    converter.pdf_to_img_all('input.pdf', img_dir)
    converter.img_to_pdf(img_dir, 'output', 'result.pdf', pages_to_invert)