Explore

PDF Plumber

import pip

# Install pdfplumber into the interpreter that is running this script.
# `pip.main` is not a supported API and is absent from the public `pip`
# module in current pip releases, so pip is invoked as a subprocess instead.
import subprocess
import sys

# NOTE(review): the --trusted-host flags tell pip to trust these hosts even
# without valid HTTPS; presumably required behind a corporate proxy — confirm
# they are still needed before keeping them.
subprocess.check_call([
    sys.executable, "-m", "pip", "install", "pdfplumber",
    "--trusted-host", "pypi.org",
    "--trusted-host", "files.pythonhosted.org",
])

import pdfplumber

r"C:\Users\hurleyk\Downloads\sample.pdf"

import pdfplumber

# Path to your PDF file - update with your actual username & filename
pdf_path = r"C:\Users\hurleyk\Downloads\sample.pdf"

# Exploratory check: open the PDF, report its page count, then print the
# extracted text of every page in order.
try:
    with pdfplumber.open(pdf_path) as pdf:
        print(" PDF loaded successfully!")
        print(f"Total pages: {len(pdf.pages)}")

        # Loop through all pages, numbering them from 1 for display
        for page_number, page in enumerate(pdf.pages, start=1):
            text = page.extract_text()
            print(f"\n--- Page {page_number} ---\n")
            # Fall back to a placeholder when extract_text() yields nothing
            print(text if text else "[No extractable text found]")
except Exception as e:
    # Top-level boundary of an exploratory script: report the failure
    # instead of letting the traceback end the session.
    print(" Error:", e)

# ─── Read incoming PDF paths ─────────────────────────────────
# For every row of the incoming Alteryx stream, extract the text of the PDF at
# row['PDF_Path'] into a sibling .txt file (same path, extension swapped) and
# record one status row per input row.
#
# NOTE(review): `Alteryx` is not imported anywhere in this snippet; presumably
# the Alteryx Python tool supplies it (`from ayx import Alteryx`) — confirm.
import os

import pandas as pd

df_in = Alteryx.read("#1")

out_rows = []

for _, row in df_in.iterrows():
    pdf_path = row['PDF_Path']

    # A null / non-string path would make os.path.splitext raise outside the
    # try block and abort the whole run; report it as a failed row instead.
    if not isinstance(pdf_path, str) or not pdf_path.strip():
        out_rows.append({
            "PDF_Path": pdf_path,
            "Text_Path": "",
            "Status": "Error: PDF_Path is missing or not a string",
        })
        continue

    txt_out = os.path.splitext(pdf_path)[0] + '.txt'

    # If .txt already exists, skip
    if os.path.exists(txt_out):
        status = f"Skipped: TXT already exists → {txt_out}"
    else:
        try:
            with pdfplumber.open(pdf_path) as pdf:
                # Keep only pages that produced text; pages with no
                # extractable text are dropped rather than written as blanks.
                page_texts = [
                    text
                    for text in (page.extract_text() for page in pdf.pages)
                    if text
                ]

            full_text = "\n\n".join(page_texts).strip()

            # Write to TXT file; a placeholder is written when nothing was
            # extracted so the output file still exists.
            with open(txt_out, 'w', encoding='utf-8') as f:
                f.write(full_text or f"[No text extracted from {os.path.basename(pdf_path)}]")

            status = f"Success: Created {txt_out}"
        except Exception as e:
            # Per-row boundary: one bad PDF must not stop the remaining rows.
            txt_out = ""
            status = f"Error: {e}"

    out_rows.append({
        "PDF_Path": pdf_path,
        "Text_Path": txt_out,
        "Status": status,
    })

# ─── Output results to Alteryx ───────────────────────────────
# Columns are pinned so an empty input still yields a frame with the expected
# three fields instead of a column-less DataFrame.
df_out = pd.DataFrame(out_rows, columns=["PDF_Path", "Text_Path", "Status"])

Alteryx.write(df_out, 1)

Want to print your doc?
This is not the way.

Try clicking the ··· in the right corner or using a keyboard shortcut (Ctrl+P) instead.