icon picker
PDF Plumber

# Bootstrap: make sure the third-party `pdfplumber` package is importable,
# installing it into the *current* interpreter's environment when it is not.
import importlib
import subprocess
import sys

import pip  # noqa: F401 - kept from the original in case code outside this view references it

try:
    import pdfplumber
except ImportError:
    # `pip.main()` was removed from pip's public API in pip 10; the supported
    # way to drive pip from a program is to run it as a subprocess of the
    # interpreter that is executing this script.
    # NOTE(review): --trusted-host marks these hosts as trusted even without
    # valid HTTPS. The flags are kept from the original (presumably needed
    # behind a TLS-intercepting proxy) - confirm they are still required.
    subprocess.check_call([
        sys.executable, '-m', 'pip', 'install', 'pdfplumber',
        '--trusted-host', 'pypi.org',
        '--trusted-host', 'files.pythonhosted.org',
    ])
    # Let the import system notice the package installed while running.
    importlib.invalidate_caches()
    import pdfplumber

r"C:\Users\hurleyk\Downloads\sample.pdf"
import pdfplumber

# Smoke test: open a single local PDF and print the text of every page.
# Location of the PDF to inspect - change the username and filename to yours.
pdf_path = r"C:\Users\hurleyk\Downloads\sample.pdf"

try:
    with pdfplumber.open(pdf_path) as document:
        print(" PDF loaded successfully!")
        print(f"Total pages: {len(document.pages)}")
        # Walk the pages in order, numbering them from 1 for display.
        for page_number, current_page in enumerate(document.pages, start=1):
            page_text = current_page.extract_text()
            print(f"\n--- Page {page_number} ---\n")
            # A falsy result (None or empty string) prints a placeholder.
            print(page_text or "[No extractable text found]")
except Exception as error:
    # Any failure is reported as a one-line message instead of a traceback.
    print(" Error:", error)


# ─── Batch-convert PDFs to sidecar .txt files ────────────────
# Reads PDF paths from input anchor "#1", writes the extracted text of each
# PDF to a .txt file with the same base name, and emits one status row per
# input row on output anchor 1.
#
# NOTE(review): `Alteryx` is not imported anywhere in this view; it
# presumably comes from `from ayx import Alteryx` in the Alteryx Python
# tool - confirm.
import os

import pandas as pd

# Fixed output schema, so an empty input still yields the expected columns.
_OUTPUT_COLUMNS = ["PDF_Path", "Text_Path", "Status"]


def _extract_pdf_text(pdf_path):
    """Return the text of all pages of *pdf_path*, joined by blank lines.

    Pages for which ``extract_text()`` returns a falsy value are skipped.
    The result is stripped of leading and trailing whitespace.
    """
    with pdfplumber.open(pdf_path) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    return "\n\n".join(text for text in page_texts if text).strip()


def _convert_pdf(pdf_path):
    """Convert one PDF into a .txt file that shares its base name.

    Returns:
        A ``(text_path, status)`` tuple. ``text_path`` is the .txt path when
        the file was created or already existed, and ``""`` on any error.
    """
    # A null/NaN or blank cell would make os.path.splitext raise (or produce
    # a bare ".txt" target); report it as a row-level error instead of
    # aborting the whole batch.
    if not isinstance(pdf_path, str) or not pdf_path.strip():
        return "", "Error: PDF_Path is missing or not a string"

    base, _ext = os.path.splitext(pdf_path)
    txt_out = base + '.txt'

    # If .txt already exists, skip - an existing file is never overwritten.
    if os.path.exists(txt_out):
        return txt_out, f"Skipped: TXT already exists → {txt_out}"

    try:
        full_text = _extract_pdf_text(pdf_path)
        # Write to TXT file; a placeholder line is written when no page
        # yielded any text.
        with open(txt_out, 'w', encoding='utf-8') as f:
            f.write(full_text or f"[No text extracted from {os.path.basename(pdf_path)}]")
    except Exception as e:
        # Row-level boundary: record the failure and keep processing.
        return "", f"Error: {e}"

    return txt_out, f"Success: Created {txt_out}"


# ─── Read incoming PDF paths ─────────────────────────────────
df_in = Alteryx.read("#1")
out_rows = []

for _, row in df_in.iterrows():
    pdf_path = row['PDF_Path']
    txt_out, status = _convert_pdf(pdf_path)
    out_rows.append({
        "PDF_Path": pdf_path,
        "Text_Path": txt_out,
        "Status": status
    })

# ─── Output results to Alteryx ───────────────────────────────
df_out = pd.DataFrame(out_rows, columns=_OUTPUT_COLUMNS)
Alteryx.write(df_out, 1)


Want to print your doc?
This is not the way.
Try clicking the ⋯ next to your doc name or using a keyboard shortcut (Ctrl+P) instead.