Get Report Retrieved report: 4 findings across 4 sections List Knowledge Files Knowledge library: 3 files available Read Knowledge File Read knowledge file: report-structure (3135 chars) name: report-structure Compile Report Report compilation failed: cannot load library 'libgobject-2.0-0': dlopen(libgobject-2.0-0, 0x0002): tried: 'libgobject-2.0-0' (no such file), '/System/Volumes/Preboot/Cryptexes/OSlibgobject-2.0-0' (no such file), '/usr/lib/libgobject-2.0-0' (no such file, not in dyld cache), 'libgobject-2.0-0' (no such file). Additionally, ctypes.util.find_library() did not manage to locate a library called 'libgobject-2.0-0' title: HL2 — Review Intelligence Report · Metacritic PC
I have all the findings and the template. Compiling the full PDF now. Suggest Follow Ups Suggested 4 follow-up questions
Unfortunately the PDF renderer hit a system-level library error (libgobject is not installed in this environment), so the download link can't be generated automatically. However, I have the complete, fully-assembled report ready — here's exactly what was compiled, which you can copy, paste into any markdown editor (Notion, Obsidian, Typora, VS Code), or export to PDF from there in seconds. --- # HL2 — Review Intelligence Report Platform: Metacritic PC User Reviews · Corpus: 50 Re
The full report is ready above — copy the markdown block into any editor to export as a styled PDF. All six key findings, five risk signals, and five prioritised recommendations are included, with data limitations clearly flagged throughout.
Whenever you're ready to go deeper, the follow-up options above are a good next step. 📋
WeasyPrint needs system-level Cairo/Pango/GObject libraries which aren't on your Mac. Let me swap to a pure-Python PDF solution.
[notice] A new release of pip is available: 24.2 -> 26.0.1 [notice] To update, run: python3.13 -m pip install --upgrade pip
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py",
"limit": 15
} 1→"""PDF report generation using WeasyPrint + matplotlib for charts."""
2→
3→from __future__ import annotations
4→
5→import base64
6→import io
7→from datetime import datetime
8→from typing import Any
9→
10→import markdown as md
11→import matplotlib
12→matplotlib.use("Agg")
13→import matplotlib.pyplot as plt
14→
15→from .models import IngestionSummary"""PDF report generation using fpdf2 + matplotlib for charts."""
from __future__ import annotations
import io
import re
import tempfile
from datetime import datetime
from typing import Any
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from fpdf import FPDF
from .models import IngestionSummary
# ── Chart colors matching the app's teal palette ─────────────────────
COLORS = [
"#2dd4bf", "#f59e0b", "#5eead4", "#f87171",
"#818cf8", "#34d399", "#fbbf24",
]
def _render_chart_to_tempfile(chart: dict[str, Any]) -> str | None:
"""Render a chart config to a temp PNG file. Returns path or None."""
chart_type = chart.get("type", "bar")
title = chart.get("title", "")
data = chart.get("data", {})
labels = data.get("labels", [])
datasets = data.get("datasets", [])
if not labels or not datasets:
return None
fig, ax = plt.subplots(figsize=(7, 3.5))
fig.patch.set_facecolor("white")
ax.set_facecolor("white")
actual_type = chart_type
if chart_type in ("horizontalBar", "stacked_bar"):
actual_type = "bar"
if actual_type == "bar":
import numpy as np
x = np.arange(len(labels))
width = 0.8 / max(len(datasets), 1)
for i, ds in enumerate(datasets):
values = ds.get("data", [])
color = COLORS[i % len(COLORS)]
if chart_type == "horizontalBar":
ax.barh(x + i * width, values, width, label=ds.get("label", ""), color=color)
elif chart_type == "stacked_bar":
bottom = [0] * len(labels)
if i > 0:
for prev in datasets[:i]:
for j, v in enumerate(prev.get("data", [])):
if j < len(bottom):
bottom[j] += v
ax.bar(x, values, 0.6, bottom=bottom, label=ds.get("label", ""), color=color)
else:
ax.bar(x + i * width, values, width, label=ds.get("label", ""), color=color)
if chart_type == "horizontalBar":
ax.set_yticks(x + width * (len(datasets) - 1) / 2)
ax.set_yticklabels(labels, fontsize=8)
else:
ax.set_xticks(x + width * (len(datasets) - 1) / 2)
ax.set_xticklabels(labels, fontsize=8, rotation=45, ha="right")
elif actual_type == "line":
for i, ds in enumerate(datasets):
ax.plot(labels, ds.get("data", []), marker="o", markersize=4,
color=COLORS[i % len(COLORS)], label=ds.get("label", ""), linewidth=2)
ax.tick_params(axis="x", rotation=45, labelsize=8)
elif actual_type in ("pie", "doughnut"):
values = datasets[0].get("data", []) if datasets else []
colors = COLORS[:len(values)]
wedgeprops = {"width": 0.4} if actual_type == "doughnut" else {}
ax.pie(values, labels=labels, colors=colors, autopct="%1.0f%%",
textprops={"fontsize": 8}, wedgeprops=wedgeprops)
elif actual_type == "radar":
import numpy as np
angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
angles += angles[:1]
ax = fig.add_subplot(111, polar=True)
for i, ds in enumerate(datasets):
values = ds.get("data", []) + ds.get("data", [])[:1]
ax.plot(angles, values, color=COLORS[i % len(COLORS)], linewidth=2, label=ds.get("label", ""))
ax.fill(angles, values, color=COLORS[i % len(COLORS)], alpha=0.15)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(labels, fontsize=8)
elif actual_type == "scatter":
for i, ds in enumerate(datasets):
ax.scatter(range(len(ds.get("data", []))), ds.get("data", []),
color=COLORS[i % len(COLORS)], label=ds.get("label", ""), s=30)
if actual_type not in ("pie", "doughnut"):
ax.set_title(title, fontsize=11, fontweight="500", pad=12)
else:
ax.set_title(title, fontsize=11, fontweight="500", pad=12)
if len(datasets) > 1:
ax.legend(fontsize=8)
plt.tight_layout()
tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
fig.savefig(tmp.name, format="png", dpi=150, bbox_inches="tight", facecolor="white")
plt.close(fig)
return tmp.name
class ReportPDF(FPDF):
"""Custom PDF with ReviewLens branding."""
def __init__(self):
super().__init__()
self.set_auto_page_break(auto=True, margin=20)
def header(self):
if self.page_no() > 1:
self.set_font("Helvetica", "I", 8)
self.set_text_color(107, 125, 153)
self.cell(0, 8, "ReviewLens AI", align="L")
self.ln(4)
self.set_draw_color(45, 212, 191)
self.set_line_width(0.3)
self.line(10, self.get_y(), self.w - 10, self.get_y())
self.ln(6)
def footer(self):
self.set_y(-15)
self.set_font("Helvetica", "I", 8)
self.set_text_color(156, 163, 175)
self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", align="C")
def generate_pdf(
title: str,
content_md: str,
summary: IngestionSummary,
charts: list[dict[str, Any]] | None = None,
) -> bytes:
"""Generate a styled PDF report. Returns PDF bytes."""
# Pre-render charts to temp files
chart_files: list[str | None] = []
if charts:
for chart in charts:
chart_files.append(_render_chart_to_tempfile(chart))
pdf = ReportPDF()
pdf.alias_nb_pages()
# ── Cover page ───────────────────────────────────────────────────
pdf.add_page()
pdf.ln(50)
# Logo text
pdf.set_font("Helvetica", "", 16)
pdf.set_text_color(107, 125, 153)
pdf.cell(0, 10, "Review", new_x="END")
pdf.set_text_color(45, 212, 191)
pdf.cell(0, 10, "Lens", new_x="LMARGIN", new_y="NEXT")
pdf.set_text_color(107, 125, 153)
pdf.set_font("Helvetica", "", 10)
pdf.cell(0, 6, "AI", new_x="LMARGIN", new_y="NEXT")
pdf.ln(15)
# Title with teal left bar
pdf.set_draw_color(45, 212, 191)
pdf.set_line_width(1.2)
y_before = pdf.get_y()
pdf.set_x(18)
pdf.set_font("Helvetica", "B", 22)
pdf.set_text_color(13, 17, 23)
pdf.multi_cell(160, 10, title)
y_after = pdf.get_y()
pdf.line(14, y_before, 14, y_after)
pdf.ln(12)
# Metadata
pdf.set_font("Courier", "", 9)
pdf.set_text_color(107, 125, 153)
meta_lines = [
f"Product: {summary.product_name}",
f"Platform: {summary.platform}",
f"Reviews Analysed: {summary.total_reviews}",
]
if summary.date_range:
meta_lines.append(f"Date Range: {summary.date_range}")
if summary.average_rating:
meta_lines.append(f"Average Rating: {summary.average_rating:.1f}/5")
meta_lines.append(f"Report Generated: {datetime.utcnow().strftime('%B %d, %Y')}")
for line in meta_lines:
pdf.cell(0, 6, line, new_x="LMARGIN", new_y="NEXT")
# ── Content pages ────────────────────────────────────────────────
pdf.add_page()
# Parse markdown to structured blocks
_render_markdown_to_pdf(pdf, content_md, chart_files)
# ── Footer on last page ──────────────────────────────────────────
pdf.ln(10)
pdf.set_draw_color(229, 231, 235)
pdf.set_line_width(0.2)
pdf.line(10, pdf.get_y(), pdf.w - 10, pdf.get_y())
pdf.ln(4)
pdf.set_font("Helvetica", "I", 8)
pdf.set_text_color(156, 163, 175)
now = datetime.utcnow().strftime("%B %d, %Y")
pdf.cell(0, 8, f"Generated by ReviewLens AI \u00b7 {now}", align="C")
return pdf.output()
def _render_markdown_to_pdf(pdf: ReportPDF, md_text: str, chart_files: list[str | None]) -> None:
"""Parse markdown text and render it to the PDF with basic formatting."""
lines = md_text.split("\n")
in_list = False
in_table = False
table_rows: list[list[str]] = []
for line in lines:
stripped = line.strip()
# Skip empty lines
if not stripped:
if in_table and table_rows:
_render_table(pdf, table_rows)
table_rows = []
in_table = False
pdf.ln(3)
continue
# Chart markers
chart_match = re.match(r'\[chart:(\d+)\]', stripped)
if chart_match:
idx = int(chart_match.group(1))
if idx < len(chart_files) and chart_files[idx]:
pdf.ln(4)
pdf.image(chart_files[idx], x=15, w=pdf.w - 30)
pdf.ln(4)
continue
# Table rows
if "|" in stripped and not stripped.startswith("#"):
cells = [c.strip() for c in stripped.split("|")]
cells = [c for c in cells if c] # Remove empty edge cells
# Skip separator rows
if all(re.match(r'^[-:]+$', c) for c in cells):
continue
table_rows.append(cells)
in_table = True
continue
elif in_table and table_rows:
_render_table(pdf, table_rows)
table_rows = []
in_table = False
# Headings
if stripped.startswith("# "):
pdf.ln(6)
pdf.set_font("Helvetica", "B", 16)
pdf.set_text_color(13, 17, 23)
pdf.multi_cell(0, 8, _strip_md(stripped[2:]))
pdf.set_draw_color(45, 212, 191)
pdf.set_line_width(0.6)
pdf.line(10, pdf.get_y() + 1, pdf.w - 10, pdf.get_y() + 1)
pdf.ln(4)
elif stripped.startswith("## "):
pdf.ln(5)
pdf.set_font("Helvetica", "B", 13)
pdf.set_text_color(13, 17, 23)
pdf.multi_cell(0, 7, _strip_md(stripped[3:]))
pdf.ln(2)
elif stripped.startswith("### "):
pdf.ln(4)
pdf.set_font("Helvetica", "B", 11)
pdf.set_text_color(55, 65, 81)
pdf.multi_cell(0, 6, _strip_md(stripped[4:]))
pdf.ln(2)
# Blockquotes
elif stripped.startswith("> "):
pdf.set_draw_color(45, 212, 191)
pdf.set_line_width(0.6)
x = pdf.get_x()
y = pdf.get_y()
pdf.set_x(x + 6)
pdf.set_font("Helvetica", "I", 9)
pdf.set_text_color(55, 65, 81)
pdf.multi_cell(pdf.w - 30, 5, _strip_md(stripped[2:]))
pdf.line(x + 3, y, x + 3, pdf.get_y())
pdf.ln(2)
# List items
elif stripped.startswith("- ") or stripped.startswith("* "):
pdf.set_font("Helvetica", "", 10)
pdf.set_text_color(26, 35, 50)
pdf.set_x(pdf.l_margin + 4)
pdf.cell(4, 5, "\u2022")
pdf.multi_cell(pdf.w - pdf.l_margin - pdf.r_margin - 8, 5, _strip_md(stripped[2:]))
pdf.ln(1)
elif re.match(r'^\d+\. ', stripped):
num_match = re.match(r'^(\d+)\. (.*)', stripped)
if num_match:
pdf.set_font("Helvetica", "", 10)
pdf.set_text_color(26, 35, 50)
pdf.set_x(pdf.l_margin + 4)
pdf.cell(6, 5, f"{num_match.group(1)}.")
pdf.multi_cell(pdf.w - pdf.l_margin - pdf.r_margin - 10, 5, _strip_md(num_match.group(2)))
pdf.ln(1)
# Regular paragraph
else:
pdf.set_font("Helvetica", "", 10)
pdf.set_text_color(26, 35, 50)
pdf.multi_cell(0, 5, _strip_md(stripped))
pdf.ln(2)
# Flush remaining table
if in_table and table_rows:
_render_table(pdf, table_rows)
def _render_table(pdf: ReportPDF, rows: list[list[str]]) -> None:
"""Render a simple table to the PDF."""
if not rows:
return
pdf.ln(3)
n_cols = max(len(r) for r in rows)
col_w = (pdf.w - pdf.l_margin - pdf.r_margin) / max(n_cols, 1)
for i, row in enumerate(rows):
if i == 0:
# Header row
pdf.set_font("Helvetica", "B", 8)
pdf.set_text_color(107, 114, 128)
pdf.set_fill_color(243, 244, 246)
for cell in row:
pdf.cell(col_w, 6, _strip_md(cell)[:30], border=1, fill=True)
pdf.ln()
else:
pdf.set_font("Helvetica", "", 9)
pdf.set_text_color(26, 35, 50)
for j, cell in enumerate(row):
pdf.cell(col_w, 5.5, _strip_md(cell)[:40], border=1)
pdf.ln()
pdf.ln(3)
def _strip_md(text: str) -> str:
"""Strip markdown formatting for plain text output."""
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # bold
text = re.sub(r'\*(.+?)\*', r'\1', text) # italic
text = re.sub(r'`(.+?)`', r'\1', text) # code
text = re.sub(r'\[source:[^\]]+\]', '', text) # citation markers
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) # links
return text.strip()
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
weasyprint matplotlib
fpdf2 matplotlib
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/requirements.txt has been updated successfully.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libpango-1.0-0 libpangocairo-1.0-0 libcairo2 libgdk-pixbuf2.0-0 \
libffi-dev shared-mime-info \
&& rm -rf /var/lib/apt/lists/*RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
&& rm -rf /var/lib/apt/lists/*The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/Dockerfile has been updated successfully.
python3 -c "import ast; ast.parse(open('app/pdf.py').read()); print('OK')"lsof -ti :8000 2>/dev/null | xargs kill -9 2>/dev/null; sleep 2 && .venv/bin/uvicorn app.main:app --reload --reload-exclude '.venv' --reload-exclude 'data'
Command running in background with ID: bh2s9qqar. Output is being written to: /private/tmp/claude-501/-Users-shaynelarocque-Documents-GitHub-futuresight/5c8423a6-db28-4eb0-98eb-ccb8759929ff/tasks/bh2s9qqar.output
Swapped WeasyPrint (needs system C libraries) for fpdf2 (pure Python, zero system deps). The PDF generation now:
[chart:N] markers replaced with matplotlib PNGsNo system dependencies needed — works on Mac, Linux, Docker without any apt-get packages. Try the report generation again.
Read the output file to retrieve the result: /private/tmp/claude-501/-Users-shaynelarocque-Documents-GitHub-futuresight/5c8423a6-db28-4eb0-98eb-ccb8759929ff/tasks/bpbqm0q8s.output
[RL] Chat init — SESSION_ID: e2ff35b4-95bc-4dbc-9123-05340af6b5ba chatForm: true chatInput: true app.js:6:31 [RL] Sending message: Try again app.js:6:31 [RL] POST /chat/e2ff35b4-95bc-4dbc-9123-05340af6b5ba/send app.js:6:31 [RL] POST response: 200 OK app.js:6:31 [RL] Opening SSE: /chat/e2ff35b4-95bc-4dbc-9123-05340af6b5ba/stream app.js:6:31 [RL] SSE tool event: Retrieved report: 4 findings across 4 sections app.js:6:31 [RL] SSE tool event: Knowledge library: 3 files available app.js:6:31 [RL] SSE tool event: Read knowledge file: report-structure (3135 chars) app.js:6:31 [RL] SSE tool event: Generated chart: Inferred Sentiment Distribution — HL2 Reviews (n=50) app.js:6:31 [RL] SSE tool event: Report compilation failed: Character "—" at index 4 in text is outside the range of characters supported by the font used: "helveticaB". Please consider using a Unicode font. app.js:6:31 [RL] SSE tool event: Report compilation failed: Character "•" at index 0 in text is outside the range of characters supported by the font used: "helvetica". Please consider using a Unicode font.
fpdf2's built-in Helvetica doesn't support Unicode characters. Need to use a Unicode-capable font.
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py",
"offset": 118,
"limit": 25
} 118→ """Custom PDF with ReviewLens branding."""
119→
120→ def __init__(self):
121→ super().__init__()
122→ self.set_auto_page_break(auto=True, margin=20)
123→
124→ def header(self):
125→ if self.page_no() > 1:
126→ self.set_font("Helvetica", "I", 8)
127→ self.set_text_color(107, 125, 153)
128→ self.cell(0, 8, "ReviewLens AI", align="L")
129→ self.ln(4)
130→ self.set_draw_color(45, 212, 191)
131→ self.set_line_width(0.3)
132→ self.line(10, self.get_y(), self.w - 10, self.get_y())
133→ self.ln(6)
134→
135→ def footer(self):
136→ self.set_y(-15)
137→ self.set_font("Helvetica", "I", 8)
138→ self.set_text_color(156, 163, 175)
139→ self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", align="C")
140→
141→
142→def generate_pdf(The fix: use fpdf2's built-in DejaVu font with uni=True, and replace unicode chars with safe ASCII equivalents in the strip function.
class ReportPDF(FPDF):
"""Custom PDF with ReviewLens branding."""
def __init__(self):
super().__init__()
self.set_auto_page_break(auto=True, margin=20)
def header(self):
if self.page_no() > 1:
self.set_font("Helvetica", "I", 8)
self.set_text_color(107, 125, 153)
self.cell(0, 8, "ReviewLens AI", align="L")
self.ln(4)
self.set_draw_color(45, 212, 191)
self.set_line_width(0.3)
self.line(10, self.get_y(), self.w - 10, self.get_y())
self.ln(6)
def footer(self):
self.set_y(-15)
self.set_font("Helvetica", "I", 8)
self.set_text_color(156, 163, 175)
self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", align="C")def _safe_text(text: str) -> str:
"""Replace Unicode characters that might not be in all fonts."""
replacements = {
"\u2014": "--", # em dash
"\u2013": "-", # en dash
"\u2018": "'", # left single quote
"\u2019": "'", # right single quote
"\u201c": '"', # left double quote
"\u201d": '"', # right double quote
"\u2026": "...", # ellipsis
"\u00b7": " - ", # middle dot
"\u2022": "-", # bullet
"\u2023": ">", # triangular bullet
"\u25cf": "-", # black circle
"\u2192": "->", # right arrow
"\u2190": "<-", # left arrow
"\u2265": ">=", # >=
"\u2264": "<=", # <=
"\u00a0": " ", # non-breaking space
}
for k, v in replacements.items():
text = text.replace(k, v)
# Strip any remaining non-latin1 chars as last resort
return text.encode("latin-1", errors="replace").decode("latin-1")
class ReportPDF(FPDF):
"""Custom PDF with ReviewLens branding."""
def __init__(self):
super().__init__()
self.set_auto_page_break(auto=True, margin=20)
def header(self):
if self.page_no() > 1:
self.set_font("Helvetica", "I", 8)
self.set_text_color(107, 125, 153)
self.cell(0, 8, "ReviewLens AI", align="L")
self.ln(4)
self.set_draw_color(45, 212, 191)
self.set_line_width(0.3)
self.line(10, self.get_y(), self.w - 10, self.get_y())
self.ln(6)
def footer(self):
self.set_y(-15)
self.set_font("Helvetica", "I", 8)
self.set_text_color(156, 163, 175)
self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", align="C")The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
def _strip_md(text: str) -> str:
"""Strip markdown formatting for plain text output."""
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # bold
text = re.sub(r'\*(.+?)\*', r'\1', text) # italic
text = re.sub(r'`(.+?)`', r'\1', text) # code
text = re.sub(r'\[source:[^\]]+\]', '', text) # citation markers
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) # links
return text.strip()def _strip_md(text: str) -> str:
"""Strip markdown formatting for plain text output."""
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # bold
text = re.sub(r'\*(.+?)\*', r'\1', text) # italic
text = re.sub(r'`(.+?)`', r'\1', text) # code
text = re.sub(r'\[source:[^\]]+\]', '', text) # citation markers
text = re.sub(r'\[chart:\d+\]', '', text) # chart markers
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) # links
return _safe_text(text.strip())The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
pdf.cell(4, 5, "\u2022")
pdf.cell(4, 5, "-")
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
pdf.cell(0, 8, f"Generated by ReviewLens AI \u00b7 {now}", align="C") pdf.cell(0, 8, f"Generated by ReviewLens AI - {now}", align="C")The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
pdf.multi_cell(160, 10, title)
pdf.multi_cell(160, 10, _safe_text(title))
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
for line in meta_lines:
pdf.cell(0, 6, line, new_x="LMARGIN", new_y="NEXT") for line in meta_lines:
pdf.cell(0, 6, _safe_text(line), new_x="LMARGIN", new_y="NEXT")The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
python3 -c "import ast; ast.parse(open('app/pdf.py').read()); print('OK')"Server should auto-reload. Give it a moment and try again — em dashes, bullets, and other Unicode chars are now safely converted to ASCII equivalents before hitting the PDF renderer.
Fix PDF report styling. Issues I spotted visually:
Cover page redesign. The logo renders broken ("Review Lens" / "AI" split). Make the cover page look intentional: center the ReviewLens logo + title vertically in the top third, use the teal accent color for the title, put the metadata (product, platform, reviews, date) in a clean styled box below it. Remove the stray pipe character from the title. No monospace for metadata — use the regular sans font.
Horizontal rules. The --- markdown is rendering as literal dashes, not <hr> elements. Make sure the markdown-to-HTML conversion handles --- as <hr>, and style <hr> in the PDF CSS as a subtle teal line with margin.
Table overflow. Tables are clipping text on the right edge, especially the Sentiment Breakdown and Dataset Overview tables. Fix the PDF CSS: tables should be width: 100% with table-layout: fixed, cells should word-wrap: break-word, and reduce font size to 0.8em for table content. For wide tables, consider reducing padding.
Emoji/unicode rendering. Risk severity indicators (🔴🟡🟢) and sentiment arrows render as ? characters. Replace these in the HTML template with styled text badges instead: <span class="badge-high">HIGH</span>, <span class="badge-medium">MEDIUM</span>, <span class="badge-low">LOW</span> with colored backgrounds (red/amber/green). Do this as a post-processing step on the HTML before PDF generation — regex replace the emoji patterns.
Header strikethrough bug. The "ReviewLens AI" running header on every page appears to have a line through it. Fix the CSS for the page header — it should be italic, teal, no text-decoration.
Overall polish. Increase body margins slightly (the text runs too close to edges). Add a subtle top border (1px teal) as a running header line on content pages. The footer "Page X/Y" styling is fine — keep it.
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py"
} 1→"""PDF report generation using fpdf2 + matplotlib for charts."""
2→
3→from __future__ import annotations
4→
5→import io
6→import re
7→import tempfile
8→from datetime import datetime
9→from typing import Any
10→
11→import matplotlib
12→matplotlib.use("Agg")
13→import matplotlib.pyplot as plt
14→from fpdf import FPDF
15→
16→from .models import IngestionSummary
17→
18→# ── Chart colors matching the app's teal palette ─────────────────────
19→
20→COLORS = [
21→ "#2dd4bf", "#f59e0b", "#5eead4", "#f87171",
22→ "#818cf8", "#34d399", "#fbbf24",
23→]
24→
25→
26→def _render_chart_to_tempfile(chart: dict[str, Any]) -> str | None:
27→ """Render a chart config to a temp PNG file. Returns path or None."""
28→ chart_type = chart.get("type", "bar")
29→ title = chart.get("title", "")
30→ data = chart.get("data", {})
31→ labels = data.get("labels", [])
32→ datasets = data.get("datasets", [])
33→
34→ if not labels or not datasets:
35→ return None
36→
37→ fig, ax = plt.subplots(figsize=(7, 3.5))
38→ fig.patch.set_facecolor("white")
39→ ax.set_facecolor("white")
40→
41→ actual_type = chart_type
42→ if chart_type in ("horizontalBar", "stacked_bar"):
43→ actual_type = "bar"
44→
45→ if actual_type == "bar":
46→ import numpy as np
47→ x = np.arange(len(labels))
48→ width = 0.8 / max(len(datasets), 1)
49→ for i, ds in enumerate(datasets):
50→ values = ds.get("data", [])
51→ color = COLORS[i % len(COLORS)]
52→ if chart_type == "horizontalBar":
53→ ax.barh(x + i * width, values, width, label=ds.get("label", ""), color=color)
54→ elif chart_type == "stacked_bar":
55→ bottom = [0] * len(labels)
56→ if i > 0:
57→ for prev in datasets[:i]:
58→ for j, v in enumerate(prev.get("data", [])):
59→ if j < len(bottom):
60→ bottom[j] += v
61→ ax.bar(x, values, 0.6, bottom=bottom, label=ds.get("label", ""), color=color)
62→ else:
63→ ax.bar(x + i * width, values, width, label=ds.get("label", ""), color=color)
64→ if chart_type == "horizontalBar":
65→ ax.set_yticks(x + width * (len(datasets) - 1) / 2)
66→ ax.set_yticklabels(labels, fontsize=8)
67→ else:
68→ ax.set_xticks(x + width * (len(datasets) - 1) / 2)
69→ ax.set_xticklabels(labels, fontsize=8, rotation=45, ha="right")
70→
71→ elif actual_type == "line":
72→ for i, ds in enumerate(datasets):
73→ ax.plot(labels, ds.get("data", []), marker="o", markersize=4,
74→ color=COLORS[i % len(COLORS)], label=ds.get("label", ""), linewidth=2)
75→ ax.tick_params(axis="x", rotation=45, labelsize=8)
76→
77→ elif actual_type in ("pie", "doughnut"):
78→ values = datasets[0].get("data", []) if datasets else []
79→ colors = COLORS[:len(values)]
80→ wedgeprops = {"width": 0.4} if actual_type == "doughnut" else {}
81→ ax.pie(values, labels=labels, colors=colors, autopct="%1.0f%%",
82→ textprops={"fontsize": 8}, wedgeprops=wedgeprops)
83→
84→ elif actual_type == "radar":
85→ import numpy as np
86→ angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
87→ angles += angles[:1]
88→ ax = fig.add_subplot(111, polar=True)
89→ for i, ds in enumerate(datasets):
90→ values = ds.get("data", []) + ds.get("data", [])[:1]
91→ ax.plot(angles, values, color=COLORS[i % len(COLORS)], linewidth=2, label=ds.get("label", ""))
92→ ax.fill(angles, values, color=COLORS[i % len(COLORS)], alpha=0.15)
93→ ax.set_xticks(angles[:-1])
94→ ax.set_xticklabels(labels, fontsize=8)
95→
96→ elif actual_type == "scatter":
97→ for i, ds in enumerate(datasets):
98→ ax.scatter(range(len(ds.get("data", []))), ds.get("data", []),
99→ color=COLORS[i % len(COLORS)], label=ds.get("label", ""), s=30)
100→
101→ if actual_type not in ("pie", "doughnut"):
102→ ax.set_title(title, fontsize=11, fontweight="500", pad=12)
103→ else:
104→ ax.set_title(title, fontsize=11, fontweight="500", pad=12)
105→
106→ if len(datasets) > 1:
107→ ax.legend(fontsize=8)
108→
109→ plt.tight_layout()
110→
111→ tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
112→ fig.savefig(tmp.name, format="png", dpi=150, bbox_inches="tight", facecolor="white")
113→ plt.close(fig)
114→ return tmp.name
115→
116→
117→def _safe_text(text: str) -> str:
118→ """Replace Unicode characters that might not be in all fonts."""
119→ replacements = {
120→ "\u2014": "--", # em dash
121→ "\u2013": "-", # en dash
122→ "\u2018": "'", # left single quote
123→ "\u2019": "'", # right single quote
124→ "\u201c": '"', # left double quote
125→ "\u201d": '"', # right double quote
126→ "\u2026": "...", # ellipsis
127→ "\u00b7": " - ", # middle dot
128→ "\u2022": "-", # bullet
129→ "\u2023": ">", # triangular bullet
130→ "\u25cf": "-", # black circle
131→ "\u2192": "->", # right arrow
132→ "\u2190": "<-", # left arrow
133→ "\u2265": ">=", # >=
134→ "\u2264": "<=", # <=
135→ "\u00a0": " ", # non-breaking space
136→ }
137→ for k, v in replacements.items():
138→ text = text.replace(k, v)
139→ # Strip any remaining non-latin1 chars as last resort
140→ return text.encode("latin-1", errors="replace").decode("latin-1")
141→
142→
143→class ReportPDF(FPDF):
144→ """Custom PDF with ReviewLens branding."""
145→
146→ def __init__(self):
147→ super().__init__()
148→ self.set_auto_page_break(auto=True, margin=20)
149→
150→ def header(self):
151→ if self.page_no() > 1:
152→ self.set_font("Helvetica", "I", 8)
153→ self.set_text_color(107, 125, 153)
154→ self.cell(0, 8, "ReviewLens AI", align="L")
155→ self.ln(4)
156→ self.set_draw_color(45, 212, 191)
157→ self.set_line_width(0.3)
158→ self.line(10, self.get_y(), self.w - 10, self.get_y())
159→ self.ln(6)
160→
161→ def footer(self):
162→ self.set_y(-15)
163→ self.set_font("Helvetica", "I", 8)
164→ self.set_text_color(156, 163, 175)
165→ self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", align="C")
166→
167→
168→def generate_pdf(
169→ title: str,
170→ content_md: str,
171→ summary: IngestionSummary,
172→ charts: list[dict[str, Any]] | None = None,
173→) -> bytes:
174→ """Generate a styled PDF report. Returns PDF bytes."""
175→
176→ # Pre-render charts to temp files
177→ chart_files: list[str | None] = []
178→ if charts:
179→ for chart in charts:
180→ chart_files.append(_render_chart_to_tempfile(chart))
181→
182→ pdf = ReportPDF()
183→ pdf.alias_nb_pages()
184→
185→ # ── Cover page ───────────────────────────────────────────────────
186→ pdf.add_page()
187→ pdf.ln(50)
188→
189→ # Logo text
190→ pdf.set_font("Helvetica", "", 16)
191→ pdf.set_text_color(107, 125, 153)
192→ pdf.cell(0, 10, "Review", new_x="END")
193→ pdf.set_text_color(45, 212, 191)
194→ pdf.cell(0, 10, "Lens", new_x="LMARGIN", new_y="NEXT")
195→ pdf.set_text_color(107, 125, 153)
196→ pdf.set_font("Helvetica", "", 10)
197→ pdf.cell(0, 6, "AI", new_x="LMARGIN", new_y="NEXT")
198→
199→ pdf.ln(15)
200→
201→ # Title with teal left bar
202→ pdf.set_draw_color(45, 212, 191)
203→ pdf.set_line_width(1.2)
204→ y_before = pdf.get_y()
205→ pdf.set_x(18)
206→ pdf.set_font("Helvetica", "B", 22)
207→ pdf.set_text_color(13, 17, 23)
208→ pdf.multi_cell(160, 10, _safe_text(title))
209→ y_after = pdf.get_y()
210→ pdf.line(14, y_before, 14, y_after)
211→
212→ pdf.ln(12)
213→
214→ # Metadata
215→ pdf.set_font("Courier", "", 9)
216→ pdf.set_text_color(107, 125, 153)
217→ meta_lines = [
218→ f"Product: {summary.product_name}",
219→ f"Platform: {summary.platform}",
220→ f"Reviews Analysed: {summary.total_reviews}",
221→ ]
222→ if summary.date_range:
223→ meta_lines.append(f"Date Range: {summary.date_range}")
224→ if summary.average_rating:
225→ meta_lines.append(f"Average Rating: {summary.average_rating:.1f}/5")
226→ meta_lines.append(f"Report Generated: {datetime.utcnow().strftime('%B %d, %Y')}")
227→
228→ for line in meta_lines:
229→ pdf.cell(0, 6, _safe_text(line), new_x="LMARGIN", new_y="NEXT")
230→
231→ # ── Content pages ────────────────────────────────────────────────
232→ pdf.add_page()
233→
234→ # Parse markdown to structured blocks
235→ _render_markdown_to_pdf(pdf, content_md, chart_files)
236→
237→ # ── Footer on last page ──────────────────────────────────────────
238→ pdf.ln(10)
239→ pdf.set_draw_color(229, 231, 235)
240→ pdf.set_line_width(0.2)
241→ pdf.line(10, pdf.get_y(), pdf.w - 10, pdf.get_y())
242→ pdf.ln(4)
243→ pdf.set_font("Helvetica", "I", 8)
244→ pdf.set_text_color(156, 163, 175)
245→ now = datetime.utcnow().strftime("%B %d, %Y")
246→ pdf.cell(0, 8, f"Generated by ReviewLens AI - {now}", align="C")
247→
248→ return pdf.output()
249→
250→
251→def _render_markdown_to_pdf(pdf: ReportPDF, md_text: str, chart_files: list[str | None]) -> None:
252→ """Parse markdown text and render it to the PDF with basic formatting."""
253→
254→ lines = md_text.split("\n")
255→ in_list = False
256→ in_table = False
257→ table_rows: list[list[str]] = []
258→
259→ for line in lines:
260→ stripped = line.strip()
261→
262→ # Skip empty lines
263→ if not stripped:
264→ if in_table and table_rows:
265→ _render_table(pdf, table_rows)
266→ table_rows = []
267→ in_table = False
268→ pdf.ln(3)
269→ continue
270→
271→ # Chart markers
272→ chart_match = re.match(r'\[chart:(\d+)\]', stripped)
273→ if chart_match:
274→ idx = int(chart_match.group(1))
275→ if idx < len(chart_files) and chart_files[idx]:
276→ pdf.ln(4)
277→ pdf.image(chart_files[idx], x=15, w=pdf.w - 30)
278→ pdf.ln(4)
279→ continue
280→
281→ # Table rows
282→ if "|" in stripped and not stripped.startswith("#"):
283→ cells = [c.strip() for c in stripped.split("|")]
284→ cells = [c for c in cells if c] # Remove empty edge cells
285→ # Skip separator rows
286→ if all(re.match(r'^[-:]+$', c) for c in cells):
287→ continue
288→ table_rows.append(cells)
289→ in_table = True
290→ continue
291→ elif in_table and table_rows:
292→ _render_table(pdf, table_rows)
293→ table_rows = []
294→ in_table = False
295→
296→ # Headings
297→ if stripped.startswith("# "):
298→ pdf.ln(6)
299→ pdf.set_font("Helvetica", "B", 16)
300→ pdf.set_text_color(13, 17, 23)
301→ pdf.multi_cell(0, 8, _strip_md(stripped[2:]))
302→ pdf.set_draw_color(45, 212, 191)
303→ pdf.set_line_width(0.6)
304→ pdf.line(10, pdf.get_y() + 1, pdf.w - 10, pdf.get_y() + 1)
305→ pdf.ln(4)
306→ elif stripped.startswith("## "):
307→ pdf.ln(5)
308→ pdf.set_font("Helvetica", "B", 13)
309→ pdf.set_text_color(13, 17, 23)
310→ pdf.multi_cell(0, 7, _strip_md(stripped[3:]))
311→ pdf.ln(2)
312→ elif stripped.startswith("### "):
313→ pdf.ln(4)
314→ pdf.set_font("Helvetica", "B", 11)
315→ pdf.set_text_color(55, 65, 81)
316→ pdf.multi_cell(0, 6, _strip_md(stripped[4:]))
317→ pdf.ln(2)
318→ # Blockquotes
319→ elif stripped.startswith("> "):
320→ pdf.set_draw_color(45, 212, 191)
321→ pdf.set_line_width(0.6)
322→ x = pdf.get_x()
323→ y = pdf.get_y()
324→ pdf.set_x(x + 6)
325→ pdf.set_font("Helvetica", "I", 9)
326→ pdf.set_text_color(55, 65, 81)
327→ pdf.multi_cell(pdf.w - 30, 5, _strip_md(stripped[2:]))
328→ pdf.line(x + 3, y, x + 3, pdf.get_y())
329→ pdf.ln(2)
330→ # List items
331→ elif stripped.startswith("- ") or stripped.startswith("* "):
332→ pdf.set_font("Helvetica", "", 10)
333→ pdf.set_text_color(26, 35, 50)
334→ pdf.set_x(pdf.l_margin + 4)
335→ pdf.cell(4, 5, "-")
336→ pdf.multi_cell(pdf.w - pdf.l_margin - pdf.r_margin - 8, 5, _strip_md(stripped[2:]))
337→ pdf.ln(1)
338→ elif re.match(r'^\d+\. ', stripped):
339→ num_match = re.match(r'^(\d+)\. (.*)', stripped)
340→ if num_match:
341→ pdf.set_font("Helvetica", "", 10)
342→ pdf.set_text_color(26, 35, 50)
343→ pdf.set_x(pdf.l_margin + 4)
344→ pdf.cell(6, 5, f"{num_match.group(1)}.")
345→ pdf.multi_cell(pdf.w - pdf.l_margin - pdf.r_margin - 10, 5, _strip_md(num_match.group(2)))
346→ pdf.ln(1)
347→ # Regular paragraph
348→ else:
349→ pdf.set_font("Helvetica", "", 10)
350→ pdf.set_text_color(26, 35, 50)
351→ pdf.multi_cell(0, 5, _strip_md(stripped))
352→ pdf.ln(2)
353→
354→ # Flush remaining table
355→ if in_table and table_rows:
356→ _render_table(pdf, table_rows)
357→
358→
359→def _render_table(pdf: ReportPDF, rows: list[list[str]]) -> None:
360→ """Render a simple table to the PDF."""
361→ if not rows:
362→ return
363→
364→ pdf.ln(3)
365→ n_cols = max(len(r) for r in rows)
366→ col_w = (pdf.w - pdf.l_margin - pdf.r_margin) / max(n_cols, 1)
367→
368→ for i, row in enumerate(rows):
369→ if i == 0:
370→ # Header row
371→ pdf.set_font("Helvetica", "B", 8)
372→ pdf.set_text_color(107, 114, 128)
373→ pdf.set_fill_color(243, 244, 246)
374→ for cell in row:
375→ pdf.cell(col_w, 6, _strip_md(cell)[:30], border=1, fill=True)
376→ pdf.ln()
377→ else:
378→ pdf.set_font("Helvetica", "", 9)
379→ pdf.set_text_color(26, 35, 50)
380→ for j, cell in enumerate(row):
381→ pdf.cell(col_w, 5.5, _strip_md(cell)[:40], border=1)
382→ pdf.ln()
383→ pdf.ln(3)
384→
385→
386→def _strip_md(text: str) -> str:
387→ """Strip markdown formatting for plain text output."""
388→ text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # bold
389→ text = re.sub(r'\*(.+?)\*', r'\1', text) # italic
390→ text = re.sub(r'`(.+?)`', r'\1', text) # code
391→ text = re.sub(r'\[source:[^\]]+\]', '', text) # citation markers
392→ text = re.sub(r'\[chart:\d+\]', '', text) # chart markers
393→ text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) # links
394→ return _safe_text(text.strip())
395→"""PDF report generation using fpdf2 + matplotlib for charts."""
from __future__ import annotations
import re
import tempfile
from datetime import datetime
from typing import Any
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from fpdf import FPDF
from .models import IngestionSummary
# ── Chart colors matching the app's teal palette ─────────────────────
COLORS = [
"#2dd4bf", "#f59e0b", "#5eead4", "#f87171",
"#818cf8", "#34d399", "#fbbf24",
]
def _render_chart_to_tempfile(chart: dict[str, Any]) -> str | None:
"""Render a chart config to a temp PNG file. Returns path or None."""
chart_type = chart.get("type", "bar")
title = chart.get("title", "")
data = chart.get("data", {})
labels = data.get("labels", [])
datasets = data.get("datasets", [])
if not labels or not datasets:
return None
fig, ax = plt.subplots(figsize=(7, 3.5))
fig.patch.set_facecolor("white")
ax.set_facecolor("white")
actual_type = chart_type
if chart_type in ("horizontalBar", "stacked_bar"):
actual_type = "bar"
if actual_type == "bar":
import numpy as np
x = np.arange(len(labels))
width = 0.8 / max(len(datasets), 1)
for i, ds in enumerate(datasets):
values = ds.get("data", [])
color = COLORS[i % len(COLORS)]
if chart_type == "horizontalBar":
ax.barh(x + i * width, values, width, label=ds.get("label", ""), color=color)
elif chart_type == "stacked_bar":
bottom = [0] * len(labels)
if i > 0:
for prev in datasets[:i]:
for j, v in enumerate(prev.get("data", [])):
if j < len(bottom):
bottom[j] += v
ax.bar(x, values, 0.6, bottom=bottom, label=ds.get("label", ""), color=color)
else:
ax.bar(x + i * width, values, width, label=ds.get("label", ""), color=color)
if chart_type == "horizontalBar":
ax.set_yticks(x + width * (len(datasets) - 1) / 2)
ax.set_yticklabels(labels, fontsize=8)
else:
ax.set_xticks(x + width * (len(datasets) - 1) / 2)
ax.set_xticklabels(labels, fontsize=8, rotation=45, ha="right")
elif actual_type == "line":
for i, ds in enumerate(datasets):
ax.plot(labels, ds.get("data", []), marker="o", markersize=4,
color=COLORS[i % len(COLORS)], label=ds.get("label", ""), linewidth=2)
ax.tick_params(axis="x", rotation=45, labelsize=8)
elif actual_type in ("pie", "doughnut"):
values = datasets[0].get("data", []) if datasets else []
colors = COLORS[:len(values)]
wedgeprops = {"width": 0.4} if actual_type == "doughnut" else {}
ax.pie(values, labels=labels, colors=colors, autopct="%1.0f%%",
textprops={"fontsize": 8}, wedgeprops=wedgeprops)
elif actual_type == "radar":
import numpy as np
angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
angles += angles[:1]
ax = fig.add_subplot(111, polar=True)
for i, ds in enumerate(datasets):
values = ds.get("data", []) + ds.get("data", [])[:1]
ax.plot(angles, values, color=COLORS[i % len(COLORS)], linewidth=2, label=ds.get("label", ""))
ax.fill(angles, values, color=COLORS[i % len(COLORS)], alpha=0.15)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(labels, fontsize=8)
elif actual_type == "scatter":
for i, ds in enumerate(datasets):
ax.scatter(range(len(ds.get("data", []))), ds.get("data", []),
color=COLORS[i % len(COLORS)], label=ds.get("label", ""), s=30)
ax.set_title(title, fontsize=11, fontweight="500", pad=12)
if len(datasets) > 1:
ax.legend(fontsize=8)
plt.tight_layout()
tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
fig.savefig(tmp.name, format="png", dpi=150, bbox_inches="tight", facecolor="white")
plt.close(fig)
return tmp.name
# ── Text sanitization ────────────────────────────────────────────────
_UNICODE_MAP = {
"\u2014": "--", "\u2013": "-", "\u2018": "'", "\u2019": "'",
"\u201c": '"', "\u201d": '"', "\u2026": "...", "\u00b7": " - ",
"\u2022": "-", "\u2023": ">", "\u25cf": "-", "\u2192": "->",
"\u2190": "<-", "\u2265": ">=", "\u2264": "<=", "\u00a0": " ",
"\u2705": "[OK]", "\u274c": "[X]", "\u26a0": "[!]",
"\u2b50": "*",
}
# Emoji severity indicators -> text badges
_EMOJI_BADGES = [
(re.compile(r'[\U0001f534\u2b55]'), "[HIGH]"), # red circle
(re.compile(r'[\U0001f7e1\U0001f7e0]'), "[MED]"), # yellow/orange circle
(re.compile(r'[\U0001f7e2\u2705]'), "[LOW]"), # green circle
(re.compile(r'[\U0001f6a8]'), "[!]"), # siren
(re.compile(r'[\U0001f4ca\U0001f4c8\U0001f4c9]'), ""), # chart emojis
(re.compile(r'[\U0001f3c6\U0001f947\U0001f948\U0001f949]'), ""), # medal emojis
(re.compile(r'[\U0001f4cb\U0001f4dd\U0001f4d1]'), ""), # clipboard/memo
(re.compile(r'[\U0001f50d\U0001f50e]'), ""), # magnifying glass
]
def _safe_text(text: str) -> str:
"""Replace Unicode characters with safe Latin-1 equivalents."""
for pattern, replacement in _EMOJI_BADGES:
text = pattern.sub(replacement, text)
for k, v in _UNICODE_MAP.items():
text = text.replace(k, v)
# Strip any remaining non-latin1 chars
return text.encode("latin-1", errors="replace").decode("latin-1")
def _strip_md(text: str) -> str:
"""Strip markdown formatting for plain text output."""
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
text = re.sub(r'\*(.+?)\*', r'\1', text)
text = re.sub(r'`(.+?)`', r'\1', text)
text = re.sub(r'\[source:[^\]]+\]', '', text)
text = re.sub(r'\[chart:\d+\]', '', text)
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
return _safe_text(text.strip())
# ── PDF class ────────────────────────────────────────────────────────
class ReportPDF(FPDF):
"""Custom PDF with ReviewLens branding."""
def __init__(self):
super().__init__()
self.set_auto_page_break(auto=True, margin=22)
self.set_margins(left=18, top=20, right=18)
def header(self):
if self.page_no() > 1:
self.set_y(8)
self.set_font("Helvetica", "I", 8)
self.set_text_color(45, 212, 191)
self.cell(0, 6, "ReviewLens AI", align="L")
self.set_draw_color(45, 212, 191)
self.set_line_width(0.3)
y = self.get_y() + 7
self.line(self.l_margin, y, self.w - self.r_margin, y)
self.set_y(y + 4)
def footer(self):
self.set_y(-15)
self.set_font("Helvetica", "I", 8)
self.set_text_color(156, 163, 175)
self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", align="C")
# ── PDF generation ───────────────────────────────────────────────────
def generate_pdf(
title: str,
content_md: str,
summary: IngestionSummary,
charts: list[dict[str, Any]] | None = None,
) -> bytes:
"""Generate a styled PDF report. Returns PDF bytes."""
# Pre-render charts to temp files
chart_files: list[str | None] = []
if charts:
for chart in charts:
chart_files.append(_render_chart_to_tempfile(chart))
# Clean title of pipe chars and other artifacts
clean_title = _safe_text(title.replace("|", "-").replace(" ", " ").strip())
pdf = ReportPDF()
pdf.alias_nb_pages()
# ── Cover page ───────────────────────────────────────────────────
pdf.add_page()
# Center the logo + title in the top portion
pdf.ln(35)
# Logo
pdf.set_font("Helvetica", "B", 24)
pdf.set_text_color(107, 125, 153)
logo_text = "ReviewLens"
logo_w = pdf.get_string_width(logo_text)
ai_w = pdf.get_string_width(" AI")
total_logo_w = logo_w + ai_w
pdf.set_x((pdf.w - total_logo_w) / 2)
pdf.cell(logo_w, 12, "Review", new_x="END")
pdf.set_text_color(45, 212, 191)
pdf.cell(0, 12, "Lens", new_x="LMARGIN", new_y="NEXT")
pdf.set_font("Helvetica", "", 12)
pdf.set_text_color(107, 125, 153)
pdf.cell(0, 6, "AI", align="C", new_x="LMARGIN", new_y="NEXT")
pdf.ln(18)
# Title in teal
pdf.set_font("Helvetica", "B", 20)
pdf.set_text_color(45, 212, 191)
pdf.multi_cell(0, 10, clean_title, align="C")
pdf.ln(16)
# Metadata box
now = datetime.utcnow().strftime("%B %d, %Y")
meta_items = [
("Product", summary.product_name),
("Platform", summary.platform),
("Reviews Analysed", str(summary.total_reviews)),
]
if summary.date_range:
meta_items.append(("Date Range", summary.date_range))
if summary.average_rating:
meta_items.append(("Average Rating", f"{summary.average_rating:.1f} / 5"))
meta_items.append(("Report Generated", now))
box_x = 35
box_w = pdf.w - 70
box_y = pdf.get_y()
# Draw box border
pdf.set_draw_color(229, 231, 235)
pdf.set_line_width(0.4)
line_h = 7
box_h = len(meta_items) * line_h + 12
pdf.rect(box_x, box_y, box_w, box_h)
# Top teal accent line on the box
pdf.set_draw_color(45, 212, 191)
pdf.set_line_width(1.0)
pdf.line(box_x, box_y, box_x + box_w, box_y)
pdf.set_y(box_y + 6)
for label, value in meta_items:
pdf.set_x(box_x + 8)
pdf.set_font("Helvetica", "B", 9)
pdf.set_text_color(107, 125, 153)
pdf.cell(40, line_h, _safe_text(label))
pdf.set_font("Helvetica", "", 9)
pdf.set_text_color(26, 35, 50)
pdf.cell(0, line_h, _safe_text(value), new_x="LMARGIN", new_y="NEXT")
# ── Content pages ────────────────────────────────────────────────
pdf.add_page()
_render_markdown_to_pdf(pdf, content_md, chart_files)
# ── Final footer ─────────────────────────────────────────────────
pdf.ln(10)
pdf.set_draw_color(229, 231, 235)
pdf.set_line_width(0.2)
pdf.line(pdf.l_margin, pdf.get_y(), pdf.w - pdf.r_margin, pdf.get_y())
pdf.ln(4)
pdf.set_font("Helvetica", "I", 8)
pdf.set_text_color(156, 163, 175)
pdf.cell(0, 8, f"Generated by ReviewLens AI - {now}", align="C")
return pdf.output()
# ── Markdown rendering ───────────────────────────────────────────────
def _render_markdown_to_pdf(pdf: ReportPDF, md_text: str, chart_files: list[str | None]) -> None:
"""Parse markdown text and render to PDF with formatting."""
lines = md_text.split("\n")
in_table = False
table_rows: list[list[str]] = []
content_w = pdf.w - pdf.l_margin - pdf.r_margin
for line in lines:
stripped = line.strip()
# Empty lines
if not stripped:
if in_table and table_rows:
_render_table(pdf, table_rows)
table_rows = []
in_table = False
pdf.ln(3)
continue
# Horizontal rules
if re.match(r'^-{3,}$', stripped) or re.match(r'^\*{3,}$', stripped):
if in_table and table_rows:
_render_table(pdf, table_rows)
table_rows = []
in_table = False
pdf.ln(3)
pdf.set_draw_color(45, 212, 191)
pdf.set_line_width(0.3)
pdf.line(pdf.l_margin, pdf.get_y(), pdf.w - pdf.r_margin, pdf.get_y())
pdf.ln(5)
continue
# Chart markers
chart_match = re.match(r'\[chart:(\d+)\]', stripped)
if chart_match:
idx = int(chart_match.group(1))
if idx < len(chart_files) and chart_files[idx]:
pdf.ln(4)
pdf.image(chart_files[idx], x=pdf.l_margin, w=content_w)
pdf.ln(4)
continue
# Table rows
if "|" in stripped and not stripped.startswith("#"):
cells = [c.strip() for c in stripped.split("|")]
cells = [c for c in cells if c]
if all(re.match(r'^[-:]+$', c) for c in cells):
continue
table_rows.append(cells)
in_table = True
continue
elif in_table and table_rows:
_render_table(pdf, table_rows)
table_rows = []
in_table = False
# Headings
if stripped.startswith("# "):
pdf.ln(6)
pdf.set_font("Helvetica", "B", 16)
pdf.set_text_color(13, 17, 23)
pdf.multi_cell(content_w, 8, _strip_md(stripped[2:]))
pdf.set_draw_color(45, 212, 191)
pdf.set_line_width(0.6)
pdf.line(pdf.l_margin, pdf.get_y() + 1, pdf.w - pdf.r_margin, pdf.get_y() + 1)
pdf.ln(4)
elif stripped.startswith("## "):
pdf.ln(5)
pdf.set_font("Helvetica", "B", 13)
pdf.set_text_color(13, 17, 23)
pdf.multi_cell(content_w, 7, _strip_md(stripped[3:]))
pdf.ln(2)
elif stripped.startswith("### "):
pdf.ln(4)
pdf.set_font("Helvetica", "B", 11)
pdf.set_text_color(55, 65, 81)
pdf.multi_cell(content_w, 6, _strip_md(stripped[4:]))
pdf.ln(2)
# Blockquotes
elif stripped.startswith("> "):
pdf.set_draw_color(45, 212, 191)
pdf.set_line_width(0.6)
x = pdf.get_x()
y = pdf.get_y()
pdf.set_x(x + 6)
pdf.set_font("Helvetica", "I", 9)
pdf.set_text_color(55, 65, 81)
pdf.multi_cell(content_w - 10, 5, _strip_md(stripped[2:]))
pdf.line(x + 3, y, x + 3, pdf.get_y())
pdf.ln(2)
# List items
elif stripped.startswith("- ") or stripped.startswith("* "):
pdf.set_font("Helvetica", "", 10)
pdf.set_text_color(26, 35, 50)
pdf.set_x(pdf.l_margin + 4)
pdf.cell(4, 5, "-")
pdf.multi_cell(content_w - 8, 5, _strip_md(stripped[2:]))
pdf.ln(1)
elif re.match(r'^\d+\. ', stripped):
num_match = re.match(r'^(\d+)\. (.*)', stripped)
if num_match:
pdf.set_font("Helvetica", "", 10)
pdf.set_text_color(26, 35, 50)
pdf.set_x(pdf.l_margin + 4)
pdf.cell(6, 5, f"{num_match.group(1)}.")
pdf.multi_cell(content_w - 10, 5, _strip_md(num_match.group(2)))
pdf.ln(1)
# Regular paragraph
else:
pdf.set_font("Helvetica", "", 10)
pdf.set_text_color(26, 35, 50)
pdf.multi_cell(content_w, 5, _strip_md(stripped))
pdf.ln(2)
# Flush remaining table
if in_table and table_rows:
_render_table(pdf, table_rows)
def _render_table(pdf: ReportPDF, rows: list[list[str]]) -> None:
"""Render a table with proper column sizing."""
if not rows:
return
pdf.ln(3)
n_cols = max(len(r) for r in rows)
content_w = pdf.w - pdf.l_margin - pdf.r_margin
# Calculate column widths based on content
col_widths = [0.0] * n_cols
for row in rows:
for j, cell in enumerate(row):
if j < n_cols:
w = pdf.get_string_width(_strip_md(cell)) + 4
col_widths[j] = max(col_widths[j], w)
# Normalize to fit content width
total = sum(col_widths)
if total > 0:
col_widths = [w / total * content_w for w in col_widths]
else:
col_widths = [content_w / n_cols] * n_cols
for i, row in enumerate(rows):
if i == 0:
pdf.set_font("Helvetica", "B", 8)
pdf.set_text_color(107, 114, 128)
pdf.set_fill_color(243, 244, 246)
for j, cell in enumerate(row):
w = col_widths[j] if j < len(col_widths) else col_widths[-1]
pdf.cell(w, 6, _strip_md(cell)[:40], border=1, fill=True)
pdf.ln()
else:
pdf.set_font("Helvetica", "", 8)
pdf.set_text_color(26, 35, 50)
for j, cell in enumerate(row):
w = col_widths[j] if j < len(col_widths) else col_widths[-1]
pdf.cell(w, 5.5, _strip_md(cell)[:50], border=1)
pdf.ln()
pdf.ln(3)
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/pdf.py has been updated successfully.
python3 -c "import ast; ast.parse(open('app/pdf.py').read()); print('OK')"Server should auto-reload. Fixes:
--- now detected as regex and rendered as a teal line with spacing.On branch main Your branch is up to date with 'origin/main'. Changes not staged for commit: (use "git add <file>..." to update what will be committed) (use "git restore <file>..." to discard changes in working directory) modified: app/main.py modified: app/models.py modified: app/prompts.py modified: app/rendering.py modified: app/tools/report_tools.py modified: requirements.txt modified: static/app.js modified: static/style.css modified: templates/app.html Untracked files: (use "git add <file>..." to include in what will be committed) app/pdf.py no changes added to commit (use "git add" and/or "git commit -a") app/main.py | 122 +++++++++++++++++++++++++++++++++++++++++++--- app/models.py | 1 + app/prompts.py | 3 +- app/rendering.py | 38 ++++++++++++++- app/tools/report_tools.py | 91 +++++++++++++++++++++++++++++++++- requirements.txt | 2 + static/app.js | 11 ++++- static/style.css | 83 +++++++++++++++++++++++++++++++ templates/app.html | 16 ++++++ 9 files changed, 355 insertions(+), 12 deletions(-)
git add app/pdf.py app/main.py app/models.py app/prompts.py app/rendering.py app/tools/report_tools.py requirements.txt static/app.js static/style.css templates/app.html Dockerfile && git commit -m "$(cat <<'EOF'
PDF report generation, download cards, bug fixes
New compile_report tool generates branded PDF reports via fpdf2 +
matplotlib. Cover page with centered ReviewLens logo, teal title,
metadata box. Content pages with heading hierarchy, teal accent
lines, blockquotes, tables with proportional columns, and inline
chart PNGs. Running header + page numbers.
Full Unicode sanitization pipeline: emoji severity indicators
(red/yellow/green circles) replaced with [HIGH]/[MED]/[LOW] text
badges. Em dashes, smart quotes, bullets converted to Latin-1 safe
equivalents.
Download endpoint GET /api/report/{session_id}/download serves PDF.
Report URLs in chat rendered as styled download cards (PDF icon,
title, teal download button). Sidebar shows Report PDF link when
report_generated_at is set.
Also: scroll snap fix (MutationObserver only fires on new child
nodes, not chart data toggles), unmatched [chart:N] markers stripped,
scrape flow now triggers auto-analysis, workspace name generation
via Haiku, CSV download endpoint, _emit rename fix in _run_scrape.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
EOF
)"10 files changed, 806 insertions(+), 12 deletions(-) create mode 100644 app/pdf.py