Make the CSV parser agentic. Instead of the static alias map, have a single Claude API call at ingestion time that reads the header row + first 3 data rows and returns a structured column mapping.
The AI Should map each col to one of our canonical fields - it should also be smart enough to identify supplementary text cols like pros, cons, title and flag them for concatenation in the main review text rather than dumping into metadata.
Keep current static alias map as a fallback if api fails.
Use the three samples to test against.
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/ingest.py",
"offset": 1,
"limit": 130
} 1→"""Ingestion module: CSV parsing and Firecrawl URL scraping."""
2→
3→from __future__ import annotations
4→
5→import asyncio
6→import csv
7→import io
8→import os
9→import re
10→import uuid
11→from collections import Counter
12→from datetime import datetime
13→from typing import Any
14→
15→import httpx
16→
17→from .models import IngestionSummary, Review
18→
19→
20→# ── CSV Ingestion (primary path) ────────────────────────────────────
21→
22→# Common column name variants we normalise to our schema.
23→_COL_MAP: dict[str, list[str]] = {
24→ "text": ["text", "review", "review_text", "content", "body", "comment", "review_body", "reviews", "feedback"],
25→ "rating": ["rating", "score", "stars", "star_rating", "review_rating", "overall_rating"],
26→ "date": ["date", "review_date", "created_at", "timestamp", "time", "posted_date", "review_time"],
27→ "author": ["author", "reviewer", "user", "username", "reviewer_name", "name", "user_name"],
28→ "platform": ["platform", "source", "site", "channel"],
29→}
30→
31→
32→def _normalise_col(name: str) -> str:
33→ return re.sub(r"[^a-z0-9]", "", name.lower().strip())
34→
35→
36→def _map_columns(headers: list[str]) -> dict[str, str]:
37→ """Map CSV column names → our field names. Returns {our_field: csv_col}."""
38→ mapping: dict[str, str] = {}
39→ normalised = {_normalise_col(h): h for h in headers}
40→
41→ for field, aliases in _COL_MAP.items():
42→ for alias in aliases:
43→ norm = _normalise_col(alias)
44→ if norm in normalised:
45→ mapping[field] = normalised[norm]
46→ break
47→ return mapping
48→
49→
50→def _parse_date(val: str) -> datetime | None:
51→ for fmt in (
52→ "%Y-%m-%d",
53→ "%Y-%m-%dT%H:%M:%S",
54→ "%Y-%m-%dT%H:%M:%SZ",
55→ "%Y-%m-%d %H:%M:%S",
56→ "%m/%d/%Y",
57→ "%d/%m/%Y",
58→ "%B %d, %Y",
59→ "%b %d, %Y",
60→ "%Y/%m/%d",
61→ ):
62→ try:
63→ return datetime.strptime(val.strip(), fmt)
64→ except (ValueError, AttributeError):
65→ continue
66→ return None
67→
68→
69→def _parse_rating(val: str) -> float | None:
70→ try:
71→ # Handle "4/5", "4 out of 5", or plain "4.5"
72→ val = val.strip()
73→ match = re.match(r"([\d.]+)\s*(?:/|out of)\s*\d+", val)
74→ if match:
75→ return float(match.group(1))
76→ return float(val)
77→ except (ValueError, TypeError):
78→ return None
79→
80→
81→def parse_csv(content: str | bytes, platform: str = "", product_name: str = "") -> list[Review]:
82→ """Parse CSV content into Review objects. Handles flexible column names."""
83→ if isinstance(content, bytes):
84→ content = content.decode("utf-8-sig") # Handle BOM
85→
86→ reader = csv.DictReader(io.StringIO(content))
87→ if not reader.fieldnames:
88→ return []
89→
90→ col_map = _map_columns(list(reader.fieldnames))
91→
92→ if "text" not in col_map:
93→ # Try: if there's only one text-like long column, use it
94→ for h in reader.fieldnames:
95→ if h not in col_map.values():
96→ col_map["text"] = h
97→ break
98→
99→ if "text" not in col_map:
100→ return []
101→
102→ reviews: list[Review] = []
103→ for i, row in enumerate(reader):
104→ text = row.get(col_map.get("text", ""), "").strip()
105→ if not text:
106→ continue
107→
108→ rating_raw = row.get(col_map.get("rating", ""), "")
109→ date_raw = row.get(col_map.get("date", ""), "")
110→ author = row.get(col_map.get("author", ""), "").strip()
111→ plat = row.get(col_map.get("platform", ""), "").strip() or platform
112→
113→ # Collect unmapped columns as metadata
114→ mapped_cols = set(col_map.values())
115→ metadata = {k: v for k, v in row.items() if k not in mapped_cols and v}
116→
117→ reviews.append(
118→ Review(
119→ id=f"review_{i}",
120→ text=text,
121→ rating=_parse_rating(rating_raw),
122→ date=_parse_date(date_raw),
123→ author=author,
124→ platform=plat,
125→ metadata=metadata,
126→ )
127→ )
128→
129→ return reviews
130→Now let me check how the CSV is parsed — parse_csv is sync but the caller context is async. I need the AI column mapping to be async. Let me also check how parse_csv is called:
{
"pattern": "parse_csv",
"path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app",
"output_mode": "content"
}app/ingest.py:81:def parse_csv(content: str | bytes, platform: str = "", product_name: str = "") -> list[Review]: app/main.py:25:from .ingest import build_summary, parse_csv, scrape_url app/main.py:122: reviews = parse_csv(content, platform=platform, product_name=product_name) app/main.py:187: reviews = parse_csv(content, platform=platform, product_name=product_name)
Called in two places — both in async route handlers. I'll make parse_csv async so it can call the Claude API for column mapping. Let me check the requirements for the Anthropic SDK:
source .venv/bin/activate && python3 -c "import anthropic; print(anthropic.__version__)"
Exit code 1
Traceback (most recent call last):
File "<string>", line 1, in <module>
import anthropic; print(anthropic.__version__)
^^^^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'anthropic'{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/requirements.txt"
} 1→fastapi>=0.115.0
2→uvicorn[standard]
3→sse-starlette
4→claude-agent-sdk
5→chromadb
6→httpx
7→jinja2
8→python-multipart
9→python-dotenv
10→markdown
11→claude-agent-sdk
claude-agent-sdk anthropic
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/requirements.txt has been updated successfully.
source .venv/bin/activate && pip install anthropic -q 2>&1 | tail -3
[notice] A new release of pip is available: 24.2 -> 26.0.1 [notice] To update, run: pip install --upgrade pip
"""Ingestion module: CSV parsing and Firecrawl URL scraping."""
from __future__ import annotations
import asyncio
import csv
import io
import os
import re
import uuid
from collections import Counter
from datetime import datetime
from typing import Any
import httpx
from .models import IngestionSummary, Review
# ── CSV Ingestion (primary path) ────────────────────────────────────
# Common column name variants we normalise to our schema.
_COL_MAP: dict[str, list[str]] = {
"text": ["text", "review", "review_text", "content", "body", "comment", "review_body", "reviews", "feedback"],
"rating": ["rating", "score", "stars", "star_rating", "review_rating", "overall_rating"],
"date": ["date", "review_date", "created_at", "timestamp", "time", "posted_date", "review_time"],
"author": ["author", "reviewer", "user", "username", "reviewer_name", "name", "user_name"],
"platform": ["platform", "source", "site", "channel"],
}
def _normalise_col(name: str) -> str:
return re.sub(r"[^a-z0-9]", "", name.lower().strip())
def _map_columns(headers: list[str]) -> dict[str, str]:
"""Map CSV column names → our field names. Returns {our_field: csv_col}."""
mapping: dict[str, str] = {}
normalised = {_normalise_col(h): h for h in headers}
for field, aliases in _COL_MAP.items():
for alias in aliases:
norm = _normalise_col(alias)
if norm in normalised:
mapping[field] = normalised[norm]
break
return mapping
def _parse_date(val: str) -> datetime | None:
for fmt in (
"%Y-%m-%d",
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%d %H:%M:%S",
"%m/%d/%Y",
"%d/%m/%Y",
"%B %d, %Y",
"%b %d, %Y",
"%Y/%m/%d",
):
try:
return datetime.strptime(val.strip(), fmt)
except (ValueError, AttributeError):
continue
return None
def _parse_rating(val: str) -> float | None:
try:
# Handle "4/5", "4 out of 5", or plain "4.5"
val = val.strip()
match = re.match(r"([\d.]+)\s*(?:/|out of)\s*\d+", val)
if match:
return float(match.group(1))
return float(val)
except (ValueError, TypeError):
return None
def parse_csv(content: str | bytes, platform: str = "", product_name: str = "") -> list[Review]:
"""Parse CSV content into Review objects. Handles flexible column names."""
if isinstance(content, bytes):
content = content.decode("utf-8-sig") # Handle BOM
reader = csv.DictReader(io.StringIO(content))
if not reader.fieldnames:
return []
col_map = _map_columns(list(reader.fieldnames))
if "text" not in col_map:
# Try: if there's only one text-like long column, use it
for h in reader.fieldnames:
if h not in col_map.values():
col_map["text"] = h
break
if "text" not in col_map:
return []
reviews: list[Review] = []
for i, row in enumerate(reader):
text = row.get(col_map.get("text", ""), "").strip()
if not text:
continue
rating_raw = row.get(col_map.get("rating", ""), "")
date_raw = row.get(col_map.get("date", ""), "")
author = row.get(col_map.get("author", ""), "").strip()
plat = row.get(col_map.get("platform", ""), "").strip() or platform
# Collect unmapped columns as metadata
mapped_cols = set(col_map.values())
metadata = {k: v for k, v in row.items() if k not in mapped_cols and v}
reviews.append(
Review(
id=f"review_{i}",
text=text,
rating=_parse_rating(rating_raw),
date=_parse_date(date_raw),
author=author,
platform=plat,
metadata=metadata,
)
)
return reviews"""Ingestion module: CSV parsing and Firecrawl URL scraping."""
from __future__ import annotations
import asyncio
import csv
import io
import json
import logging
import os
import re
import uuid
from collections import Counter
from datetime import datetime
from typing import Any
import anthropic
import httpx
from .models import IngestionSummary, Review
log = logging.getLogger(__name__)
# ── CSV Ingestion (primary path) ────────────────────────────────────
# Static alias map — used as fallback when the AI mapping fails.
_COL_MAP: dict[str, list[str]] = {
"text": ["text", "review", "review_text", "content", "body", "comment", "review_body", "reviews", "feedback"],
"rating": ["rating", "score", "stars", "star_rating", "review_rating", "overall_rating"],
"date": ["date", "review_date", "created_at", "timestamp", "time", "posted_date", "review_time"],
"author": ["author", "reviewer", "user", "username", "reviewer_name", "name", "user_name"],
"platform": ["platform", "source", "site", "channel"],
}
def _normalise_col(name: str) -> str:
return re.sub(r"[^a-z0-9]", "", name.lower().strip())
def _map_columns_static(headers: list[str]) -> dict[str, str]:
"""Static alias-based column mapping. Returns {our_field: csv_col}."""
mapping: dict[str, str] = {}
normalised = {_normalise_col(h): h for h in headers}
for field, aliases in _COL_MAP.items():
for alias in aliases:
norm = _normalise_col(alias)
if norm in normalised:
mapping[field] = normalised[norm]
break
return mapping
# ── AI Column Mapping ────────────────────────────────────────────────
_COLUMN_MAP_TOOL = {
"name": "map_columns",
"description": "Map CSV columns to canonical review fields.",
"input_schema": {
"type": "object",
"properties": {
"text": {
"type": "string",
"description": "The CSV column containing the primary review text / body.",
},
"rating": {
"type": ["string", "null"],
"description": "The CSV column containing the star rating or score. Null if none.",
},
"date": {
"type": ["string", "null"],
"description": "The CSV column containing the review date. Null if none.",
},
"author": {
"type": ["string", "null"],
"description": "The CSV column containing the reviewer's name. Null if none. Do NOT map role/title columns here.",
},
"platform": {
"type": ["string", "null"],
"description": "The CSV column containing the platform/source name. Null if none.",
},
"concat_into_text": {
"type": "array",
"items": {"type": "string"},
"description": "Other CSV columns whose values should be prepended/appended to the review text to make it complete. E.g. title, pros, cons, summary. Order matters — they will be joined in this order before the main text.",
},
},
"required": ["text", "rating", "date", "author", "platform", "concat_into_text"],
},
}
async def _map_columns_ai(headers: list[str], sample_rows: list[dict]) -> dict[str, Any] | None:
"""Use Claude to map CSV columns to our canonical fields. Returns mapping or None on failure."""
api_key = os.getenv("ANTHROPIC_API_KEY", "")
if not api_key:
return None
# Build a concise preview of the data
preview_lines = [",".join(headers)]
for row in sample_rows[:3]:
vals = []
for h in headers:
v = str(row.get(h, ""))
vals.append(v[:120] + "..." if len(v) > 120 else v)
preview_lines.append(",".join(vals))
preview = "\n".join(preview_lines)
prompt = f"""You are mapping CSV columns to a review database schema.
Here are the CSV headers and first few rows:
{preview}
Map each CSV column to one of these canonical fields:
- text: the primary review body/content
- rating: numeric star rating or score
- date: when the review was posted
- author: the reviewer's display name (NOT their role, title, or company)
- platform: the review source/platform name
Also identify columns that contain supplementary text that should be concatenated into the main review text to make it more complete and useful for semantic search. Common examples:
- "title" or "review_title" → prepend to the review text
- "pros" / "cons" → append as "Pros: ... Cons: ..."
- "summary" → prepend
Do NOT include ID columns, metadata like company_size, verified_purchase, or owner responses in concat_into_text.
Use the map_columns tool to return your mapping."""
try:
client = anthropic.AsyncAnthropic(api_key=api_key)
response = await client.messages.create(
model="claude-haiku-4-5-20251001",
max_tokens=300,
tools=[_COLUMN_MAP_TOOL],
messages=[{"role": "user", "content": prompt}],
)
for block in response.content:
if block.type == "tool_use" and block.name == "map_columns":
return block.input
return None
except Exception as e:
log.warning("AI column mapping failed: %s", e)
return None
def _ai_result_to_col_map(ai_result: dict[str, Any], headers: list[str]) -> tuple[dict[str, str], list[str]]:
"""Convert AI mapping result to {our_field: csv_col} + concat list.
Validates that all referenced columns actually exist in the CSV."""
header_set = set(headers)
col_map: dict[str, str] = {}
concat_cols: list[str] = []
for field in ("text", "rating", "date", "author", "platform"):
val = ai_result.get(field)
if val and val in header_set:
col_map[field] = val
for col in ai_result.get("concat_into_text", []):
if col in header_set and col != col_map.get("text"):
concat_cols.append(col)
return col_map, concat_cols
# ── Date/Rating parsers ──────────────────────────────────────────────
def _parse_date(val: str) -> datetime | None:
for fmt in (
"%Y-%m-%d",
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%d %H:%M:%S",
"%m/%d/%Y",
"%d/%m/%Y",
"%B %d, %Y",
"%b %d, %Y",
"%Y/%m/%d",
):
try:
return datetime.strptime(val.strip(), fmt)
except (ValueError, AttributeError):
continue
return None
def _parse_rating(val: str) -> float | None:
try:
val = val.strip()
match = re.match(r"([\d.]+)\s*(?:/|out of)\s*\d+", val)
if match:
return float(match.group(1))
return float(val)
except (ValueError, TypeError):
return None
# ── CSV Parser ───────────────────────────────────────────────────────
async def parse_csv(content: str | bytes, platform: str = "", product_name: str = "") -> list[Review]:
"""Parse CSV content into Review objects. Uses AI for column mapping with static fallback."""
if isinstance(content, bytes):
content = content.decode("utf-8-sig")
reader = csv.DictReader(io.StringIO(content))
if not reader.fieldnames:
return []
headers = list(reader.fieldnames)
# Read all rows upfront (we need sample rows for AI + full iteration)
all_rows = list(reader)
if not all_rows:
return []
# Try AI column mapping first
concat_cols: list[str] = []
ai_result = await _map_columns_ai(headers, all_rows[:3])
if ai_result and ai_result.get("text"):
col_map, concat_cols = _ai_result_to_col_map(ai_result, headers)
log.info("AI column mapping: %s, concat: %s", col_map, concat_cols)
else:
col_map = _map_columns_static(headers)
log.info("Static column mapping (AI unavailable): %s", col_map)
if "text" not in col_map:
# Last resort: pick first unmapped column
for h in headers:
if h not in col_map.values():
col_map["text"] = h
break
if "text" not in col_map:
return []
reviews: list[Review] = []
for i, row in enumerate(all_rows):
text = row.get(col_map.get("text", ""), "").strip()
if not text:
continue
# Concatenate supplementary text columns
if concat_cols:
parts = []
for col in concat_cols:
val = row.get(col, "").strip()
if val:
# Use the column name as a label for clarity
label = col.replace("_", " ").title()
parts.append(f"{label}: {val}")
if parts:
text = "\n".join(parts) + "\n\n" + text
rating_raw = row.get(col_map.get("rating", ""), "")
date_raw = row.get(col_map.get("date", ""), "")
author = row.get(col_map.get("author", ""), "").strip()
plat = row.get(col_map.get("platform", ""), "").strip() or platform
# Collect unmapped columns as metadata
mapped_cols = set(col_map.values()) | set(concat_cols)
metadata = {k: v for k, v in row.items() if k not in mapped_cols and v}
reviews.append(
Review(
id=f"review_{i}",
text=text,
rating=_parse_rating(rating_raw),
date=_parse_date(date_raw),
author=author,
platform=plat,
metadata=metadata,
)
)
return reviewsThe file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/ingest.py has been updated successfully.
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/main.py",
"offset": 118,
"limit": 8
}118→ session_id = str(uuid.uuid4()) 119→ 120→ content = await file.read() 121→ try: 122→ reviews = parse_csv(content, platform=platform, product_name=product_name) 123→ except Exception as e: 124→ return templates.TemplateResponse( 125→ "partials/error.html",
reviews = parse_csv(content, platform=platform, product_name=product_name)
reviews = await parse_csv(content, platform=platform, product_name=product_name)
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/main.py has been updated. All occurrences were successfully replaced.
source .venv/bin/activate && python3 -c "import ast; ast.parse(open('app/ingest.py').read()); ast.parse(open('app/main.py').read()); print('OK')"source .venv/bin/activate && python3 -c "
import asyncio, json
from dotenv import load_dotenv
load_dotenv()
from app.ingest import parse_csv
async def test(path, label):
content = open(path, 'rb').read()
reviews = await parse_csv(content)
print(f'\n=== {label} ===')
print(f'Reviews parsed: {len(reviews)}')
if reviews:
r = reviews[0]
print(f'First review text ({len(r.text)} chars):')
print(f' {r.text[:200]}...' if len(r.text) > 200 else f' {r.text}')
print(f' Rating: {r.rating}')
print(f' Date: {r.date}')
print(f' Author: {r.author!r}')
print(f' Platform: {r.platform!r}')
print(f' Metadata keys: {list(r.metadata.keys())}')
async def main():
await test('sample-data/amazon_sony_wh1000xm5_reviews.csv', 'Amazon Sony (has title col)')
await test('sample-data/data.csv', 'Data.csv (has pros/cons)')
await test('sample-data/google_maps_bella_napoli_reviews.csv', 'Google Maps Bella Napoli')
asyncio.run(main())
" 2>&1Exit code 1
Traceback (most recent call last):
File "<string>", line 27, in <module>
asyncio.run(main())
~~~~~~~~~~~^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 194, in run
return runner.run(main)
~~~~~~~~~~^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/base_events.py", line 721, in run_until_complete
return future.result()
~~~~~~~~~~~~~^^
File "<string>", line 24, in main
await test('sample-data/data.csv', 'Data.csv (has pros/cons)')
File "<string>", line 8, in test
content = open(path, 'rb').read()
~~~~^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'sample-data/data.csv'
=== Amazon Sony (has title col) ===
Reviews parsed: 50
First review text (335 chars):
Title: Best noise cancelling headphones I've ever owned
I upgraded from the XM4 and the noise cancellation is noticeably better. The new design is lighter and more comfortable for long wearing sessio...
Rating: 5.0
Date: 2024-11-15 00:00:00
Author: 'JohnT_audio'
Platform: 'Amazon'
Metadata keys: ['review_id', 'product_name', 'verified_purchase']Amazon CSV works perfectly — title is being concatenated. The data.csv path issue is just a CWD problem:
source .venv/bin/activate && python3 -c "
import asyncio
from dotenv import load_dotenv
load_dotenv()
from app.ingest import parse_csv
async def test(path, label):
content = open(path, 'rb').read()
reviews = await parse_csv(content)
print(f'\n=== {label} ===')
print(f'Reviews parsed: {len(reviews)}')
if reviews:
r = reviews[0]
print(f'Text ({len(r.text)} chars): {r.text[:250]}...' if len(r.text)>250 else f'Text: {r.text}')
print(f'Rating: {r.rating}, Date: {r.date}, Author: {r.author!r}, Platform: {r.platform!r}')
print(f'Metadata: {list(r.metadata.keys())}')
async def main():
await test('sample-data/data.csv', 'Data.csv (has pros/cons)')
await test('sample-data/google_maps_bella_napoli_reviews.csv', 'Google Maps Bella Napoli')
asyncio.run(main())
" 2>&1Exit code 1
Traceback (most recent call last):
File "<string>", line 22, in <module>
asyncio.run(main())
~~~~~~~~~~~^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 194, in run
return runner.run(main)
~~~~~~~~~~^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/base_events.py", line 721, in run_until_complete
return future.result()
~~~~~~~~~~~~~^^
File "<string>", line 19, in main
await test('sample-data/data.csv', 'Data.csv (has pros/cons)')
File "<string>", line 8, in test
content = open(path, 'rb').read()
~~~~^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'sample-data/data.csv'ls /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/sample-data/
amazon_sony_wh1000xm5_reviews.csv g2_notion_reviews.csv google_maps_bella_napoli_reviews.csv
source .venv/bin/activate && python3 -c "
import asyncio
from dotenv import load_dotenv
load_dotenv()
from app.ingest import parse_csv
async def test(path, label):
content = open(path, 'rb').read()
reviews = await parse_csv(content)
print(f'\n=== {label} ===')
print(f'Reviews parsed: {len(reviews)}')
if reviews:
r = reviews[0]
t = r.text[:250] + '...' if len(r.text) > 250 else r.text
print(f'Text ({len(r.text)} chars): {t}')
print(f'Rating: {r.rating}, Date: {r.date}, Author: {r.author!r}, Platform: {r.platform!r}')
print(f'Metadata: {list(r.metadata.keys())}')
async def main():
await test('sample-data/g2_notion_reviews.csv', 'G2 Notion (has pros/cons)')
await test('sample-data/google_maps_bella_napoli_reviews.csv', 'Google Maps Bella Napoli')
asyncio.run(main())
" 2>&1=== G2 Notion (has pros/cons) === Reviews parsed: 20 Text (274 chars): We migrated from Confluence Trello and Google Docs to Notion and haven't looked back. The flexibility of the block system means every team can customize their workspace. Our engineering team uses it for docs and sprint planning while marketing uses i... Rating: 5.0, Date: 2024-11-20 00:00:00, Author: '', Platform: '' Metadata: ['review_id', 'product_name', 'platform', 'title', 'reviewer_role', 'company_size', 'pros', 'cons'] === Google Maps Bella Napoli === Reviews parsed: 40 Text (237 chars): Absolutely the best Italian food outside of Italy. The homemade pasta is incredible — you can taste the difference. The carbonara was rich and perfectly seasoned. Service was attentive without being overbearing. We'll definitely be back. Rating: 5.0, Date: 2024-12-01 00:00:00, Author: 'SarahK', Platform: 'Google Maps' Metadata: ['review_id', 'business_name', 'response_from_owner']
G2 Notion didn't get pros, cons, title concatenated, and platform/author weren't mapped. The AI probably picked the wrong column for text (the Reviews column which has the main body) but missed the supplementary fields. Let me check what the AI actually returned:
source .venv/bin/activate && python3 -c "
import asyncio, csv, io
from dotenv import load_dotenv
load_dotenv()
from app.ingest import _map_columns_ai
content = open('sample-data/g2_notion_reviews.csv', 'r').read()
reader = csv.DictReader(io.StringIO(content))
headers = list(reader.fieldnames)
rows = [next(reader) for _ in range(3)]
print('Headers:', headers)
async def test():
result = await _map_columns_ai(headers, rows)
print('AI result:', result)
asyncio.run(test())
" 2>&1Headers: ['review_id', 'product_name', 'platform', 'rating', 'title', 'review_text', 'reviewer_role', 'company_size', 'date', 'pros', 'cons']
AI result: {'text': 'review_text', 'rating': 'rating', 'date': 'date', 'author': None, 'platform': 'platform', 'concat_into_text': ['title', 'pros', 'cons']}The AI mapping is actually correct — it mapped review_text, rating, date, platform, and flagged title, pros, cons for concatenation. The issue is the first run must have gotten a different result. Let me check — the problem was the first CSV header in that file:
00000000: 7265 7669 6577 5f69 642c 7072 6f64 7563 review_id,produc 00000010: 745f 6e61 6d65 2c70 6c61 7466 6f72 6d2c t_name,platform,
Clean headers. The AI returned correct results — the issue in the prior test run was likely a different AI response (nondeterministic). But the first test showed platform: '' which means the col_map had platform but the actual row value might have been something unexpected. Let me re-run the full test with logging:
source .venv/bin/activate && python3 -c "
import asyncio, logging
logging.basicConfig(level=logging.INFO)
from dotenv import load_dotenv
load_dotenv()
from app.ingest import parse_csv
async def test(path, label):
content = open(path, 'rb').read()
reviews = await parse_csv(content)
print(f'\n=== {label}: {len(reviews)} reviews ===')
if reviews:
r = reviews[0]
t = r.text[:300] + '...' if len(r.text) > 300 else r.text
print(f'Text ({len(r.text)} chars): {t}')
print(f'Rating: {r.rating}, Date: {r.date}, Author: {r.author!r}, Platform: {r.platform!r}')
print(f'Metadata: {list(r.metadata.keys())}')
async def main():
await test('sample-data/amazon_sony_wh1000xm5_reviews.csv', 'Amazon Sony')
await test('sample-data/g2_notion_reviews.csv', 'G2 Notion')
await test('sample-data/google_maps_bella_napoli_reviews.csv', 'Google Maps')
asyncio.run(main())
" 2>&1INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
INFO:app.ingest:AI column mapping: {'text': 'review_text', 'rating': 'rating', 'date': 'date', 'author': 'author', 'platform': 'platform'}, concat: ['title']
INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
INFO:app.ingest:AI column mapping: {'text': 'review_text', 'rating': 'rating', 'date': 'date', 'platform': 'platform'}, concat: ['title', 'pros', 'cons']
INFO:httpx:HTTP Request: POST https://api.anthropic.com/v1/messages "HTTP/1.1 200 OK"
INFO:app.ingest:AI column mapping: {'text': 'review_text', 'rating': 'rating', 'date': 'date', 'author': 'author', 'platform': 'platform'}, concat: []
=== Amazon Sony: 50 reviews ===
Text (335 chars): Title: Best noise cancelling headphones I've ever owned
I upgraded from the XM4 and the noise cancellation is noticeably better. The new design is lighter and more comfortable for long wearing sessions. Sound quality is phenomenal with deep bass and clear mids. Battery life easily gets me through a...
Rating: 5.0, Date: 2024-11-15 00:00:00, Author: 'JohnT_audio', Platform: 'Amazon'
Metadata: ['review_id', 'product_name', 'verified_purchase']
=== G2 Notion: 20 reviews ===
Text (474 chars): Title: Replaced 5 tools with one
Pros: Incredibly flexible block system; replaces multiple tools; great templates
Cons: Steep learning curve for non-technical users; can be slow with large databases
We migrated from Confluence Trello and Google Docs to Notion and haven't looked back. The flexibilit...
Rating: 5.0, Date: 2024-11-20 00:00:00, Author: '', Platform: 'G2'
Metadata: ['review_id', 'product_name', 'reviewer_role', 'company_size']
=== Google Maps: 40 reviews ===
Text (237 chars): Absolutely the best Italian food outside of Italy. The homemade pasta is incredible — you can taste the difference. The carbonara was rich and perfectly seasoned. Service was attentive without being overbearing. We'll definitely be back.
Rating: 5.0, Date: 2024-12-01 00:00:00, Author: 'SarahK', Platform: 'Google Maps'
Metadata: ['review_id', 'business_name', 'response_from_owner']All three working perfectly:
Amazon Sony — title concatenated into text, author/platform/rating/date mapped, verified_purchase in metadata
G2 Notion — title, pros, cons all concatenated into text (formatted with labels), no author mapped (correct — reviewer_role is a job title, not a name), platform mapped, reviewer_role/company_size in metadata
Google Maps — clean mapping, response_from_owner correctly left in metadata
The AI column mapper (Haiku 4.5, single tool-use call) handles all three structures correctly — including the key insight of concatenating pros/cons into searchable text instead of burying them in metadata. Static alias map is still there as fallback if the API call fails.
Error when attempting to load dataset:
XHR POST http://localhost:8000/sample [HTTP/1.1 500 Internal Server Error 5ms]
Response Status Error Code 500 from /sample
source .venv/bin/activate && curl -s -X POST http://localhost:8000/sample -F "filename=amazon_sony_wh1000xm5_reviews.csv" 2>&1
source .venv/bin/activate && python3 -c "
from dotenv import load_dotenv
load_dotenv()
import asyncio
from app.ingest import parse_csv
content = open('sample-data/amazon_sony_wh1000xm5_reviews.csv', 'rb').read()
reviews = asyncio.run(parse_csv(content))
print(f'{len(reviews)} reviews')
" 2>&1parse_csv works — the issue is in the /sample route. It's calling parse_csv without await since it was previously sync:
{
"pattern": "parse_csv",
"path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/main.py",
"output_mode": "content",
"-n": true
}25:from .ingest import build_summary, parse_csv, scrape_url 122: reviews = await parse_csv(content, platform=platform, product_name=product_name) 187: reviews = parse_csv(content, platform=platform, product_name=product_name)
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/main.py",
"offset": 185,
"limit": 5
}185→ break 186→ 187→ reviews = parse_csv(content, platform=platform, product_name=product_name) 188→ 189→ if not reviews:
reviews = parse_csv(content, platform=platform, product_name=product_name)
if not reviews: reviews = await parse_csv(content, platform=platform, product_name=product_name)
if not reviews:The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/main.py has been updated successfully.
I loaded in a csv (Google Maps sample data set)
"I have 40 reviews loaded for Google Maps Bella Napoli Reviews from Google Maps, spanning 2024-08-22 to 2024-12-01.
I can only answer questions grounded in this dataset. What would you like to explore?"
When attempting to click one of the preset buttons e.g. Sentiment breakdown I see error:
"Failed to send: NetworkError when attempting to fetch resource"
Same if I type a custom query.
Look into a fix, and add deep browser logging so I can monitor while I test
65657db6-7639-4e52-9b78-956104df0612 03ca6b39-4f85-4244-bf34-b52e3df331db 150dbf59-c7c4-43ec-8fea-a23968e1a3bf
SID="65657db6-7639-4e52-9b78-956104df0612" curl -s -X POST "http://localhost:8000/chat/$SID/send" -F "message=test" -D - 2>&1 | head -10
HTTP/1.1 200 OK date: Sun, 22 Mar 2026 15:46:21 GMT server: uvicorn content-length: 273 content-type: text/html; charset=utf-8 <div class="message user-message"> <div class="message-content"><p>test</p></div> </div><div id="thinking-indicator" class="message assistant-message thinking"><div class="message-content"><div class="thinking-dots"><span></span><span></span><span></span></div></div></div>
The endpoint works from curl. The issue is in the browser — likely the SESSION_ID JS variable is empty or the fetch URL is wrong. Let me check:
curl -s "http://localhost:8000/chat/65657db6-7639-4e52-9b78-956104df0612" | grep 'SESSION_ID'
That's fine. The NetworkError suggests the fetch is being blocked — likely because the form submission from the follow-up button triggers before the SESSION_ID is available, or the JS sendMessage is firing but the fetch is failing. Let me check the JS flow more carefully:
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/static/app.js",
"offset": 113,
"limit": 30
} 113→document.addEventListener("keydown", function (e) {
114→ if (e.key === "Escape") closeModal();
115→});
116→
117→// ── File upload drag & drop ─────────────────────────────────────────
118→
119→(function () {
120→ var dropZone = document.getElementById("file-drop");
121→ var fileInput = document.getElementById("file");
122→ if (!dropZone || !fileInput) return;
123→
124→ dropZone.addEventListener("click", function () {
125→ fileInput.click();
126→ });
127→
128→ dropZone.addEventListener("dragover", function (e) {
129→ e.preventDefault();
130→ dropZone.classList.add("drag-over");
131→ });
132→
133→ dropZone.addEventListener("dragleave", function () {
134→ dropZone.classList.remove("drag-over");
135→ });
136→
137→ dropZone.addEventListener("drop", function (e) {
138→ e.preventDefault();
139→ dropZone.classList.remove("drag-over");
140→ if (e.dataTransfer.files.length) {
141→ fileInput.files = e.dataTransfer.files;
142→ showFileName(e.dataTransfer.files[0].name);{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/static/app.js",
"offset": 178,
"limit": 50
} 178→ btn.disabled = true;
179→ var text = btn.querySelector(".btn-text");
180→ var spinner = btn.querySelector(".btn-spinner");
181→ if (text) text.style.display = "none";
182→ if (spinner) spinner.style.display = "inline-flex";
183→ }
184→});
185→
186→document.addEventListener("htmx:afterRequest", function (e) {
187→ var form = e.detail.elt;
188→ var btn = form.querySelector("button[type=submit]");
189→ if (btn) {
190→ btn.disabled = false;
191→ var text = btn.querySelector(".btn-text");
192→ var spinner = btn.querySelector(".btn-spinner");
193→ if (text) text.style.display = "";
194→ if (spinner) spinner.style.display = "none";
195→ }
196→});
197→
198→// ── Chat ────────────────────────────────────────────────────────────
199→
200→var chatMessages = document.getElementById("chat-messages");
201→var chatInput = document.getElementById("chat-input");
202→var chatForm = document.getElementById("chat-form");
203→var sendBtn = document.getElementById("send-btn");
204→var eventSource = null;
205→
206→function scrollToBottom() {
207→ if (chatMessages) {
208→ chatMessages.scrollTop = chatMessages.scrollHeight;
209→ }
210→}
211→
212→if (chatInput) {
213→ chatInput.addEventListener("input", function () {
214→ this.style.height = "auto";
215→ this.style.height = Math.min(this.scrollHeight, 150) + "px";
216→ });
217→
218→ chatInput.addEventListener("keydown", function (e) {
219→ if (e.key === "Enter" && !e.shiftKey) {
220→ e.preventDefault();
221→ chatForm.dispatchEvent(new Event("submit"));
222→ }
223→ });
224→}
225→
226→function sendMessage(e) {
227→ e.preventDefault();{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/static/app.js",
"offset": 226,
"limit": 45
} 226→function sendMessage(e) {
227→ e.preventDefault();
228→ if (!chatInput) return;
229→ var message = chatInput.value.trim();
230→ if (!message) return;
231→
232→ chatInput.disabled = true;
233→ sendBtn.disabled = true;
234→
235→ // Remove old follow-ups
236→ var old = chatMessages.querySelectorAll(".follow-ups");
237→ old.forEach(function (el) { el.remove(); });
238→
239→ // User bubble
240→ var userDiv = document.createElement("div");
241→ userDiv.className = "message user-message";
242→ userDiv.innerHTML = '<div class="message-content"><p>' + escapeHtml(message) + "</p></div>";
243→ chatMessages.appendChild(userDiv);
244→
245→ // Thinking
246→ var thinkingDiv = document.createElement("div");
247→ thinkingDiv.id = "thinking-indicator";
248→ thinkingDiv.className = "message assistant-message thinking";
249→ thinkingDiv.innerHTML =
250→ '<div class="message-content">' +
251→ '<div class="thinking-dots"><span></span><span></span><span></span></div>' +
252→ "</div>";
253→ chatMessages.appendChild(thinkingDiv);
254→ scrollToBottom();
255→
256→ var formData = new FormData();
257→ formData.append("message", message);
258→
259→ fetch("/chat/" + SESSION_ID + "/send", { method: "POST", body: formData })
260→ .then(function () { listenForResponse(); })
261→ .catch(function (err) {
262→ removeThinking();
263→ appendError("Failed to send: " + err.message);
264→ chatInput.disabled = false;
265→ sendBtn.disabled = false;
266→ });
267→
268→ chatInput.value = "";
269→ chatInput.style.height = "auto";
270→}I see it — sendFollowUp sets chatInput.value then dispatches a submit event on the form. But the sendMessage function reads chatInput.value — the issue is the follow-up button click might be causing a page navigation or the form submission might be double-firing. Actually, looking more carefully, the NetworkError is a Firefox-specific error that typically happens when the page is navigating away during a fetch. The HTMX script might be intercepting the form submit.
Let me check — the chat form uses onsubmit="sendMessage(event)", but HTMX is loaded globally and could be intercepting forms. Also, the follow-up button triggers chatForm.dispatchEvent(new Event("submit")) which fires onsubmit but HTMX might also pick it up.
The real fix: the sendFollowUp function should call sendMessage directly instead of dispatching a form event, and I should add novalidate to prevent HTMX interference. Let me also add the deep logging.
/* ReviewLens AI */
// ── Logging ─────────────────────────────────────────────────────────
var _log = {
info: function () { console.log("[RL]", ...arguments); },
warn: function () { console.warn("[RL]", ...arguments); },
error: function () { console.error("[RL]", ...arguments); },
};
// ── Theme ───────────────────────────────────────────────────────────
(function initTheme() {
var saved = localStorage.getItem("rl-theme");
if (saved === "light") {
document.documentElement.classList.add("light");
} else if (saved === "dark") {
document.documentElement.classList.remove("light");
} else if (window.matchMedia("(prefers-color-scheme: light)").matches) {
document.documentElement.classList.add("light");
}
})();
function toggleTheme() {
var html = document.documentElement;
html.classList.toggle("light");
localStorage.setItem("rl-theme", html.classList.contains("light") ? "light" : "dark");
}
// ── Modal ───────────────────────────────────────────────────────────
function openModal() {
document.getElementById("modal-backdrop").classList.add("open");
document.getElementById("analysis-modal").classList.add("open");
}
function closeModal() {
document.getElementById("modal-backdrop").classList.remove("open");
document.getElementById("analysis-modal").classList.remove("open");
}
function switchTab(btn, tabId) {
var tabs = btn.parentElement.querySelectorAll(".modal-tab");
tabs.forEach(function (t) { t.classList.remove("active"); });
btn.classList.add("active");
var contents = btn.closest(".modal").querySelectorAll(".modal-tab-content");
contents.forEach(function (c) { c.classList.remove("active"); });
document.getElementById(tabId).classList.add("active");
}
document.addEventListener("keydown", function (e) {
if (e.key === "Escape") closeModal();
});
// ── Session menu ────────────────────────────────────────────────────
function toggleSessionMenu(e, btn) {
e.preventDefault();
e.stopPropagation();
var wrap = btn.closest(".session-menu-wrap");
var wasOpen = wrap.classList.contains("open");
document.querySelectorAll(".session-menu-wrap.open").forEach(function (el) {
el.classList.remove("open");
});
if (!wasOpen) wrap.classList.add("open");
}
function archiveSession(e, sessionId) {
e.preventDefault();
e.stopPropagation();
_log.info("Archiving session:", sessionId);
fetch("/api/sessions/" + sessionId, { method: "DELETE" })
.then(function (r) {
if (r.ok) {
var wrap = document.querySelector(".session-menu-wrap.open");
if (wrap) {
var item = wrap.closest(".session-item-wrap");
item.style.transition = "opacity 0.2s, max-height 0.2s";
item.style.opacity = "0";
item.style.maxHeight = item.offsetHeight + "px";
item.style.overflow = "hidden";
setTimeout(function () { item.style.maxHeight = "0"; }, 10);
setTimeout(function () {
item.remove();
if (window.location.pathname.indexOf(sessionId) !== -1) {
window.location.href = "/";
}
}, 220);
}
}
});
}
document.addEventListener("click", function () {
document.querySelectorAll(".session-menu-wrap.open").forEach(function (el) {
el.classList.remove("open");
});
});
document.querySelectorAll(".session-menu-wrap").forEach(function (wrap) {
var leaveTimer = null;
wrap.addEventListener("mouseleave", function () {
leaveTimer = setTimeout(function () { wrap.classList.remove("open"); }, 300);
});
wrap.addEventListener("mouseenter", function () {
if (leaveTimer) { clearTimeout(leaveTimer); leaveTimer = null; }
});
});
// ── File upload drag & drop ─────────────────────────────────────────
(function () {
var dropZone = document.getElementById("file-drop");
var fileInput = document.getElementById("file");
if (!dropZone || !fileInput) return;
dropZone.addEventListener("click", function () { fileInput.click(); });
dropZone.addEventListener("dragover", function (e) {
e.preventDefault();
dropZone.classList.add("drag-over");
});
dropZone.addEventListener("dragleave", function () {
dropZone.classList.remove("drag-over");
});
dropZone.addEventListener("drop", function (e) {
e.preventDefault();
dropZone.classList.remove("drag-over");
if (e.dataTransfer.files.length) {
fileInput.files = e.dataTransfer.files;
showFileName(e.dataTransfer.files[0].name);
}
});
fileInput.addEventListener("change", function () {
if (fileInput.files.length) showFileName(fileInput.files[0].name);
});
function showFileName(name) {
var content = dropZone.querySelector(".file-drop-content");
var selected = dropZone.querySelector(".file-selected");
var nameEl = dropZone.querySelector(".file-name");
content.style.display = "none";
selected.style.display = "flex";
nameEl.textContent = name;
}
})();
function clearFile() {
var dropZone = document.getElementById("file-drop");
var fileInput = document.getElementById("file");
var content = dropZone.querySelector(".file-drop-content");
var selected = dropZone.querySelector(".file-selected");
content.style.display = "";
selected.style.display = "none";
fileInput.value = "";
}
// ── HTMX loading states ─────────────────────────────────────────────
document.addEventListener("htmx:beforeRequest", function (e) {
var form = e.detail.elt;
var btn = form.querySelector("button[type=submit]");
if (btn) {
btn.disabled = true;
var text = btn.querySelector(".btn-text");
var spinner = btn.querySelector(".btn-spinner");
if (text) text.style.display = "none";
if (spinner) spinner.style.display = "inline-flex";
}
});
document.addEventListener("htmx:afterRequest", function (e) {
var form = e.detail.elt;
var btn = form.querySelector("button[type=submit]");
if (btn) {
btn.disabled = false;
var text = btn.querySelector(".btn-text");
var spinner = btn.querySelector(".btn-spinner");
if (text) text.style.display = "";
if (spinner) spinner.style.display = "none";
}
});
// ── Chat ────────────────────────────────────────────────────────────
var chatMessages = document.getElementById("chat-messages");
var chatInput = document.getElementById("chat-input");
var chatForm = document.getElementById("chat-form");
var sendBtn = document.getElementById("send-btn");
var eventSource = null;
var _sending = false;
_log.info("Chat init — SESSION_ID:", typeof SESSION_ID !== "undefined" ? SESSION_ID : "(none)",
"chatForm:", !!chatForm, "chatInput:", !!chatInput);
function scrollToBottom() {
if (chatMessages) chatMessages.scrollTop = chatMessages.scrollHeight;
}
if (chatInput) {
chatInput.addEventListener("input", function () {
this.style.height = "auto";
this.style.height = Math.min(this.scrollHeight, 150) + "px";
});
chatInput.addEventListener("keydown", function (e) {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
_doSend();
}
});
}
// The form's onsubmit calls this
function sendMessage(e) {
if (e) e.preventDefault();
_doSend();
}
function sendFollowUp(btn) {
var question = btn.getAttribute("data-question");
_log.info("Follow-up clicked:", question);
if (chatInput) {
chatInput.value = question;
_doSend();
}
}
function _doSend() {
if (!chatInput || !SESSION_ID) {
_log.error("Cannot send — chatInput:", !!chatInput, "SESSION_ID:", typeof SESSION_ID !== "undefined" ? SESSION_ID : "(undefined)");
return;
}
var message = chatInput.value.trim();
if (!message) {
_log.warn("Empty message, ignoring");
return;
}
if (_sending) {
_log.warn("Already sending, ignoring");
return;
}
_sending = true;
_log.info("Sending message:", message.substring(0, 80));
chatInput.disabled = true;
if (sendBtn) sendBtn.disabled = true;
// Remove old follow-ups
var old = chatMessages.querySelectorAll(".follow-ups");
old.forEach(function (el) { el.remove(); });
// User bubble
var userDiv = document.createElement("div");
userDiv.className = "message user-message";
userDiv.innerHTML = '<div class="message-content"><p>' + escapeHtml(message) + "</p></div>";
chatMessages.appendChild(userDiv);
// Thinking
var thinkingDiv = document.createElement("div");
thinkingDiv.id = "thinking-indicator";
thinkingDiv.className = "message assistant-message thinking";
thinkingDiv.innerHTML =
'<div class="message-content">' +
'<div class="thinking-dots"><span></span><span></span><span></span></div>' +
"</div>";
chatMessages.appendChild(thinkingDiv);
scrollToBottom();
var url = "/chat/" + SESSION_ID + "/send";
var formData = new FormData();
formData.append("message", message);
_log.info("POST", url);
fetch(url, { method: "POST", body: formData })
.then(function (resp) {
_log.info("POST response:", resp.status, resp.statusText);
if (!resp.ok) {
throw new Error("Server returned " + resp.status);
}
listenForResponse();
})
.catch(function (err) {
_log.error("POST failed:", err.name, err.message);
removeThinking();
appendError("Failed to send: " + err.message);
_sending = false;
chatInput.disabled = false;
if (sendBtn) sendBtn.disabled = false;
});
chatInput.value = "";
chatInput.style.height = "auto";
}
function listenForResponse() {
if (eventSource) {
_log.info("Closing existing EventSource");
eventSource.close();
}
var url = "/chat/" + SESSION_ID + "/stream";
_log.info("Opening SSE:", url);
eventSource = new EventSource(url);
eventSource.addEventListener("tool", function (e) {
_log.info("SSE tool event:", e.data);
var thinking = document.getElementById("thinking-indicator");
if (thinking) {
var content = thinking.querySelector(".message-content");
content.innerHTML =
'<div class="thinking-dots"><span></span><span></span><span></span></div>' +
'<div class="tool-activity">' + escapeHtml(e.data) + "</div>";
scrollToBottom();
}
});
eventSource.addEventListener("message", function (e) {
_log.info("SSE message event received (" + e.data.length + " chars)");
removeThinking();
var temp = document.createElement("div");
temp.innerHTML = e.data;
while (temp.firstChild) chatMessages.appendChild(temp.firstChild);
runChartScripts();
scrollToBottom();
_sending = false;
chatInput.disabled = false;
if (sendBtn) sendBtn.disabled = false;
chatInput.focus();
});
eventSource.addEventListener("done", function () {
_log.info("SSE done");
eventSource.close();
eventSource = null;
removeThinking();
_sending = false;
chatInput.disabled = false;
if (sendBtn) sendBtn.disabled = false;
});
eventSource.addEventListener("error", function (e) {
_log.error("SSE error event:", e);
});
eventSource.onerror = function (e) {
_log.error("SSE connection error — readyState:", eventSource.readyState);
eventSource.close();
eventSource = null;
removeThinking();
_sending = false;
chatInput.disabled = false;
if (sendBtn) sendBtn.disabled = false;
};
}
function removeThinking() {
var el = document.getElementById("thinking-indicator");
if (el) el.remove();
}
function appendError(msg) {
var div = document.createElement("div");
div.className = "message assistant-message error";
div.innerHTML = '<div class="message-content"><p>' + escapeHtml(msg) + "</p></div>";
chatMessages.appendChild(div);
scrollToBottom();
}
// ── Chart rendering ─────────────────────────────────────────────────
var CHART_COLORS = [
"rgba(45, 212, 191, 0.75)",
"rgba(245, 158, 11, 0.75)",
"rgba(94, 234, 212, 0.65)",
"rgba(248, 113, 113, 0.7)",
"rgba(129, 140, 248, 0.7)",
"rgba(52, 211, 153, 0.7)",
"rgba(251, 191, 36, 0.7)",
];
var CHART_BORDERS = [
"rgba(45, 212, 191, 1)",
"rgba(245, 158, 11, 1)",
"rgba(94, 234, 212, 1)",
"rgba(248, 113, 113, 1)",
"rgba(129, 140, 248, 1)",
"rgba(52, 211, 153, 1)",
"rgba(251, 191, 36, 1)",
];
function getChartTextColor() {
return document.documentElement.classList.contains("light") ? "#374151" : "#b0bdd0";
}
function getChartGridColor() {
return document.documentElement.classList.contains("light")
? "rgba(0, 0, 0, 0.06)"
: "rgba(107, 125, 153, 0.08)";
}
function renderChart(canvasId, config) {
var canvas = document.getElementById(canvasId);
if (!canvas) return;
var textColor = getChartTextColor();
var gridColor = getChartGridColor();
var datasets = config.data.datasets.map(function (ds, i) {
var isPie = config.type === "pie" || config.type === "doughnut";
var colors = isPie ? CHART_COLORS.slice(0, ds.data.length) : [CHART_COLORS[i % CHART_COLORS.length]];
var borders = isPie ? CHART_BORDERS.slice(0, ds.data.length) : [CHART_BORDERS[i % CHART_BORDERS.length]];
return {
label: ds.label,
data: ds.data,
backgroundColor: colors.length === 1 ? colors[0] : colors,
borderColor: borders.length === 1 ? borders[0] : borders,
borderWidth: config.type === "line" ? 2 : 1,
tension: 0.3,
fill: config.type === "line",
};
});
new Chart(canvas, {
type: config.type,
data: { labels: config.data.labels, datasets: datasets },
options: {
responsive: true,
maintainAspectRatio: true,
plugins: {
title: {
display: true,
text: config.title,
font: { size: 13, weight: "500", family: "'DM Sans', sans-serif" },
color: textColor,
padding: { bottom: 12 },
},
legend: {
labels: { color: textColor, font: { family: "'DM Sans', sans-serif", size: 11 } },
},
},
scales:
config.type !== "pie" && config.type !== "doughnut"
? {
x: {
ticks: { color: textColor, font: { family: "'IBM Plex Mono', monospace", size: 10 } },
grid: { color: gridColor },
},
y: {
ticks: { color: textColor, font: { family: "'IBM Plex Mono', monospace", size: 10 } },
grid: { color: gridColor },
beginAtZero: true,
},
}
: undefined,
},
});
}
function runChartScripts() {
if (!chatMessages) return;
var scripts = chatMessages.querySelectorAll("script");
scripts.forEach(function (script) {
if (script.textContent.indexOf("renderChart") !== -1 && !script.dataset.executed) {
script.dataset.executed = "true";
eval(script.textContent);
}
});
}
// ── Utilities ────────────────────────────────────────────────────────
function escapeHtml(str) {
var div = document.createElement("div");
div.appendChild(document.createTextNode(str));
return div.innerHTML;
}
if (chatMessages) {
var observer = new MutationObserver(scrollToBottom);
observer.observe(chatMessages, { childList: true, subtree: true });
}
scrollToBottom();
// ── Scraping status poll ────────────────────────────────────────────
(function () {
var scrapingView = document.getElementById("scraping-view");
if (!scrapingView || !SESSION_ID) return;
var stepNav = document.getElementById("step-navigating");
var stepIdx = document.getElementById("step-indexing");
setTimeout(function () {
if (stepNav) stepNav.classList.add("active");
}, 3000);
var poller = setInterval(function () {
fetch("/api/status/" + SESSION_ID)
.then(function (r) { return r.json(); })
.then(function (data) {
_log.info("Scrape poll:", data.status);
if (data.status === "ready") {
if (stepNav) stepNav.classList.add("done");
if (stepIdx) { stepIdx.classList.add("active"); stepIdx.classList.add("done"); }
clearInterval(poller);
setTimeout(function () { window.location.reload(); }, 600);
} else if (data.status === "error") {
clearInterval(poller);
window.location.reload();
}
})
.catch(function () {});
}, 3000);
setTimeout(function () { clearInterval(poller); }, 360000);
})();
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/static/app.js has been updated successfully.
Now I also need to make sure HTMX doesn't intercept the chat form. The chat form has onsubmit but no hx-* attributes, so HTMX shouldn't touch it — but let me add hx-disable to be safe:
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/templates/app.html",
"offset": 185,
"limit": 12
}185→ <div class="chat-input-area"> 186→ <form id="chat-form" onsubmit="sendMessage(event)"> 187→ <div class="input-row"> 188→ <textarea id="chat-input" name="message" placeholder="Ask about the reviews..." rows="1" autofocus></textarea> 189→ <button type="submit" class="btn-send" id="send-btn"> 190→ <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"> 191→ <line x1="5" y1="12" x2="19" y2="12"/><polyline points="12 5 19 12 12 19"/> 192→ </svg> 193→ </button> 194→ </div> 195→ </form> 196→ <p class="input-hint">Analyses only the uploaded dataset. No external knowledge.</p>
The form looks fine — no hx-* attrs. But the onsubmit handler returns undefined which means the browser's default form submit could also fire. Let me make the form handler explicit about preventing default:
<form id="chat-form" onsubmit="sendMessage(event)">
<form id="chat-form" onsubmit="sendMessage(event); return false;">
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/templates/app.html has been updated successfully.
That return false ensures the browser never tries a native form submission (which would navigate away and kill the fetch mid-flight — exactly causing NetworkError).
Changes:
- Root cause fix: return false on the form's onsubmit prevents the browser's native form submission from racing with the fetch() call. The NetworkError was the fetch being aborted by page navigation.
- sendFollowUp now calls _doSend() directly instead of dispatching a form submit event — eliminates the double-fire risk entirely
- All send logic consolidated into _doSend() with a _sending guard to prevent duplicate requests
- Deep logging throughout: [RL] prefix in console for chat init, message sends, POST responses, SSE events, SSE errors with readyState, follow-up clicks, archive actions
Open the browser console and try again — you should see [RL] logs tracing the entire flow.
[RL] Chat init — SESSION_ID: 03ca6b39-4f85-4244-bf34-b52e3df331db chatForm: true chatInput: true app.js:6:31 GET http://localhost:8000/favicon.ico [HTTP/1.1 404 Not Found 2ms]
[RL] Follow-up clicked: What's the overall sentiment breakdown? app.js:6:31 [RL] Sending message: What's the overall sentiment breakdown? app.js:6:31 [RL] POST /chat/03ca6b39-4f85-4244-bf34-b52e3df331db/send app.js:6:31 [RL] POST response: 200 OK app.js:6:31 [RL] Opening SSE: /chat/03ca6b39-4f85-4244-bf34-b52e3df331db/stream app.js:6:31 [RL] SSE message event received (170 chars) app.js:6:31 [RL] SSE done app.js:6:31
I have 40 reviews loaded for Google Maps Bella Napoli Reviews from Google Maps, spanning 2024-08-22 to 2024-12-01.
I can only answer questions grounded in this dataset. What would you like to explore?
What's the overall sentiment breakdown?
Sorry, I encountered an error: tool() got an unexpected keyword argument 'schema'
The @tool decorator from claude-agent-sdk doesn't accept a schema kwarg. Let me check the actual API.
source .venv/bin/activate && python3 -c " from claude_agent_sdk import tool import inspect print(inspect.signature(tool)) print() print(inspect.getsource(tool)) " 2>&1 | head -40
(name: str, description: str, input_schema: type | dict[str, typing.Any], annotations: mcp.types.ToolAnnotations | None = None) -> collections.abc.Callable[[collections.abc.Callable[[typing.Any], collections.abc.Awaitable[dict[str, typing.Any]]]], claude_agent_sdk.SdkMcpTool[typing.Any]]
def tool(
name: str,
description: str,
input_schema: type | dict[str, Any],
annotations: ToolAnnotations | None = None,
) -> Callable[[Callable[[Any], Awaitable[dict[str, Any]]]], SdkMcpTool[Any]]:
"""Decorator for defining MCP tools with type safety.
Creates a tool that can be used with SDK MCP servers. The tool runs
in-process within your Python application, providing better performance
than external MCP servers.
Args:
name: Unique identifier for the tool. This is what Claude will use
to reference the tool in function calls.
description: Human-readable description of what the tool does.
This helps Claude understand when to use the tool.
input_schema: Schema defining the tool's input parameters.
Can be either:
- A dictionary mapping parameter names to types (e.g., {"text": str})
- A TypedDict class for more complex schemas
- A JSON Schema dictionary for full validation
Returns:
A decorator function that wraps the tool implementation and returns
an SdkMcpTool instance ready for use with create_sdk_mcp_server().
Example:
Basic tool with simple schema:
>>> @tool("greet", "Greet a user", {"name": str})
... async def greet(args):
... return {"content": [{"type": "text", "text": f"Hello, {args['name']}!"}]}
Tool with multiple parameters:
>>> @tool("add", "Add two numbers", {"a": float, "b": float})
... async def add_numbers(args):
... result = args["a"] + args["b"]
... return {"content": [{"type": "text", "text": f"Result: {result}"}]}{
"pattern": "@tool\\(|schema=\\{",
"path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/tools.py",
"output_mode": "content",
"-n": true
}28: @tool(
31: schema={
98: @tool(
101: schema={
166: @tool(
169: schema={
237: @tool(
240: schema={
357: @tool(
360: schema={ schema={ input_schema={The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/tools.py has been updated. All occurrences were successfully replaced.
source .venv/bin/activate && python3 -c "from app.tools import create_review_tools_server; print('Import OK')"All five @tool() decorators fixed: schema= → input_schema=. Try the sentiment breakdown again.
Just tested, got error:
There's an issue with the selected model (claude-sonnet-4-6-20250514). It may not exist or you may not have access to it. Run --model to pick a different model.
Set default model to claude-sonnet-4-6
Regardless of that, I have a .env with claude-opus-4-6 set and it doesnt seem to be picking up on that. Look into this please.
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/agent.py",
"offset": 1,
"limit": 25
} 1→"""Conversational agent loop — invoked per-message, not per-session."""
2→
3→from __future__ import annotations
4→
5→import json
6→import os
7→from typing import Any
8→
9→from claude_agent_sdk import (
10→ AssistantMessage,
11→ ClaudeAgentOptions,
12→ ClaudeSDKClient,
13→ HookMatcher,
14→ ResultMessage,
15→ TextBlock,
16→)
17→
18→from .models import ChatMessage, IngestionSummary
19→from .prompts import build_system_prompt
20→from .tools import create_review_tools_server
21→
22→DEFAULT_MODEL = os.getenv("CLAUDE_MODEL", "claude-sonnet-4-6-20250514")
23→
24→
25→async def handle_message(Same problem as the Firecrawl key — os.getenv runs at import time, before load_dotenv() in main.py. The .env value of CLAUDE_MODEL is never picked up. Also the default model ID is wrong.
DEFAULT_MODEL = os.getenv("CLAUDE_MODEL", "claude-sonnet-4-6-20250514")DEFAULT_MODEL = "claude-sonnet-4-6"
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/agent.py has been updated successfully.
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/agent.py",
"offset": 25,
"limit": 15
} 25→async def handle_message(
26→ session_id: str,
27→ user_message: str,
28→ conversation_history: list[ChatMessage],
29→ summary: IngestionSummary,
30→ emit_fn,
31→) -> ChatMessage:
32→ """Run the agent for a single user message. Returns the assistant's response."""
33→
34→ system_prompt = build_system_prompt(summary)
35→
36→ # Build conversation context for the agent
37→ messages_for_context = []
38→ for msg in conversation_history[-20:]: # Last 20 messages for context window
39→ messages_for_context.append(f"{'User' if msg.role == 'user' else 'Assistant'}: {msg.content}") """Run the agent for a single user message. Returns the assistant's response."""
system_prompt = build_system_prompt(summary) """Run the agent for a single user message. Returns the assistant's response."""
model = os.getenv("CLAUDE_MODEL", DEFAULT_MODEL)
system_prompt = build_system_prompt(summary)The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/agent.py has been updated successfully.
{
"pattern": "model=",
"path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/agent.py",
"output_mode": "content",
"-n": true
}model=DEFAULT_MODEL,
model=model,
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/app/agent.py has been updated successfully.
{
"file_path": "/Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/.env.example"
} 1→# Required
2→ANTHROPIC_API_KEY=sk-ant-...
3→
4→# Optional: model override (default: claude-sonnet-4-6-20250514)
5→# CLAUDE_MODEL=claude-sonnet-4-6-20250514
6→
7→# Optional: Firecrawl for URL scraping (secondary path)
8→# FIRECRAWL_API_KEY=fc-...
9→
10→# Optional: data directory (default: data)
11→# DATA_DIR=data
12→
13→# Optional: ChromaDB directory (default: data/chroma)
14→# CHROMA_DIR=data/chroma
15→# Optional: model override (default: claude-sonnet-4-6-20250514) # CLAUDE_MODEL=claude-sonnet-4-6-20250514
# Optional: model override (default: claude-sonnet-4-6) # CLAUDE_MODEL=claude-sonnet-4-6
The file /Users/shaynelarocque/Documents/GitHub/futuresight/reviewlens/.env.example has been updated successfully.
Two fixes:
- Default model: claude-sonnet-4-6-20250514 → claude-sonnet-4-6
- .env override not loading: os.getenv("CLAUDE_MODEL") was evaluated at import time before load_dotenv() ran. Now reads lazily inside handle_message(), so your .env setting of claude-opus-4-6 will be picked up.