Files
orgweb/main.py
2026-02-16 09:06:28 -07:00

598 lines
22 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import html
import mimetypes
import os
import re
import socketserver
import threading
import webbrowser
from dataclasses import dataclass
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Iterable
from urllib.parse import parse_qs, urlencode, urlparse
@dataclass
class OrgFile:
    """One .org note found by the directory scan, with metadata parsed from its text."""

    # Location of the file on disk.
    path: Path
    # Normalized path relative to the scanned base directory; used as the
    # note's key in URLs and in link resolution.
    relative_path: str
    # Text of the first "#+TITLE:" line, or the file stem when there is none.
    title: str
    # Value of the first ":ID:" line, or None when the file declares no ID.
    file_id: str | None
    # Full text of the file.
    content: str
# Directory containing this script; the template, static assets and default
# config file are all located relative to it.
ROOT_DIR = Path(__file__).resolve().parent
TEMPLATE_PATH = ROOT_DIR / "templates" / "index.html"
STATIC_DIR = ROOT_DIR / "static"
# "#+TITLE: ..." keyword line (case-insensitive); group 1 is the title text.
TITLE_RE = re.compile(r"^\s*#\+TITLE:\s*(.+?)\s*$", flags=re.IGNORECASE)
# ":ID: value" line (case-insensitive); group 1 is the whitespace-free ID token.
ID_RE = re.compile(r"^\s*:ID:\s*(\S+)\s*$", flags=re.IGNORECASE)
# Org bracket link "[[target]]" or "[[target][label]]"; group 1 is the target,
# group 2 the optional label.
ORG_LINK_RE = re.compile(r"\[\[([^\]]+)\](?:\[([^\]]*)\])?\]")
# Bare http(s) URL; stops at whitespace, quotes, angle brackets, parentheses
# or square brackets.
URL_RE = re.compile(r"https?://[^\s<>'\"()\[\]]+")
# "CREATED: ..." or "#+CREATED: ..." line; group 1 is the rest of the line.
CREATED_LINE_RE = re.compile(r"^\s*(?:#\+)?CREATED:\s*(.+?)\s*$", flags=re.IGNORECASE)
# Timestamp wrapped in [ ] or < >, with an optional three-letter day name and
# optional HH:MM; groups 1-3 are year, month, day and groups 4-5 hour, minute.
ORG_TIMESTAMP_RE = re.compile(r"[\[<](\d{4})-(\d{2})-(\d{2})(?:\s+\w{3})?(?:\s+(\d{2}):(\d{2}))?[\]>]")
DEFAULT_CONFIG_PATH = ROOT_DIR / "config.yaml"
def scan_org_files(base_dir: Path) -> list[OrgFile]:
    """Recursively find .org files under base_dir and return their contents.

    Directories named ``.git``, ``.venv``, ``venv`` and ``__pycache__`` are
    not descended into. Files are decoded as UTF-8 with undecodable bytes
    replaced, and files that cannot be read at all are skipped so that one
    unreadable note does not break the whole listing.

    Args:
        base_dir: Root directory to scan.

    Returns:
        The notes found, sorted case-insensitively by relative path.
    """
    skipped_dirs = {".git", ".venv", "venv", "__pycache__"}
    org_files: list[OrgFile] = []
    for root, dirs, files in os.walk(base_dir):
        # Prune in place so os.walk never enters the skipped directories.
        dirs[:] = [d for d in dirs if d not in skipped_dirs]
        root_path = Path(root)
        for filename in files:
            if not filename.lower().endswith(".org"):
                continue
            file_path = root_path / filename
            try:
                # errors="replace" yields the same text as a strict decode for
                # valid UTF-8, so a single read covers both cases.
                content = file_path.read_text(encoding="utf-8", errors="replace")
            except OSError:
                # Unreadable (permissions, removed mid-scan, ...): leave it out.
                continue
            org_files.append(
                OrgFile(
                    path=file_path,
                    relative_path=normalize_relative_path(str(file_path.relative_to(base_dir))),
                    title=extract_org_title(content, file_path.stem),
                    file_id=extract_org_id(content),
                    content=content,
                )
            )
    org_files.sort(key=lambda f: f.relative_path.lower())
    return org_files
def extract_org_title(content: str, fallback: str) -> str:
    """Return the text of the first ``#+TITLE:`` line, or *fallback* if there is none."""
    candidates = (TITLE_RE.match(line) for line in content.splitlines())
    first_hit = next((hit for hit in candidates if hit), None)
    if first_hit is None:
        return fallback
    return first_hit.group(1).strip()
def extract_org_id(content: str) -> str | None:
    """Return the value of the first ``:ID:`` line, or None if there is none."""
    candidates = (ID_RE.match(line) for line in content.splitlines())
    first_hit = next((hit for hit in candidates if hit), None)
    if first_hit is None:
        return None
    return first_hit.group(1).strip()
def normalize_relative_path(path_value: str) -> str:
    """Return *path_value* as a clean, forward-slash relative path.

    Backslashes are treated as separators, ``.``/``..`` segments are
    collapsed, and any leading ``/``, ``.`` or ``..`` segments are dropped so
    the result never points above or outside the base directory. Names that
    merely begin with a dot (``.hidden/a.org``, ``.notes.org``) are kept
    intact.

    Args:
        path_value: A relative or absolute path using ``/`` or ``\\``.

    Returns:
        The normalized path, or ``""`` when nothing remains.
    """
    # posixpath keeps "/" as the separator on every platform; os.path.normpath
    # would turn the slashes back into backslashes on Windows.
    import posixpath

    normalized = posixpath.normpath(path_value.replace("\\", "/"))
    segments = normalized.split("/")
    # Strip whole leading segments only; a character-set strip such as
    # lstrip("./") would also eat the dot of a hidden file or directory name.
    start = 0
    while start < len(segments) and segments[start] in {"", ".", ".."}:
        start += 1
    return "/".join(segments[start:])
def extract_org_link_targets(content: str) -> list[str]:
    """Return the stripped target of every ``[[target]]`` / ``[[target][label]]`` link, in order."""
    return [found.group(1).strip() for found in ORG_LINK_RE.finditer(content)]
def resolve_link_target(
    source_relative_path: str,
    raw_target: str,
    known_paths: set[str],
    id_to_path: dict[str, str],
) -> str | None:
    """Map an Org link target to the relative path of a known note.

    Args:
        source_relative_path: Relative path of the note containing the link;
            non-absolute targets are resolved against its directory.
        raw_target: Link target text as written inside ``[[...]]``.
        known_paths: Relative paths of all scanned notes.
        id_to_path: Lower-cased note ID mapped to the note's relative path.

    Returns:
        The relative path of the linked note, or None when the target is
        empty, is an external link, or matches no known note.
    """
    link = raw_target.strip()
    if not link:
        return None

    # Discard an in-file search suffix ("::...") and then any "#fragment".
    link = link.partition("::")[0].partition("#")[0]

    if link.lower().startswith("id:"):
        return id_to_path.get(link[3:].strip().lower())

    if link.startswith("file:"):
        link = link[5:]
    elif "://" in link or (":" in link and not link.endswith(".org")):
        # URLs and other schemes (mailto:, ...) are not note links.
        return None

    if link.startswith("/"):
        # A leading slash anchors the target at the base directory.
        unresolved = link.lstrip("/")
    else:
        unresolved = os.path.join(os.path.dirname(source_relative_path), link)

    candidate = normalize_relative_path(unresolved)
    if not candidate:
        return None
    if candidate in known_paths:
        return candidate

    # Links may omit the extension.
    suffixed = f"{candidate}.org"
    if not candidate.endswith(".org") and suffixed in known_paths:
        return suffixed
    return None
def find_backlinks(org_files: Iterable[OrgFile], selected_path: str) -> list[OrgFile]:
    """Return the notes that contain at least one link resolving to *selected_path*.

    The selected note itself is never included, each relative path appears at
    most once, and the result is ordered case-insensitively by title and then
    by relative path.
    """
    notes = list(org_files)
    known_paths = {note.relative_path for note in notes}
    id_to_path = {note.file_id.lower(): note.relative_path for note in notes if note.file_id}

    linking_notes: list[OrgFile] = []
    recorded: set[str] = set()
    for note in notes:
        rel = note.relative_path
        if rel == selected_path or rel in recorded:
            continue
        # any() stops at the first link that points at the selected note.
        points_here = any(
            resolve_link_target(rel, raw, known_paths, id_to_path) == selected_path
            for raw in extract_org_link_targets(note.content)
        )
        if points_here:
            linking_notes.append(note)
            recorded.add(rel)

    linking_notes.sort(key=lambda note: (note.title.lower(), note.relative_path.lower()))
    return linking_notes
def build_backlink_counts(org_files: Iterable[OrgFile]) -> dict[str, int]:
    """Count, for every note, how many other notes link to it.

    A source note contributes at most one to each target regardless of how
    many links it holds, and links from a note to itself are ignored.

    Returns:
        A mapping from each note's relative path to its backlink count.
    """
    notes = list(org_files)
    known_paths = {note.relative_path for note in notes}
    id_to_path = {note.file_id.lower(): note.relative_path for note in notes if note.file_id}
    totals = dict.fromkeys((note.relative_path for note in notes), 0)

    for note in notes:
        rel = note.relative_path
        # Collapse the note's links to the distinct notes they point at.
        distinct_targets = {
            resolve_link_target(rel, raw, known_paths, id_to_path)
            for raw in extract_org_link_targets(note.content)
        }
        for target in distinct_targets:
            if target and target != rel and target in totals:
                totals[target] += 1
    return totals
def _timestamp_match_to_sort_key(match: re.Match[str]) -> int | None:
    """Pack a timestamp match into one integer shaped like ``YYYYMMDDHHMM``.

    Groups 1-5 of *match* are read as year, month, day, hour and minute; a
    missing hour or minute counts as zero. Returns None if a group is not a
    valid integer.
    """
    year_s, month_s, day_s, hour_s, minute_s = (match.group(i) for i in range(1, 6))
    try:
        fields = (
            int(year_s),
            int(month_s),
            int(day_s),
            int(hour_s or "0"),
            int(minute_s or "0"),
        )
    except ValueError:
        return None
    # Decimal place value of each field in the packed key.
    weights = (100000000, 1000000, 10000, 100, 1)
    return sum(value * weight for value, weight in zip(fields, weights))
def extract_created_sort_key(content: str) -> int | None:
    """Return a sortable creation key for a note, or None if no timestamp exists.

    The first ``CREATED:`` / ``#+CREATED:`` line carrying a usable timestamp
    wins; otherwise the first timestamp anywhere in the text is used.
    """
    for line in content.splitlines():
        created = CREATED_LINE_RE.match(line)
        stamp = ORG_TIMESTAMP_RE.search(created.group(1)) if created else None
        if stamp is None:
            continue
        key = _timestamp_match_to_sort_key(stamp)
        if key is not None:
            return key

    fallback = ORG_TIMESTAMP_RE.search(content)
    if not fallback:
        return None
    return _timestamp_match_to_sort_key(fallback)
def note_href(relative_path: str, edit_mode: bool) -> str:
    """Build the URL of a note's page, adding ``edit=1`` when *edit_mode* is set."""
    query = [("file", relative_path)]
    if edit_mode:
        query.append(("edit", "1"))
    return f"/?{urlencode(query)}"
def truncate_label(text: str, max_chars: int = 32) -> str:
    """Shorten *text* to at most *max_chars* characters, ending in ``...`` when cut.

    When *max_chars* is 3 or less there is no room for any text, so the result
    is just that many dots.
    """
    if len(text) <= max_chars:
        return text
    ellipsis = "..."
    if max_chars <= len(ellipsis):
        return "." * max_chars
    kept = text[: max_chars - 3]
    return f"{kept}{ellipsis}"
def render_plain_text_with_links(text: str) -> str:
    """HTML-escape *text*, turning bare http(s) URLs into links that open in a new tab."""
    pieces: list[str] = []
    position = 0
    for found in URL_RE.finditer(text):
        begin, finish = found.span()
        if begin > position:
            # Plain text between the previous URL and this one.
            pieces.append(html.escape(text[position:begin]))
        safe_url = html.escape(found.group(0), quote=True)
        pieces.append(
            f"<a class='org-link' href='{safe_url}' target='_blank' rel='noopener noreferrer'>{safe_url}</a>"
        )
        position = finish
    remainder = text[position:]
    if remainder:
        pieces.append(html.escape(remainder))
    return "".join(pieces)
def render_line_with_links(
    line: str, source_relative_path: str, known_paths: set[str], id_to_path: dict[str, str]
) -> str:
    """Render one line of note text as HTML.

    Org bracket links that resolve to a known note become links to that
    note's page, labelled with the link's label or, failing that, its raw
    target. Everything else, including unresolved bracket links, is rendered
    as escaped text with bare URLs linked.
    """
    pieces: list[str] = []
    position = 0
    for found in ORG_LINK_RE.finditer(line):
        begin, finish = found.span()
        if begin > position:
            pieces.append(render_plain_text_with_links(line[position:begin]))

        target_text = found.group(1).strip()
        description = (found.group(2) or "").strip()
        destination = resolve_link_target(source_relative_path, target_text, known_paths, id_to_path)
        if not destination:
            # Not a known note: show the original markup as plain text.
            pieces.append(render_plain_text_with_links(found.group(0)))
        else:
            safe_href = html.escape(note_href(destination, False), quote=True)
            shown = description or target_text
            pieces.append(f"<a class='org-link' href='{safe_href}'>{html.escape(shown)}</a>")
        position = finish

    remainder = line[position:]
    if remainder:
        pieces.append(render_plain_text_with_links(remainder))
    return "".join(pieces)
def render_org_to_html(
    content: str, source_relative_path: str, known_paths: set[str], id_to_path: dict[str, str]
) -> str:
    """Very small org-ish renderer: headings become section titles, body keeps line breaks.

    A line whose first non-blank characters are one or more ``*`` followed by
    a space becomes an ``<h2>``..``<h6>`` heading (one level below the star
    count, capped at 6). Blank lines become spacer ``<div>`` elements and
    every other line becomes a ``<p>`` with its links rendered.
    """
    rendered: list[str] = []
    for line in content.splitlines():
        stripped = line.lstrip()

        star_count = len(stripped) - len(stripped.lstrip("*"))
        # The one-character slice is "" at end of line, so "***" is not a heading.
        if star_count and stripped[star_count : star_count + 1] == " ":
            level = min(star_count + 1, 6)
            title = html.escape(stripped[star_count + 1 :])
            rendered.append(f"<h{level}>{title}</h{level}>")
            continue

        if stripped:
            rendered_line = render_line_with_links(line, source_relative_path, known_paths, id_to_path)
            rendered.append(f"<p>{rendered_line}</p>")
        else:
            rendered.append("<div class='spacer'></div>")
    return "\n".join(rendered)
def find_org_file(org_files: Iterable[OrgFile], relative_path: str | None) -> OrgFile | None:
    """Return the first note whose ``relative_path`` equals *relative_path*, else None."""
    if relative_path is None:
        return None
    return next(
        (candidate for candidate in org_files if candidate.relative_path == relative_path),
        None,
    )
def _file_label_spans(org_file: OrgFile) -> str:
    """Return the title/path ``<span>`` pair shared by sidebar and backlink entries."""
    safe_title = html.escape(truncate_label(org_file.title))
    safe_path = html.escape(truncate_label(org_file.relative_path))
    full_title = html.escape(org_file.title, quote=True)
    full_path = html.escape(org_file.relative_path, quote=True)
    return (
        f"<span class='file-title' title='{full_title}'>{safe_title}</span>"
        f"<span class='file-path' title='{full_path}'>{safe_path}</span>"
    )


def _fill_template(template: str, values: dict[str, str]) -> str:
    """Replace every ``{{NAME}}`` placeholder named in *values* in a single pass."""
    pattern = re.compile("|".join(re.escape("{{" + name + "}}") for name in values))
    # A callable replacement inserts the text verbatim (no backslash escapes).
    return pattern.sub(lambda found: values[found.group(0)[2:-2]], template)


def build_index_page(
    org_files: Iterable[OrgFile],
    selected_path: str | None = None,
    edit_mode: bool = False,
    status_message: str | None = None,
    status_level: str = "success",
) -> str:
    """Render the full HTML page: sidebar, main note view or editor, and backlinks.

    Args:
        org_files: All scanned notes.
        selected_path: Relative path of the note to show; the first note is
            shown when this is None or matches no note.
        edit_mode: Show the edit form instead of the rendered note.
        status_message: Optional message displayed above the note.
        status_level: ``"error"`` styles the message as an error; any other
            value styles it as success.

    Returns:
        The template text with its ``{{NAV_ITEMS}}``, ``{{MAIN_CONTENT}}`` and
        ``{{BACKLINKS}}`` placeholders filled in.
    """
    org_files = list(org_files)
    if not org_files:
        body = "<p>No .org files were found in this directory.</p>"
        nav = ""
        backlinks_html = "<p class='backlinks-empty'>Open a note to see backlinks.</p>"
    else:
        selected = find_org_file(org_files, selected_path) or org_files[0]
        known_paths = {f.relative_path for f in org_files}
        id_to_path = {f.file_id.lower(): f.relative_path for f in org_files if f.file_id}
        backlinks = find_backlinks(org_files, selected.relative_path)
        backlink_counts = build_backlink_counts(org_files)

        nav_items = []
        for f in org_files:
            active = "active" if f.relative_path == selected.relative_path else ""
            safe_href = html.escape(note_href(f.relative_path, False), quote=True)
            searchable = html.escape(f"{f.title} {f.relative_path}".lower(), quote=True)
            backlinks_count = backlink_counts.get(f.relative_path, 0)
            created_key = extract_created_sort_key(f.content)
            created_key_attr = "" if created_key is None else str(created_key)
            nav_items.append(
                f"<a class='file-link {active}' data-search='{searchable}' "
                f"data-backlinks='{backlinks_count}' data-created-ts='{created_key_attr}' href='{safe_href}'>"
                f"{_file_label_spans(f)}"
                "</a>"
            )
        nav = "\n".join(nav_items)

        # The toolbar link always leads to the opposite of the current mode.
        if edit_mode:
            toggle_href = html.escape(note_href(selected.relative_path, False), quote=True)
            mode_toggle = f"<a class='mode-link' href='{toggle_href}'>Preview</a>"
        else:
            toggle_href = html.escape(note_href(selected.relative_path, True), quote=True)
            mode_toggle = f"<a class='mode-link' href='{toggle_href}'>Edit</a>"

        status_html = ""
        if status_message:
            safe_message = html.escape(status_message)
            # Only two CSS classes exist; never echo the raw level into markup.
            safe_level = "error" if status_level == "error" else "success"
            status_html = f"<p class='status status-{safe_level}'>{safe_message}</p>"

        if backlinks:
            backlink_items = []
            for source in backlinks:
                # Backlinks keep the current mode so editing can continue across notes.
                safe_href = html.escape(note_href(source.relative_path, edit_mode), quote=True)
                backlink_items.append(
                    f"<a class='backlink-item' href='{safe_href}'>"
                    f"{_file_label_spans(source)}"
                    "</a>"
                )
            backlinks_html = "\n".join(backlink_items)
        else:
            backlinks_html = "<p class='backlinks-empty'>No notes link to this note yet.</p>"

        if edit_mode:
            body = (
                f"<h2>Editing {html.escape(selected.relative_path)}</h2>"
                f"<div class='toolbar'>{mode_toggle}</div>"
                f"{status_html}"
                "<form class='editor-form' method='post' action='/edit'>"
                f"<input type='hidden' name='file' value='{html.escape(selected.relative_path, quote=True)}'>"
                f"<textarea class='editor-box' name='content'>{html.escape(selected.content)}</textarea>"
                "<button class='submit-btn' type='submit'>Submit</button>"
                "</form>"
            )
        else:
            body = (
                f"<h2>{html.escape(selected.relative_path)}</h2>"
                f"<div class='toolbar'>{mode_toggle}</div>"
                f"{status_html}"
                f"<article class='org-content'>{render_org_to_html(selected.content, selected.relative_path, known_paths, id_to_path)}</article>"
            )

    template = TEMPLATE_PATH.read_text(encoding="utf-8")
    # Fill all placeholders in one pass over the template. Chained
    # str.replace() calls would rescan text that was just inserted, so a note
    # whose title or body contains e.g. "{{BACKLINKS}}" would get page
    # fragments spliced into it.
    return _fill_template(
        template,
        {"NAV_ITEMS": nav, "MAIN_CONTENT": body, "BACKLINKS": backlinks_html},
    )
def make_handler(base_dir: Path):
    """Create the HTTP request-handler class bound to *base_dir*.

    Args:
        base_dir: Directory whose .org files are listed, rendered and edited.
            It is rescanned on every page view and every save.

    Returns:
        A ``BaseHTTPRequestHandler`` subclass suitable for ``HTTPServer``.
    """

    class OrgRequestHandler(BaseHTTPRequestHandler):
        """Serves the index page, static assets, and the ``/edit`` save endpoint."""

        def do_GET(self) -> None:
            """Serve ``/static/...`` files, or render the index page at ``/``."""
            parsed = urlparse(self.path)
            if parsed.path.startswith("/static/"):
                self.serve_static(parsed.path)
                return
            if parsed.path != "/":
                self.send_error(404, "Not found")
                return

            params = parse_qs(parsed.query)
            selected_path = params.get("file", [None])[0]
            edit_mode = params.get("edit", ["0"])[0] == "1"
            saved = params.get("saved", ["0"])[0] == "1"
            error = params.get("error", [""])[0]

            # Status codes arrive in the query string after a POST redirect.
            status_message = None
            status_level = "success"
            if saved:
                status_message = "Saved successfully."
            elif error == "missing":
                status_message = "Select a valid .org file first."
                status_level = "error"
            elif error == "write":
                status_message = "Could not write this file."
                status_level = "error"

            org_files = scan_org_files(base_dir)
            html_page = build_index_page(
                org_files,
                selected_path=selected_path,
                edit_mode=edit_mode,
                status_message=status_message,
                status_level=status_level,
            )
            encoded = html_page.encode("utf-8")
            self.send_response(200)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.send_header("Content-Length", str(len(encoded)))
            self.end_headers()
            self.wfile.write(encoded)

        def do_POST(self) -> None:
            """Save the submitted note text, then redirect back to the editor."""
            parsed = urlparse(self.path)
            if parsed.path != "/edit":
                self.send_error(404, "Not found")
                return

            # A malformed header must not raise, and a negative length must
            # not reach read(), where it would mean "read until EOF" and block.
            try:
                content_length = int(self.headers.get("Content-Length", "0"))
            except ValueError:
                content_length = -1
            if content_length < 0:
                self.send_error(400, "Invalid Content-Length")
                return

            body = self.rfile.read(content_length).decode("utf-8", errors="replace")
            params = parse_qs(body)
            selected_path = params.get("file", [None])[0]
            # Browsers submit textarea text with CRLF line breaks; convert back
            # to "\n" so a save does not rewrite the note with CRLF endings.
            new_content = params.get("content", [""])[0].replace("\r\n", "\n")

            # Only files found by the scan can be written, so the submitted
            # path can never address anything outside base_dir.
            org_files = scan_org_files(base_dir)
            selected = find_org_file(org_files, selected_path)
            if selected is None:
                self.redirect_with_query("/", {"edit": "1", "error": "missing"})
                return
            try:
                selected.path.write_text(new_content, encoding="utf-8")
            except OSError:
                self.redirect_with_query("/", {"file": selected.relative_path, "edit": "1", "error": "write"})
                return
            self.redirect_with_query("/", {"file": selected.relative_path, "edit": "1", "saved": "1"})

        def redirect_with_query(self, path: str, params: dict[str, str]) -> None:
            """Send a 303 redirect to *path* with *params* as the query string."""
            location = f"{path}?{urlencode(params)}"
            self.send_response(303)
            self.send_header("Location", location)
            self.send_header("Content-Length", "0")
            self.end_headers()

        def serve_static(self, request_path: str) -> None:
            """Send the file under ``STATIC_DIR`` named by a ``/static/...`` path."""
            static_root = STATIC_DIR.resolve()
            rel_path = request_path[len("/static/") :]
            candidate = (STATIC_DIR / rel_path).resolve()
            # Resolving first means "../" segments and symlinks cannot escape
            # the static directory.
            if static_root not in candidate.parents and candidate != static_root:
                self.send_error(403, "Forbidden")
                return
            if not candidate.is_file():
                self.send_error(404, "Not found")
                return
            data = candidate.read_bytes()
            content_type = mimetypes.guess_type(str(candidate))[0] or "application/octet-stream"
            self.send_response(200)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", str(len(data)))
            self.end_headers()
            self.wfile.write(data)

        def log_message(self, fmt: str, *args) -> None:
            """Suppress the base class's per-request logging."""
            return

    return OrgRequestHandler
def serve(base_dir: Path, host: str, port: int, open_browser: bool = True) -> None:
    """Run the web server for *base_dir* until interrupted with Ctrl-C.

    Args:
        base_dir: Directory of .org files to serve.
        host: Address to bind.
        port: Port to bind; the port actually bound is the one printed.
        open_browser: Open the served URL in the default browser shortly
            after startup.
    """

    class ReusableHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
        daemon_threads = True
        allow_reuse_address = True

    server = ReusableHTTPServer((host, port), make_handler(base_dir))
    bound_port = server.server_address[1]
    url = f"http://{host}:{bound_port}/"
    print(f"Serving org files from {base_dir}")
    print(f"Open {url}")

    def launch_browser() -> None:
        webbrowser.open(url)

    if open_browser:
        # Short delay so the server is accepting connections first.
        threading.Timer(0.4, launch_browser).start()

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down.")
    finally:
        server.server_close()
def parse_args() -> argparse.Namespace:
    """Parse the command-line options: config path, notes directory, host, port, browser flag."""
    parser = argparse.ArgumentParser(description="Serve local .org files in a browser-friendly web view.")
    add_option = parser.add_argument

    add_option(
        "--config",
        default=str(DEFAULT_CONFIG_PATH),
        help=f"Path to config YAML file (default: {DEFAULT_CONFIG_PATH})",
    )
    add_option("--dir", default="notes", help="Directory to scan for .org files (default: notes)")
    # --host and --port default to None so the caller can tell "not given"
    # apart from an explicit value and fall back to the config file.
    add_option("--host", default=None, help="Host/IP to bind the web server (overrides config bind_addr)")
    add_option("--port", type=int, default=None, help="Port to bind (overrides config bind_port)")
    add_option("--no-browser", action="store_true", help="Don't auto-open the browser")

    return parser.parse_args()
def _strip_quotes(value: str) -> str:
    """Remove one pair of matching single or double quotes wrapping *value*, if present."""
    if len(value) < 2:
        return value
    opener, closer = value[0], value[-1]
    if opener == closer and opener in ("'", '"'):
        return value[1:-1]
    return value
def load_runtime_config(config_path: Path) -> dict[str, str | int]:
    """Read ``bind_addr`` and ``bind_port`` from a simple ``key: value`` config file.

    Only flat ``key: value`` lines are understood. Blank lines, lines starting
    with ``#`` and lines without a colon are ignored, everything from the
    first ``#`` in a value onward is dropped as a comment, and one pair of
    wrapping quotes is removed. A missing file yields the defaults.

    Args:
        config_path: Path of the config file; it does not have to exist.

    Returns:
        ``{"bind_addr": str, "bind_port": int}``, defaulting to
        ``127.0.0.1`` and ``8000``.

    Raises:
        ValueError: If ``bind_port`` is not an integer or is outside 1-65535.
    """
    config: dict[str, str] = {}
    if config_path.exists():
        for raw_line in config_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line or line.startswith("#") or ":" not in line:
                continue
            key, _, value = line.partition(":")
            key = key.strip()
            if not key:
                continue
            # Drop a trailing "# comment" before unquoting.
            value = value.split("#", 1)[0].strip()
            config[key] = _strip_quotes(value)

    # An empty "bind_addr:" must not be passed on as "": an empty host binds
    # to every interface and produces the unusable URL "http://:<port>/".
    bind_addr = config.get("bind_addr") or "127.0.0.1"
    bind_port_raw = config.get("bind_port", "8000")
    try:
        bind_port = int(bind_port_raw)
    except ValueError as exc:
        raise ValueError(f"Invalid bind_port in {config_path}: {bind_port_raw!r}") from exc
    if not 1 <= bind_port <= 65535:
        raise ValueError(f"bind_port out of range in {config_path}: {bind_port}")
    return {"bind_addr": bind_addr, "bind_port": bind_port}
def main() -> None:
    """Entry point: combine command-line options with the config file and start serving."""
    args = parse_args()
    config = load_runtime_config(Path(args.config))

    # Explicit command-line values take precedence over the config file.
    host = str(config["bind_addr"]) if args.host is None else args.host
    port = int(config["bind_port"]) if args.port is None else args.port
    notes_dir = Path(args.dir).resolve()

    serve(notes_dir, host, port, open_browser=not args.no_browser)


if __name__ == "__main__":
    main()