patchodon/patchodon

#!/usr/bin/env python3

import argparse
import hashlib
import html2text
import os
import re
import requests
import sys
import time
from pathlib import Path

# NOTES: html2text: html2text
# the replies are listed by context, should be link-listed to avoid issues,
# should specify next hash to provide some kind of a filter
# visibility public+unlisted, all unlisted, all private, all direct

# Program version; shown in the --help epilog and sent in the dpaste User-agent.
VERSION = "0.1.0"

# Base URL of the dpaste service the patch files are uploaded to.
DPASTE_URL = "https://dpaste.com"  # TODO any good way to parametrize this?

# Maximum number of characters allowed in a single posted status.
STATUS_LENGTH_LIMIT = 400  # TODO obtain from instance

# Module-wide html2text setting; presumably makes links render as plain text
# instead of markdown link syntax when statuses are converted from HTML --
# TODO confirm against the html2text documentation.
html2text.config.IGNORE_ANCHORS = True


def trace(x):
    """Write the diagnostic message `x` to stderr, prefixed by the program name."""
    pieces = [sys.argv[0], ": ", x, "\n"]
    sys.stderr.write("".join(pieces))


def api_token(args):
    """Return the Mastodon API token selected by the command-line options.

    A token passed via `--debug-api-token` is used as-is. Otherwise, when
    `--env-api-token` was given, the token is read from the environment
    variable PATCHODON_API_TOKEN.

    Raises:
        RuntimeError: if no token source was selected, or if the environment
            variable was requested but is not set.
    """
    if args.debug_api_token:
        return args.debug_api_token
    if args.env_api_token:
        token = os.environ.get("PATCHODON_API_TOKEN")
        if token is None:
            raise RuntimeError(
                "environment variable PATCHODON_API_TOKEN is not set"
            )
        return token
    # Python 3 cannot raise plain strings (that is itself a TypeError), so a
    # real exception type is required to get the message to the user.
    raise RuntimeError("API token not specified")


def auth_headers(args):
    """Build the HTTP headers that authorize a request to the Mastodon API.

    Returns:
        A dict with a single `Authorization: Bearer <token>` entry.

    Raises:
        RuntimeError: if no instance URL was given on the command line.
        Whatever `api_token` raises when no token is available.
    """
    if not args.instance_url:
        # Plain strings cannot be raised in Python 3; use a real exception.
        raise RuntimeError("mastodon instance not specified")

    token = api_token(args)

    return {"Authorization": f"Bearer {token}"}


def do_post_status(args, body, parent=None, optional=None):
    """Post one status to the Mastodon instance and return its id and URL.

    Args:
        args: parsed command-line options (instance URL and token source).
        body: mandatory status text; must fit into STATUS_LENGTH_LIMIT.
        parent: id of the status to reply to, or None for a top-level status.
        optional: extra text appended on a new line after `body`, truncated
            to whatever room is left under STATUS_LENGTH_LIMIT.

    Returns:
        A tuple `(id, url)` taken from the JSON reply of the instance.

    Raises:
        RuntimeError: if `body` alone is too long, or the instance does not
            answer with HTTP 200.
    """
    if len(body) > STATUS_LENGTH_LIMIT:
        raise RuntimeError("required status body too long")

    # One character of the remaining budget goes to the separating newline.
    # The room must be checked explicitly: a negative slice bound would count
    # from the end of `optional` and produce an over-long status.
    room = STATUS_LENGTH_LIMIT - len(body) - 1
    st = body
    if optional and room > 0:
        st += "\n" + optional[:room]

    data = {"status": st, "visibility": "direct"}  # TODO parametrize direct
    if parent:
        data["in_reply_to_id"] = parent

    r = requests.post(
        args.instance_url + "/api/v1/statuses",
        data=data,
        headers=auth_headers(args),
    )

    if r.status_code != 200:
        raise RuntimeError(
            f"mastodon status posting failed ({r.status_code})"
        )

    rj = r.json()
    return (rj["id"], rj["url"])


def do_pastebin_file(file):
    """Upload the patch file `file` to dpaste and return its raw-text URL.

    The returned URL is the `location` header of the reply with `.txt`
    appended.

    Raises:
        RuntimeError: if dpaste does not answer with HTTP 201.
    """
    # DPASTE API USE RULES:
    # - user-agent must be set properly
    # - 1 second between requests
    trace(f"sending `{file}' to dpaste...")
    r = requests.post(
        DPASTE_URL + "/api/v2/",
        data={
            "content": Path(file).read_text(),
            "syntax": "diff",
            "title": os.path.basename(file),
            "expiry_days": 1,  # TODO remove after testing
        },
        headers={"User-agent": f"patchodon v{VERSION}"},
    )
    # Sleep unconditionally so that consecutive uploads respect the rate rule.
    time.sleep(1.1)
    if r.status_code != 201:
        # Plain strings cannot be raised in Python 3; use a real exception.
        raise RuntimeError(f"dpaste POST failed for `{file}'")
    return r.headers["location"] + ".txt"


def split_off_diff(s):
    """Return the part of `s` that precedes the first `diff --git` line."""
    head, _sep, _rest = s.partition("\ndiff --git ")
    return head


def mapl(f, xs):
    """Apply `f` to every item of `xs` and collect the results in a list."""
    return [f(x) for x in xs]


def mayline(s):
    """Return `s` followed by a newline, or an empty string if `s` is falsy."""
    return s + "\n" if s else ""


def do_post(args):
    """Upload a patch series and announce it in a thread of statuses.

    The patch file names come from `args.patchfile`, or from stdin (one name
    per line) when none were given. Every file is uploaded to dpaste, then a
    header status carrying the hash of the whole series and the short hashes
    of the individual patches is posted, followed by one reply per patch
    (each replying to the previous status). The URL of the header status is
    printed to stdout.

    Raises:
        RuntimeError: if there are no patch files to post.
        Whatever the upload and posting helpers raise on failure.
    """
    files = args.patchfile
    if not files:
        trace("reading patchfile series from stdin")
        # str.rstrip() accepts its argument positionally only; blank lines
        # (e.g. a trailing empty line) are not file names and are skipped.
        stripped = (line.rstrip("\n") for line in sys.stdin)
        files = [name for name in stripped if name]
    if not files:
        # A header without any patch hash could never be parsed back.
        raise RuntimeError("no patch files specified")

    n_patches = len(files)
    # Read every file once; the text is needed for hashing and for the
    # human-readable excerpt in the per-patch status.
    contents = [Path(fn).read_text() for fn in files]
    hashes = [hashlib.sha1(text.encode()).hexdigest() for text in contents]
    short_hashes = [hsh[0:8] for hsh in hashes]
    full_hash = hashlib.sha1(" ".join(hashes).encode()).hexdigest()
    paste_raw_urls = [do_pastebin_file(fn) for fn in files]

    trace("posting the header...")
    parent_post_id, url = do_post_status(
        args,
        f"{mayline(args.recipient)}{mayline(args.subject)}"
        f"[patchodon: {full_hash} / {' '.join(short_hashes)}]",
    )
    for series, (text, pst, hsh) in enumerate(
        zip(contents, paste_raw_urls, hashes), start=1
    ):
        trace(f"posting patch {series}/{n_patches}...")
        # Chain the thread: each patch status replies to the previous one.
        parent_post_id, _ = do_post_status(
            args,
            f"{mayline(args.recipient)}"
            f"[patchodon {series}/{n_patches} {hsh}]\n"
            f"{pst}\n",
            parent=parent_post_id,
            optional=split_off_diff(text),
        )
    print(url)


def find_head_post(args):
    """Look up the status at `args.patch_url` through the instance search API.

    Returns:
        A tuple `(status id, account id, HTML content)` of the single status
        whose URL equals `args.patch_url`.

    Raises:
        RuntimeError: if the search request fails, or the URL matches no
            status or more than one status.
    """
    r = requests.get(
        args.instance_url + "/api/v2/search",
        headers=auth_headers(args),
        params={"resolve": "true", "limit": "10", "q": args.patch_url},
    )
    if r.status_code != 200:
        # Plain strings cannot be raised in Python 3; use a real exception.
        raise RuntimeError("status URL search failed!")

    # The search may return unrelated hits; keep exact URL matches only.
    sts = [x for x in r.json()["statuses"] if x["url"] == args.patch_url]

    if not sts:
        raise RuntimeError("status URL not found")

    if len(sts) > 1:
        raise RuntimeError("ambiguous status URL?")

    st = sts[0]
    return (st["id"], st["account"]["id"], st["content"])


def get_descendant_statuses(args, parent):
    """Fetch the descendants of status `parent` from its context endpoint.

    Returns:
        The `descendants` list of the JSON reply, or an empty list when the
        reply has no such key.

    Raises:
        RuntimeError: if the instance does not answer with HTTP 200.
    """
    r = requests.get(
        args.instance_url + f"/api/v1/statuses/{parent}/context",
        headers=auth_headers(args),
    )
    if r.status_code != 200:
        # Plain strings cannot be raised in Python 3; use a real exception.
        raise RuntimeError(f"retrieval of context failed for {parent}")
    return r.json().get("descendants", [])


# Header line of a series: the 40-digit hash of the whole series, then one
# or more space-separated 8-digit short hashes of the individual patches.
re_head = re.compile(
    r"^\[patchodon: ([0-9a-f]{40}) /(( +[0-9a-f]{8})+)\]$", re.MULTILINE
)

# Per-patch status: "[patchodon N/TOTAL <40-digit hash>]" followed on the next
# line by the raw dpaste URL. The dots of the host name are escaped so that
# only dpaste.com itself is accepted (an unescaped `.' matches any character).
# NOTE: the host is hard-coded here and does not follow DPASTE_URL.
re_patch = re.compile(
    r"^\[patchodon ([0-9]+)/([0-9]+) ([0-9a-f]{40})\]"
    r" *\n(https://dpaste\.com/[a-zA-Z0-9]+\.txt)$",
    re.MULTILINE,
)


def parse_matching_status(st, parent, account, n, total_n, short_hash):
    """Check whether status `st` carries patch `n` of `total_n` and fetch it.

    The status must reply directly to `parent`, come from `account`, contain
    a patchodon patch marker with the expected numbering and a hash starting
    with `short_hash`, and the text downloaded from the URL in the marker
    must hash to the full hash given in the marker.

    Returns `(status id, patch text)` on success and None otherwise; most
    rejections are reported via `trace`.
    """
    if st["in_reply_to_id"] != parent:
        trace(f"wrong reply in status {st['id']}")
        return None
    if st["account"]["id"] != account:
        trace(f"bad account in status {st['id']}")
        return None

    text = html2text.html2text(st["content"])
    found = re_patch.search(text)
    if found is None:
        # Not a patch status at all; skipped without a message.
        return None

    seq_no, seq_total, long_hash, url = found.groups()
    if (seq_no, seq_total) != (str(n), str(total_n)):
        trace(f"patch mis-ordered in status {st['id']}")
        return None
    if long_hash[:8] != short_hash:
        trace(f"patch hash mismatch in status {st['id']}")
        return None

    r = requests.get(url)
    if r.status_code != 200:
        trace(f"could not get patch from status {st['id']} via {url}")
        return None

    patch_text = r.text
    digest = hashlib.sha1(patch_text.encode()).hexdigest()
    if digest != long_hash:
        trace(f"patch hash differs from file in status {st['id']}")
        return None
    return (st["id"], patch_text)


def do_get(args):
    """Download the patch series announced by the status at `args.patch_url`.

    The header status is parsed for the hash of the whole series and the
    short hashes of the patches. The thread is then followed one patch at a
    time: patch i+1 must be the single matching reply to the status that
    carried patch i (the header for the first patch). After the hash of the
    whole series is verified, the patches are written either to numbered
    files starting with `args.out_prefix`, or to stdout.

    Raises:
        RuntimeError: if the header is missing, a patch cannot be located
            unambiguously, the series hash does not match, or an output file
            exists and `--overwrite` was not given.
    """
    st_id, st_acct_id, st_content_html = find_head_post(args)
    st_content = html2text.html2text(st_content_html)

    # parse out the hash and subhashes
    match = re_head.search(st_content)
    if not match:
        # Plain strings cannot be raised in Python 3; use a real exception.
        raise RuntimeError("no patchodon header found")
    full_hash = match.groups()[0]
    short_hashes = [x for x in match.groups()[1].split(" ") if x]
    n_patches = len(short_hashes)
    if n_patches == 0:
        # Explicit check instead of `assert`, which is removed under -O.
        raise RuntimeError("no patch hashes found in the patchodon header")

    patches = []
    parent = st_id
    for i, short_hash in enumerate(short_hashes):
        trace(f"getting patch {i+1} ({short_hash})...")
        # Candidates are the descendants of the previous status in the chain;
        # parse_matching_status() rejects everything that is not a direct
        # reply by the original author carrying the expected patch.
        sts = get_descendant_statuses(args, parent)
        candidates = (
            parse_matching_status(
                x, parent, st_acct_id, i + 1, n_patches, short_hash
            )
            for x in sts
        )
        ok_sts = [c for c in candidates if c is not None]
        if not ok_sts:
            raise RuntimeError(
                f"no suitable patches found for {i+1} ({short_hash})"
            )
        if len(ok_sts) > 1:
            raise RuntimeError(
                f"ambiguous statuses for patch {i+1} ({short_hash})"
            )
        parent, ok_st_patch = ok_sts[0]
        patches.append(ok_st_patch)

    # verify the full hash
    hashes = [hashlib.sha1(x.encode()).hexdigest() for x in patches]
    computed_full_hash = hashlib.sha1(" ".join(hashes).encode()).hexdigest()
    if computed_full_hash != full_hash:
        raise RuntimeError("hash checksums do not match!")

    # print out stuff
    if args.out_prefix:
        for i, patch in enumerate(patches):
            path = args.out_prefix + f"{i+1:04d}.patch"
            if not args.overwrite and os.path.exists(path):
                raise RuntimeError(f"refusing to overwrite {path}")
            Path(path).write_text(patch)
    else:
        for patch in patches:
            sys.stdout.write(patch)
            sys.stdout.write("\n")  # be nice


def main():
    """Parse the command line and run the selected `post` or `get` command.

    Raises:
        RuntimeError: if the parsed command is neither `post` nor `get`.
    """
    ap = argparse.ArgumentParser(
        prog=sys.argv[0],
        epilog="patchodon.py version " + VERSION + " is a free software.",
        description="Publicly send and receive git patch series via Mastodon.",
    )

    # API token sources (at most one may be given)
    group = ap.add_mutually_exclusive_group()
    group.add_argument(
        "--debug-api-token",
        help=(
            "specify the API token on command line (not very secure,"
            " good for debugging only)"
        ),
    )
    group.add_argument(
        "-e",
        "--env-api-token",
        action="store_true",
        help="get the API token from environment PATCHODON_API_TOKEN",
    )

    ap.add_argument(
        "-i",
        "--instance-url",
        help=(
            "mastodon instance URL to use, such as `https://mastodon.example/'"
        ),
    )

    cmds = ap.add_subparsers(required=True, dest="command")

    # POST command
    post = cmds.add_parser("post")
    post.add_argument(
        "-r",
        "--recipient",
        default=None,
        help=(
            "user tag to prepend to all posted statuses (required esp. for"
            " direct sending of statuses)"
        ),
    )
    post.add_argument(
        "-s",
        "--subject",
        default=None,
        help=(
            "opening text of the initial post, ideally used to specify the"
            " target project and patch topic"
        ),
    )
    post.add_argument(
        "patchfile",
        nargs="*",
        help=(
            "filenames of the patch series; taken from stdin if none are"
            " specified (useful for piping the output of git-format-patch"
            " into patchodon)"
        ),
    )

    # GET command
    get = cmds.add_parser("get")
    get.add_argument(
        "patch_url",
        help=(
            "root URL of the status where the patch was posted (the status"
            " should contain the patch hash)"
        ),
    )
    get.add_argument(
        "-C",
        "--out-prefix",
        help=(
            "instead of writing to stdout (for piping to git-am), write"
            " the numbered patchfiles to files with a given prefix"
            " (specifying `./patchodon-' will produce files like"
            " `./patchodon-0001.patch')"
        ),
    )
    get.add_argument(
        "--overwrite",
        action="store_true",
        help="overwrite existing patch files instead of failing",
    )

    ap.add_argument(
        "-c",
        "--config",
        # expanduser() does not fail when HOME is unset, unlike a direct
        # os.environ["HOME"] lookup, which would crash before parsing starts.
        default=os.path.expanduser("~/.patchodon.ini"),
        help=(
            "specify a custom config INI file that may specify a section"
            " [patchodon] with keys instance_url and api_token; defaults to"
            " `$HOME/.patchodon.ini', specify `/dev/null' to avoid config"
            " loading"
        ),
    )
    args = ap.parse_args()

    # TODO patch args from config (if found)

    if args.command == "post":
        do_post(args)
    elif args.command == "get":
        do_get(args)
    else:
        # Plain strings cannot be raised in Python 3; use a real exception.
        raise RuntimeError("fatal: args borked")


# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()