#!/usr/bin/env python3
"""
Gitea Issue Body Parser

Extracts structured data from Gitea issue markdown bodies:
- Title
- Context section
- Acceptance criteria (checkboxes)
- Labels
- Epic/parent references

Usage:
    python3 scripts/gitea_issue_parser.py issue_body.md --title "Issue title" --pretty
    python3 scripts/gitea_issue_parser.py --stdin --pretty
    python3 scripts/gitea_issue_parser.py --url ISSUE_API_URL --token TOKEN
"""

import argparse
import json
import os
import re
import sys
import urllib.request
from typing import Dict, List, Optional

# Any "#123"-style issue reference.
_ISSUE_REF_RE = re.compile(r"#(\d+)")
# "Closes #N", "Fixes #N", "Resolves #N" (singular or plural verb).
_CLOSING_REF_RE = re.compile(r"(?:Closes?|Fixes?|Resolves?)\s+#(\d+)", re.IGNORECASE)
# "Part of #N", "Epic #N", "Epic: #N", "Parent #N", "Blocks #N".  The keyword
# may be followed by a colon so that the "Epic: #N" form is recognised.
_PARENT_REF_RE = re.compile(
    r"(?:Part of|Epic|Parent|Blocks?)(?::\s*|\s+)#(\d+)", re.IGNORECASE
)
# A level-2 markdown header ("## Name"), matched against a single line.
_SECTION_HEADER_RE = re.compile(r"^##\s+(.+)$")
# A markdown task-list item: "- [ ] text", "- [x] text", "- [X] text", "- [] text".
_CHECKBOX_RE = re.compile(r"-\s*\[[ xX]?\]\s*(.+)")
# A plain numbered or bulleted list item ("1. text", "- text", "* text").
_LIST_ITEM_RE = re.compile(r"^\s*(?:\d+\.|-|\*)\s+(.+)", re.MULTILINE)

# Section names (lower-cased) checked, in priority order, for the context text.
_CONTEXT_KEYS = ("context", "background", "description", "problem")
# Section names (lower-cased) checked, in priority order, for acceptance criteria.
_CRITERIA_KEYS = (
    "acceptance criteria",
    "acceptance_criteria",
    "criteria",
    "requirements",
    "definition of done",
)
# Only bare "#N" references this close to the top of the body are considered.
_EARLY_BODY_CHARS = 200


def _find_epic_ref(title: str, body: str) -> Optional[str]:
    """Return the epic/parent reference as "#N", or None if there is none.

    Priority: a closing keyword anywhere in the body, then a parent keyword
    anywhere in the body, then the first bare "#N" in the title, then the
    first bare "#N" within the first 200 characters of the body.
    """
    for pattern in (_CLOSING_REF_RE, _PARENT_REF_RE):
        keyword_match = pattern.search(body)
        if keyword_match:
            return f"#{keyword_match.group(1)}"

    bare_refs = _ISSUE_REF_RE.findall(title or "") or _ISSUE_REF_RE.findall(
        body[:_EARLY_BODY_CHARS]
    )
    return f"#{bare_refs[0]}" if bare_refs else None


def _split_sections(body: str) -> Dict[str, str]:
    """Split *body* on "## " headers into {lower-cased header: stripped text}.

    Text before the first header is dropped; when a header name repeats, the
    later section replaces the earlier one.
    """
    sections: Dict[str, str] = {}
    name = None
    buffered: List[str] = []

    for line in body.split("\n"):
        header = _SECTION_HEADER_RE.match(line)
        if header is None:
            buffered.append(line)
            continue
        if name:
            sections[name] = "\n".join(buffered).strip()
        name = header.group(1).strip().lower()
        buffered = []

    if name:
        sections[name] = "\n".join(buffered).strip()
    return sections


def _extract_criteria(sections: Dict[str, str], body: str) -> List[str]:
    """Collect acceptance criteria from the criteria section or the whole body."""
    section_text = next(
        (sections[key] for key in _CRITERIA_KEYS if key in sections), None
    )

    criteria: List[str] = []
    if section_text:
        criteria = [m.group(1).strip() for m in _CHECKBOX_RE.finditer(section_text)]
        if not criteria:
            # No checkboxes in the section: accept a plain numbered/bulleted list.
            criteria = [
                m.group(1).strip() for m in _LIST_ITEM_RE.finditer(section_text)
            ]

    if not criteria:
        # Nothing found in a dedicated section: scan the whole body for checkboxes.
        criteria = [m.group(1).strip() for m in _CHECKBOX_RE.finditer(body)]
    return criteria


def parse_issue_body(
    body: str, title: str = "", labels: Optional[List[str]] = None
) -> dict:
    """Parse a Gitea issue body into structured JSON.

    Args:
        body: Markdown body of the issue. When empty or None, only the title
            and labels are filled in.
        title: Issue title; also searched for a "#N" epic reference.
        labels: Label names to copy into the result.

    Returns:
        A dict with the keys "title", "context", "criteria", "labels",
        "epic_ref" ("#N" or None) and "sections" (lower-cased "## " header
        name mapped to that section's stripped text).
    """
    result = {
        "title": title,
        "context": "",
        "criteria": [],
        # Copy so the result never aliases the caller's list.
        "labels": list(labels or []),
        "epic_ref": None,
        "sections": {},
    }
    if not body:
        return result

    result["epic_ref"] = _find_epic_ref(title, body)

    sections = _split_sections(body)
    result["sections"] = sections
    result["context"] = next(
        (sections[key] for key in _CONTEXT_KEYS if key in sections), ""
    )
    result["criteria"] = _extract_criteria(sections, body)
    return result


def parse_from_url(api_url: str, token: str = None) -> dict:
    """Parse an issue from a Gitea API URL.

    Args:
        api_url: URL whose response is a JSON object with "title", "body" and
            "labels" (a list of objects that each have a "name").
        token: Optional API token, sent as "Authorization: token <token>".

    Returns:
        The dict produced by parse_issue_body for the fetched issue.

    Raises:
        urllib.error.URLError: If the request fails.
        json.JSONDecodeError: If the response is not valid JSON.
    """
    headers = {}
    if token:
        headers["Authorization"] = f"token {token}"

    request = urllib.request.Request(api_url, headers=headers)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(request, timeout=30) as response:
        issue = json.loads(response.read())

    # Fields may be present but null in the JSON; normalise them before parsing.
    title = issue.get("title") or ""
    body = issue.get("body") or ""
    labels = [label["name"] for label in issue.get("labels") or []]
    return parse_issue_body(body, title, labels)


def main():
    """Command-line entry point: parse an issue and print the result as JSON."""
    parser = argparse.ArgumentParser(description="Parse Gitea issue body into structured JSON")
    parser.add_argument("input", nargs="?", help="Issue body file (or - for stdin)")
    parser.add_argument("--url", help="Gitea API URL for the issue")
    parser.add_argument("--stdin", action="store_true", help="Read from stdin")
    parser.add_argument("--token", help="Gitea API token (or set GITEA_TOKEN env var)")
    parser.add_argument("--title", default="", help="Issue title (for epic ref extraction)")
    parser.add_argument("--labels", nargs="*", default=[], help="Issue labels")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    args = parser.parse_args()

    token = args.token or os.environ.get("GITEA_TOKEN")

    # Input precedence: --url, then stdin (--stdin or "-"), then a file path.
    if args.url:
        result = parse_from_url(args.url, token)
    elif args.stdin or args.input == "-":
        result = parse_issue_body(sys.stdin.read(), args.title, args.labels)
    elif args.input:
        with open(args.input) as f:
            body = f.read()
        result = parse_issue_body(body, args.title, args.labels)
    else:
        parser.print_help()
        sys.exit(1)

    indent = 2 if args.pretty else None
    print(json.dumps(result, indent=indent))


if __name__ == "__main__":
    main()