From b22e5f225a5d75deeaff1feae4bc6a4773240789 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Brandenburg?= <bbb@mpi-sws.org>
Date: Mon, 8 Aug 2022 14:45:40 +0200
Subject: [PATCH] CI: detect & remove multiline verbatim blocks

---
 scripts/extract-comments.py       | 7 +++++--
 scripts/flag-typos-in-comments.sh | 1 -
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/extract-comments.py b/scripts/extract-comments.py
index 2b4f854f3..21d396e67 100755
--- a/scripts/extract-comments.py
+++ b/scripts/extract-comments.py
@@ -6,6 +6,7 @@ import os
 import re
 
 INLINE_CODE_RE = re.compile(r'\[[^]]*?\]')
+VERBATIM_RE = re.compile(r'<<.*?>>')
 
 def comment_ranges(src):
     "Identify comments in Coq .v files."
@@ -41,8 +42,10 @@ def process(opts, fname):
     out = sys.stdout
     for (a, b) in comment_ranges(src):
         txt = src[a:b]
-        print(INLINE_CODE_RE.sub('', txt))
-#     out.close()
+        txt = txt.replace('\n', ' ')
+        txt = VERBATIM_RE.sub('', txt)
+        txt = INLINE_CODE_RE.sub('', txt)
+        print(txt)
 
 def parse_args():
     parser = argparse.ArgumentParser(
diff --git a/scripts/flag-typos-in-comments.sh b/scripts/flag-typos-in-comments.sh
index 07ecd96b3..72d3d9a80 100755
--- a/scripts/flag-typos-in-comments.sh
+++ b/scripts/flag-typos-in-comments.sh
@@ -10,7 +10,6 @@ do
 
 	#Here, sed is used to remove verbatim text (enclosed in <<>>)
 	for WORD in $(scripts/extract-comments.py "$SRC" \
-		| sed 's/<<.*>>//g' \
 		| hunspell -p $KNOWN_EXCEPTIONS -l -d en_US \
 		| sort \
 		| uniq)
-- 
GitLab