48 lines
2.2 KiB
Python
48 lines
2.2 KiB
Python
import os
|
|
import re
|
|
|
|
# Editorial metadata that must not appear in a published article.
# Order matters: the anchored "*Target: ...*" rule runs first so it also
# swallows the blank lines that follow it, and every bold ("**Label:**") form
# is tried before its plain form so no stray "**" is left behind.
_META_PATTERNS = (
    re.compile(r"^\*Target:.*?\*[\r\n]*", re.MULTILINE),
    re.compile(r"\*\*Word count target:\*\*.*"),
    re.compile(r"\*\*Internal links to add:\*\*.*"),
    re.compile(r"\*\*Author bio:\*\*.*"),
    re.compile(r"\*\*Note:\*\* AI-assisted draft.*"),
    re.compile(r"\*\*Screenshots to include:\*\*.*"),
    re.compile(r"\*Target:.*"),
    re.compile(r"Word count target:.*"),
    re.compile(r"Internal links to add:.*"),
    re.compile(r"Author bio:.*"),
    re.compile(r"Note: AI-assisted draft.*"),
    re.compile(r"Screenshots to include:.*"),
)

# A line consisting solely of "---" (optionally followed by blanks), together
# with any newlines directly around it. Anchoring to a whole line keeps table
# delimiter rows such as "|---|---|" and longer dash runs untouched.
_SEPARATOR_LINE = re.compile(r"\n*^---[ \t]*$\n*", re.MULTILINE)


def _clean_content(content: str) -> str:
    """Return *content* with draft metadata and trailing separators removed."""
    for pattern in _META_PATTERNS:
        content = pattern.sub("", content)

    # Surround every standalone separator with exactly one blank line. Without
    # the blank line before it, Markdown would read "text\n---" as a heading.
    # Replacing the neighbouring newlines as well makes the step idempotent.
    content = _SEPARATOR_LINE.sub("\n\n---\n\n", content)

    # Removing footer metadata can leave several separators stacked at the
    # end of the document; drop all of them, not just the last one.
    content = content.strip()
    while content == "---" or content.endswith("\n---"):
        content = content[:-3].rstrip()
    return content


def clean_articles(directory):
    """Strip draft metadata from every ``.md`` file directly inside *directory*.

    Each Markdown file is read as UTF-8, cleaned, and rewritten in place only
    when the cleaned text differs from what is on disk. Sub-directories are
    not descended into, and entries that are not regular files are skipped.
    A ``Cleaned <filename>`` line is printed for every processed file.

    Args:
        directory: Path of the folder containing the article files.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be
            read or written.
        UnicodeDecodeError: If a ``.md`` file is not valid UTF-8.
    """
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".md"):
            continue

        path = os.path.join(directory, filename)
        # A directory that happens to be named "*.md" cannot be opened.
        if not os.path.isfile(path):
            continue

        with open(path, "r", encoding="utf-8") as f:
            original = f.read()

        cleaned = _clean_content(original)

        # Leave already-clean files alone so their timestamps do not churn.
        if cleaned != original:
            with open(path, "w", encoding="utf-8") as f:
                f.write(cleaned)

        print(f"Cleaned {filename}")
|
|
|
|
if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default articles
    # directory, so the script can be run outside the original machine.
    # With no argument the behaviour is unchanged.
    default_directory = r"c:\Users\a931627\Documents\QRMASTER\articles"
    clean_articles(sys.argv[1] if len(sys.argv) > 1 else default_directory)
|