blog and backlinks
This commit is contained in:
47
tmp/clean_articles.py
Normal file
47
tmp/clean_articles.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
# A drafting-metadata line: optional indentation, optional list/quote markers,
# optional emphasis markers, then one of the known labels.  "Target:" is only
# treated as metadata when an asterisk immediately precedes it (``*Target: ...*``),
# so ordinary prose that starts with the word "Target:" is left alone.
_META_LINE = re.compile(
    r"[ \t]*(?:[-+*>][ \t]+)*[*_]*"
    r"(?:(?<=\*)Target:"
    r"|Word count target:"
    r"|Internal links to add:"
    r"|Author bio:"
    r"|Screenshots to include:"
    r"|Note:[*_]*[ \t]+AI-assisted draft)"
)

# A line that consists solely of a ``---`` separator (three or more hyphens).
_SEPARATOR_LINE = re.compile(r"[ \t]*-{3,}[ \t]*")


def _clean_markdown(content):
    """Return *content* without drafting-metadata lines and trailing separators."""
    kept = []
    after_removal = False
    for line in content.split("\n"):
        if _META_LINE.match(line):
            # Treat the removed line as a blank one so the text above and below
            # it is not glued together into a single paragraph.
            line = ""
            after_removal = True
        if line.strip():
            after_removal = False
        elif after_removal and (not kept or not kept[-1].strip()):
            # Collapse the blank lines that surrounded removed metadata, but
            # leave blank runs elsewhere (e.g. inside code fences) untouched.
            continue
        kept.append(line)

    # Drop every trailing blank line and ``---`` separator, not just the last
    # one: a footer is typically fenced by separators on both sides.
    while kept and (not kept[-1].strip() or _SEPARATOR_LINE.fullmatch(kept[-1])):
        kept.pop()

    return "\n".join(kept).strip()


def clean_articles(directory):
    """Strip drafting metadata from every ``.md`` file directly inside *directory*.

    Lines that start with one of the drafting labels (``*Target:``,
    ``Word count target:``, ``Internal links to add:``, ``Author bio:``,
    ``Note: AI-assisted draft`` or ``Screenshots to include:``), optionally
    preceded by a list marker or bold/italic markers, are removed as whole
    lines.  Trailing ``---`` separators and surrounding whitespace are removed
    as well.  A label that merely appears in the middle of a prose line is
    kept, and ``---`` sequences elsewhere in the document (table delimiter
    rows, inner separators) are not modified.

    Files are rewritten in place as UTF-8, and only when the cleaned text
    differs from what was read, so running the function twice is a no-op the
    second time.  ``Cleaned <filename>`` is printed for each rewritten file.

    Args:
        directory: Path of the folder whose ``.md`` files should be cleaned.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read
            or written.
    """
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".md"):
            continue
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            # A sub-directory whose name happens to end in ".md".
            continue

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        cleaned = _clean_markdown(content)
        if cleaned == content:
            continue

        with open(path, "w", encoding="utf-8") as f:
            f.write(cleaned)
        print(f"Cleaned {filename}")
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # The original hard-coded articles folder stays the default; an optional
    # first command-line argument overrides it so the script can be run
    # against a different directory or on another machine.
    default_directory = r"c:\Users\a931627\Documents\QRMASTER\articles"
    clean_articles(sys.argv[1] if len(sys.argv) > 1 else default_directory)
|
||||
Reference in New Issue
Block a user