"""Strip drafting metadata from the Markdown articles in a directory."""

import os
import re
import sys

# Directory processed when the script is run without a command-line argument.
DEFAULT_ARTICLES_DIR = r"c:\Users\a931627\Documents\QRMASTER\articles"

# Drafting metadata to delete, applied in this order. The bold variants run
# before their plain counterparts so that no stray "**" is left behind.
# The author bio is removed on purpose: only the article content is kept.
_METADATA_PATTERNS = (
    # Leading "*Target: ...*" line, together with the line break(s) after it.
    re.compile(r"^\*Target:.*?\*[\r\n]*", re.MULTILINE),
    # Bold footer fields.
    re.compile(r"\*\*Word count target:\*\*.*"),
    re.compile(r"\*\*Internal links to add:\*\*.*"),
    re.compile(r"\*\*Author bio:\*\*.*"),
    re.compile(r"\*\*Note:\*\* AI-assisted draft.*"),
    # The same fields without bold markup.
    re.compile(r"\*Target:.*"),
    re.compile(r"Word count target:.*"),
    re.compile(r"Internal links to add:.*"),
    re.compile(r"Author bio:.*"),
    re.compile(r"Note: AI-assisted draft.*"),
    re.compile(r"Screenshots to include:.*"),
)

# A line holding nothing but "---" (indented by at most three spaces),
# together with the blank lines directly around it.
_SEPARATOR_LINE = re.compile(r"\n*^ {0,3}---[ \t]*$\n*", re.MULTILINE)


def _clean_text(content: str) -> str:
    """Return *content* without drafting metadata and trailing separators.

    The transformation is idempotent: cleaning already-cleaned text returns
    it unchanged.
    """
    for pattern in _METADATA_PATTERNS:
        content = pattern.sub("", content)

    # Put exactly one blank line on each side of every separator line. Only
    # whole-line separators are touched, so "---" inside table delimiter rows
    # ("|---|---|") or longer rules ("-----") is left alone.
    # NOTE: a bare "---" line inside a fenced code block is still treated as
    # a separator.
    content = _SEPARATOR_LINE.sub("\n\n---\n\n", content).strip()

    # Removing the footer metadata can leave separators dangling at the end.
    while content == "---" or content.endswith("\n---"):
        content = content[:-3].rstrip()

    return content


def clean_articles(directory: str) -> None:
    """Clean every ``.md`` file directly inside *directory*, in place.

    Each file is read as UTF-8, stripped of drafting metadata (target line,
    word-count target, internal links, author bio, AI-draft note,
    screenshot list) and of trailing ``---`` separators, then written back.
    A ``Cleaned <filename>`` line is printed for every processed file.

    Args:
        directory: Path of the directory holding the Markdown articles.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be
            read or written.
    """
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".md"):
            continue
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            # A directory that merely has a ".md" name cannot be cleaned.
            continue

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        with open(path, "w", encoding="utf-8") as f:
            f.write(_clean_text(content))
        print(f"Cleaned {filename}")


if __name__ == "__main__":
    # An optional first argument overrides the default articles directory.
    clean_articles(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_ARTICLES_DIR)