import os
import re

# Leading "*Target: ...*" line, together with the line breaks that follow it.
_TARGET_LINE = re.compile(r"^\*Target:.*?\*[\r\n]*", re.MULTILINE)

# Editorial metadata markers. Each match is deleted from the marker to the end
# of its line. Order matters: the bold variants run first, otherwise the plain
# variants would match inside them and leave a stray "**" behind.
_METADATA_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\*\*Word count target:\*\*.*",
        r"\*\*Internal links to add:\*\*.*",
        r"\*\*Author bio:\*\*.*",
        r"\*\*Note:\*\* AI-assisted draft.*",
        r"\*Target:.*",
        r"Word count target:.*",
        r"Internal links to add:.*",
        r"Author bio:.*",
        r"Note: AI-assisted draft.*",
        r"Screenshots to include:.*",
    )
)

# A line consisting solely of "---" (optionally followed by spaces/tabs), plus
# any newlines directly around it. Anchoring to the whole line keeps table
# delimiter rows ("|---|---|"), longer dash runs and inline "---" untouched.
_SEPARATOR_LINE = re.compile(r"\n*^---[ \t]*$\n*", re.MULTILINE)


def _clean_text(content):
    """Return *content* with draft metadata and trailing separators removed.

    The result is stripped of leading/trailing whitespace, and cleaning an
    already-cleaned text returns it unchanged.
    """
    content = _TARGET_LINE.sub("", content)
    for pattern in _METADATA_PATTERNS:
        content = pattern.sub("", content)

    # Give every standalone separator exactly one blank line on each side.
    # Replacing the surrounding newlines (rather than adding to them) is what
    # keeps repeated runs from piling up blank lines.
    # NOTE: any standalone "---" line is treated as a separator, including
    # front-matter delimiters and setext heading underlines.
    content = _SEPARATOR_LINE.sub("\n\n---\n\n", content)

    # Drop separators left dangling at the end once the footer metadata that
    # followed them has been removed.
    content = content.strip()
    while content == "---" or content.endswith("\n---"):
        content = content[:-3].rstrip()
    return content


def clean_articles(directory):
    """Strip draft metadata from every ``.md`` file directly inside *directory*.

    Each matching file is read as UTF-8, cleaned, and rewritten in place;
    a ``Cleaned <filename>`` line is printed per file. Removed content:

    - the ``*Target: ...*`` line,
    - ``Word count target:``, ``Internal links to add:``, ``Author bio:``,
      ``Note: AI-assisted draft`` and ``Screenshots to include:`` entries
      (bold or plain), from the marker to the end of the line,
    - separators (``---``) left at the end of the document.

    Subdirectories are not traversed, and entries that are not regular files
    are skipped even if their name ends in ``.md``.

    Args:
        directory: Path of the directory containing the article files.
    """
    for filename in os.listdir(directory):
        if not filename.endswith(".md"):
            continue
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            # e.g. a folder named "something.md"; open() would raise on it.
            continue

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        with open(path, "w", encoding="utf-8") as f:
            f.write(_clean_text(content))
        print(f"Cleaned {filename}")

if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default articles
    # directory; with no argument the script behaves as it always has.
    default_directory = r"c:\Users\a931627\Documents\QRMASTER\articles"
    clean_articles(sys.argv[1] if len(sys.argv) > 1 else default_directory)