From 950946762feaeda97b015ec552e4790b9d9c9e1c Mon Sep 17 00:00:00 2001
From: jorts
Date: Sun, 31 Aug 2025 11:31:58 +0100
Subject: [PATCH] fetch: archive Arctic Shift API results as HTML instead of
 Markdown

---
 tools/fetch.py | 68 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 15 deletions(-)

diff --git a/tools/fetch.py b/tools/fetch.py
index 583f543..9c5c588 100644
--- a/tools/fetch.py
+++ b/tools/fetch.py
@@ -249,7 +249,7 @@ def is_arctic_shift_api(url: str) -> bool:
     return 'arctic-shift.photon-reddit.com' in parsed.netloc and '/api/' in parsed.path
 
 def archive_arctic_shift_api(url: str) -> str:
-    """Archive Arctic Shift API results as minimal Markdown"""
+    """Archive Arctic Shift API results as HTML"""
     try:
         headers = {"User-Agent": "Mozilla/5.0 (ArchiveBot/1.0)"}
        response = requests.get(url, timeout=30, headers=headers)
@@ -259,7 +259,7 @@ def archive_arctic_shift_api(url: str) -> str:
         comments = data.get('data', [])
 
         if not comments:
-            return "# Reddit Comments\n\nNo comments found.\n"
+            return "<h1>Reddit Comments</h1>\n<p>No comments found.</p>\n"
" # Extract search info from URL for title parsed_url = urlparse(url) @@ -281,7 +281,7 @@ def archive_arctic_shift_api(url: str) -> str: title = "Comments by " + " • ".join(title_parts) if title_parts else "Reddit Comments" - md_content = f"# {title}\n\n" + html_content = f"

{html.escape(title)}

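+        # each comment is rendered below as its own block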
\n\n" for comment in comments: # Extract comment info @@ -300,9 +300,11 @@ def archive_arctic_shift_api(url: str) -> str: date_obj = datetime.datetime.fromtimestamp(created_utc, tz=datetime.timezone.utc) date_str = date_obj.strftime('%Y-%m-%d %H:%M UTC') - # Format the comment - md_content += f"**{permalink}**\n" - md_content += f"{reddit_url}\n\n" + # Format the comment as HTML + html_content += '
\n' + html_content += f'
\n' + html_content += f' {html.escape(permalink)}\n' + html_content += f'
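+            # the anchor shows the escaped permalink text and links back to reddit_url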
 
             # User info line
             user_info = f"u/{author} • {score} points"
@@ -310,21 +312,23 @@ def archive_arctic_shift_api(url: str) -> str:
                 user_info += f" • {date_str}"
             if subreddit:
                 user_info += f" • r/{subreddit}"
-            md_content += f"{user_info}\n\n"
+            html_content += f'<p>{html.escape(user_info)}</p>\n'
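+            # user_info is plain text, e.g. "u/author • 12 points • 2025-08-31 10:00 UTC • r/sub"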
 
             # Comment body (handle newlines properly)
             if body:
                 # Replace \n with actual newlines and clean up
                 clean_body = body.replace('\\n', '\n').strip()
-                md_content += f"{clean_body}\n\n"
+                # Convert newlines to HTML line breaks and escape HTML
+                clean_body_html = html.escape(clean_body).replace('\n', '<br>\n')
+                html_content += f'<p>{clean_body_html}</p>\n'
 
-            md_content += "---\n\n"
+            html_content += '</div>\n<hr>\n\n'
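+            # closes the per-comment <div>; the <hr> replaces the old "---" separator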
 
-        return md_content
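+        # the caller wraps this fragment with archive_style and the archive header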
+        return html_content
 
     except Exception as e:
         print(f"⚠ Arctic Shift API archiving failed ({e})")
-        return f"# Error\n\nFailed to archive API response: {e}\n"
+        return f"<h1>Error</h1>\n<p>Failed to archive API response: {html.escape(str(e))}</p>\n"
" def convert_ihsoyct_to_api_url(url: str) -> str: """Convert ihsoyct.github.io URL to Arctic Shift API URL""" @@ -373,10 +377,10 @@ def archive(url: str, out_dir: pathlib.Path, force: bool): url = convert_ihsoyct_to_api_url(url) print(f" API URL: {url}") - # Check for API URL and change extension to .md + # Check for API URL and change extension to .html is_api_url = is_arctic_shift_api(url) if is_api_url or is_reddit_search_tool(original_url): - fname = fname.with_suffix('.md') + fname = fname.with_suffix('.html') if fname.exists() and not force: print(f"✓ cached: {original_url}") @@ -389,8 +393,42 @@ def archive(url: str, out_dir: pathlib.Path, force: bool): if is_arctic_shift_api(url): content = archive_arctic_shift_api(url) - # For markdown, just add header and content - final_content = generate_markdown_archive_header(original_url, archive_date) + content + # Enhanced styling with archive header for HTML + archive_style = """ + + """ + final_content = ( + "\n" + + "\n" + + archive_style + "\n" + + generate_archive_header(original_url, archive_date) + "\n" + + content + ) elif is_reddit_url(url): content = archive_reddit(url) # Enhanced styling with archive header for HTML