Fetches each feed in an OPML file and checks when it was last updated. Useful for finding discontinued or inactive feeds.
0
stale-feed-checker.sh
72 lines 2.1 kB view raw
#!/usr/bin/env bash
#
# stale-feed-checker.sh
#
# Fetches every feed referenced by an OPML file and reports the ones whose
# first date-like element is older than a threshold.
#
# Usage: stale-feed-checker.sh -f <opml_file> [-d <days>]
#   -f  OPML file to read (required)
#   -d  age threshold in days, a non-negative integer (default: 90)
#   -h  show usage
#
# Needs curl, and a `date` (or `gdate`) that understands `-d <string>`.
#
# `set -e` / `pipefail` are deliberately NOT enabled: the fetch pipeline is
# expected to "fail" routinely (grep finds no date, or grep -m 1 closes the
# pipe and curl exits non-zero), and that must not abort the whole run.
set -u

OPML_FILE=""
DAYS=90

# Print the usage text and terminate with status 1.
usage() {
  echo "Usage: $0 -f <opml_file> [-d <days>]"
  echo "Example: $0 -f subscriptions.opml -d 180"
  exit 1
}

#######################################
# List the feed URLs found in an OPML file, one per line.
# Arguments: $1 - path to the OPML file
# Outputs:   each xmlUrl attribute value (single- or double-quoted in the
#            source), with the XML entity &amp; decoded to a literal &
#######################################
extract_feed_urls() {
  # tr puts every tag on its own line; the first two sed expressions drop the
  # `xmlUrl=<quote>` prefix and the closing quote, the third decodes &amp;.
  tr '><' '\n\n' < "$1" \
    | grep -Eio "xmlUrl=(\"[^\"]+\"|'[^']+')" \
    | sed -E -e 's/^[^=]+=.//' -e 's/.$//' -e 's/&amp;/\&/g'
}

#######################################
# Convert a date string to seconds since the epoch.
# Arguments: $1 - date string as found in the feed
# Outputs:   the epoch timestamp on stdout
# Returns:   0 on success, 1 if neither `date -d` nor `gdate -d` produced an
#            integer for the string
#######################################
to_epoch() {
  local stamp=$1
  local epoch

  # Try GNU date first.
  if epoch=$(date -d "$stamp" +%s 2>/dev/null) \
    && [[ "$epoch" =~ ^-?[0-9]+$ ]]; then
    printf '%s\n' "$epoch"
    return 0
  fi

  # Fall back to gdate (macOS with GNU coreutils installed).
  if command -v gdate >/dev/null 2>&1 \
    && epoch=$(gdate -d "$stamp" +%s 2>/dev/null) \
    && [[ "$epoch" =~ ^-?[0-9]+$ ]]; then
    printf '%s\n' "$epoch"
    return 0
  fi

  return 1
}

while getopts "f:d:h" opt; do
  case "${opt}" in
    f) OPML_FILE=$OPTARG ;;
    d) DAYS=$OPTARG ;;
    h) usage ;;
    *) usage ;;
  esac
done

if [[ -z "$OPML_FILE" || ! -f "$OPML_FILE" ]]; then
  echo "Error: Please provide a valid OPML file."
  usage
fi

# DAYS is used in arithmetic below; anything but plain digits would either be
# evaluated as an expression or abort the script with a cryptic error.
if [[ ! "$DAYS" =~ ^[0-9]+$ ]]; then
  echo "Error: -d expects a non-negative whole number of days."
  usage
fi

NOW=$(date +%s)
# 10# forces base 10 so a value such as "090" is not rejected as bad octal.
LIMIT_SEC=$((10#$DAYS * 86400))

echo "Analysing feeds in $OPML_FILE (flagging older than $DAYS days)..."
echo "------------------------------------------------------------------"

# URLs are read line by line on fd 3 so they are never word-split or
# glob-expanded, and so nothing inside the loop can consume the list via stdin.
while IFS= read -r url <&3; do
  [[ -n "$url" ]] || continue

  # Fetch feed, handle compression, and extract the first date-like element.
  # grep -m 1 stops after the first matching line (ending the download early);
  # head -n 1 keeps only the first match when that line holds several.
  # --url keeps a value that starts with '-' from being parsed as an option.
  # The JSON Feed keys tolerate whitespace around the colon (pretty-printed).
  date_str=$(curl -sL --compressed --max-time 10 --url "$url" 2>/dev/null \
    | grep -Eio -m 1 '<(pubDate|updated|lastBuildDate|published)>[^<]+|"date_(published|modified)"[[:space:]]*:[[:space:]]*"[^"]+' \
    | head -n 1 \
    | sed -E 's/<[^>]+>|"date_(published|modified)"[[:space:]]*:[[:space:]]*"//g' \
    | tr -d '\r\n')

  if [[ -z "$date_str" ]]; then
    echo "[UNKNOWN] $url - No parsable date found (or feed is unreachable)."
    continue
  fi

  if ! feed_ts=$(to_epoch "$date_str"); then
    echo "[PARSE ERROR] $url - Found date but couldn't parse format: '$date_str'"
    continue
  fi

  diff=$((NOW - feed_ts))

  if ((diff > LIMIT_SEC)); then
    days_old=$((diff / 86400))
    echo "[STALE] $url - $days_old days old (Last updated: $date_str)"
  fi
done 3< <(extract_feed_urls "$OPML_FILE")

echo "------------------------------------------------------------------"
echo "Finished."