Merge pull request #19 from websages/refactor

+25

.github/workflows/test.yml

# CI workflow: installs the Perl test dependencies and runs `make test`
# for pushes to, and pull requests against, the master/main branches.
name: Run Tests

on:
  push:
    branches:
      - "master"
      - "main"
  pull_request:
    branches:
      - "master"
      - "main"

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      # Fetch the repository contents.
      - uses: actions/checkout@v4

      # Provide a pinned Perl interpreter (and cpanm) for the later steps.
      - name: Setup Perl environment
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: '5.38'

      # Modules required by the test suite.
      - name: Install Dependencies
        run: cpanm JSON LWP::Simple Test::More

      # Delegates to the Makefile's `test` target.
      - name: Run Tests
        run: make test

+3

Makefile

··· 82 82 83 83 tempdir: 84 84 echo $(TMPDIR) 85 + 86 + test: 87 + prove -l t/*.t

+44 -148

htdocs/lib/tumble.pm

··· 5 5 use lsrfsh::MySQL; 6 6 7 7 use DBI; 8 - use POSIX qw( strftime ); 9 - use Time::Local qw( timelocal timegm ); 10 - use Cwd qw( abs_path getcwd ); 11 - use File::Spec; 12 - 13 - use YAML qw( LoadFile ); 14 - 15 8 use strict; 16 9 use warnings; 10 + 11 + use tumble::Content; 12 + use YAML qw( LoadFile ); 13 + use Cwd qw( abs_path getcwd ); 14 + use File::Spec; 17 15 18 16 my $CONFIG = LoadFile( 'config.yaml' ); 19 17 ··· 21 19 sub setup { 22 20 my $self = shift; 23 21 22 + # Ensure STDOUT handles UTF-8 to prevent "Wide character in print" errors 23 + binmode(STDOUT, ':encoding(UTF-8)'); 24 + 24 25 $self->run_modes([qw/ 25 26 displayTumble 26 27 /]); ··· 40 41 } 41 42 42 43 $self->{'dbh'} = lsrfsh::MySQL->new( config => 'config.yaml' ); 44 + $self->{'content_processor'} = tumble::Content->new( config => $CONFIG ); 43 45 44 46 $self->start_mode( 'displayTumble' ); 45 47 ··· 77 79 78 80 my ( $c, $d, $date ); 79 81 80 - foreach my $item ( reverse sort { $a cmp $b } keys %{$data} ) { 81 - my ( $content ); 82 + foreach my $item_id ( reverse sort { $a cmp $b } keys %{$data} ) { 83 + my $item = $data->{$item_id}; 84 + 85 + # Delegate processing to Tumble::Content 86 + # This handles date formatting, twitter/youtube embeds, title truncation etc. 
87 + my $processed = $self->{'content_processor'}->process_item( $item ); 88 + 89 + # Update the data hash with processed values 90 + $data->{$item_id} = $processed; 91 + my $formatted_timestamp = $processed->{'timestamp'}; 92 + my $content = $processed->{'content'}; 82 93 83 - if ( 84 - $data->{$item}->{'timestamp'} =~ 85 - /(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})/ 86 - ) { 87 - # Parse the timestamp: $1=year, $2=month, $3=day, $4=hour, $5=minute, $6=second 88 - my ($year, $month, $day, $hour, $minute, $second) = ($1, $2, $3, $4, $5, $6); 89 - 90 - # Convert to epoch time (month is 0-based in timelocal) 91 - # Note: timelocal interprets time as local time based on server timezone 92 - my $epoch = timelocal($second, $minute, $hour, $day, $month - 1, $year - 1900); 93 - 94 - # Format as RFC 822 date (RSS pubDate format) 95 - # Get localtime components for the timestamp 96 - my @lt = localtime($epoch); 97 - 98 - # Calculate timezone offset by comparing GMT and local time 99 - # timegm returns GMT epoch for given local time components 100 - my $gmt_epoch = timegm(@lt); 101 - my $local_epoch = timelocal(@lt); 102 - my $tz_offset_seconds = $local_epoch - $gmt_epoch; 103 - my $tz_offset_minutes = $tz_offset_seconds / 60; 104 - 105 - # Format timezone offset as +HHMM or -HHMM 106 - my $tz_sign = $tz_offset_minutes >= 0 ? 
'+' : '-'; 107 - my $tz_hours = abs(int($tz_offset_minutes / 60)); 108 - my $tz_mins = abs(int($tz_offset_minutes % 60)); 109 - my $tz_offset = sprintf("%s%02d%02d", $tz_sign, $tz_hours, $tz_mins); 94 + # Date header logic 95 + if ( defined $processed->{_date_components} ) { 96 + my $comps = $processed->{_date_components}; 110 97 111 - $data->{$item}->{'timestamp'} = 112 - POSIX::strftime("%a, %d %b %Y %H:%M:%S $tz_offset", @lt); 113 - 114 - if ( ( !$d ) || ( $3 ne $d ) ) { 115 - $d = $3; 116 - 117 - $date->{'day'} = POSIX::strftime( 118 - "%a", 0, $5, $4, $3, $2 - 1, $1 - 1900 119 - ); 120 - $date->{'mon'} = POSIX::strftime( 121 - "%b", 0, $5, $4, $3, $2 - 1, $1 - 1900 122 - ); 123 - 124 - $c .= $self->wrap( 125 - wrapper => 'tumble_date', 126 - month => $date->{'mon'}, 127 - day => $date->{'day'}, 128 - date => $d 129 - ); 130 - } 131 - } 132 - 133 - for ( $data->{$item}->{'type'} ) { 134 - /ircLink/ && do { 135 - if ( $data->{$item}->{'title'} =~ /^(http:\/\/.*)/ ) { 136 - if ( length( $1 ) > 40 ) { 137 - $data->{$item}->{'title'} = substr( $1, 0, 40 ) . '...'; 138 - } 139 - } 140 - 141 - my $link_filler = $data->{$item}->{'title'}; 142 - 143 - # fall back to normal linking of images if they could be nsfw 144 - if (($data->{$item}->{'content_type'} =~ /image/) and ($data->{$item}->{'user'} !~ /nsfw|otd/)) { 145 - $link_filler = '<img src="' . $data->{$item}->{'url'} . '">'; 146 - } 147 - 148 - if ($data->{$item}->{'url'} =~ /twitter/) { 149 - use LWP::Simple; 150 - use JSON; 151 - my @parts = split('/' , $data->{$item}->{'url'}); 152 - my $id = $parts[-1]; 153 - # This is so URIs like id/photos/1 don't try to call json 154 - next if $id !~ /[0-9]+/; 155 - next if $#parts > 6; 156 - my $tw_uri = "https://api.twitter.com/1/statuses/oembed.json?id=" . 
$id; 157 - my $tw_j = get( $tw_uri ); 158 - next unless $tw_j; 159 - my $stuff = from_json($tw_j); 160 - $link_filler = $stuff->{'html'}; 161 - } 162 - 163 - # Handle YouTube URLs - extract video ID and create embed 164 - my $is_youtube = 0; 165 - if ($data->{$item}->{'url'} =~ /youtube\.com|youtu\.be/i) { 166 - my $video_id; 167 - my $url = $data->{$item}->{'url'}; 168 - 169 - # Handle various YouTube URL formats (case-insensitive, with or without www/protocol) 170 - if ($url =~ /(?:youtube\.com\/watch\?v=|youtube\.com\/embed\/|youtu\.be\/)([a-zA-Z0-9_-]{11})/i) { 171 - $video_id = $1; 172 - } elsif ($url =~ /youtube\.com\/watch\?.*[&?]v=([a-zA-Z0-9_-]{11})/i) { 173 - $video_id = $1; 174 - } 175 - 176 - if ($video_id) { 177 - # Create responsive YouTube embed (standalone, not wrapped in link) 178 - $content = '<div class="youtube-embed-wrapper">' . 179 - '<iframe width="560" height="315" ' . 180 - 'src="https://www.youtube.com/embed/' . $video_id . '?rel=0" ' . 181 - 'frameborder="0" ' . 182 - 'allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" ' . 183 - 'allowfullscreen></iframe>' . 184 - '</div>'; 185 - $is_youtube = 1; 186 - } 187 - } 188 - 189 - unless ($is_youtube) { 190 - $content = 191 - '<a href="http://' . $CONFIG->{'baseurl'} . 192 - qq{/irclink/?} . 193 - $data->{$item}->{'ircLinkID'} . 194 - qq{">} . 195 - $link_filler . 196 - qq{</a>}; 197 - } 98 + if ( ( !$d ) || ( $comps->{'raw_day'} ne $d ) ) { 99 + $d = $comps->{'raw_day'}; 198 100 199 - }; 101 + $date->{'day'} = $comps->{'day'}; 102 + $date->{'mon'} = $comps->{'mon'}; 200 103 201 - /image/ && do { 202 - $content = 203 - qq{<img src="} . 204 - $data->{$item}->{'url'} . 
205 - qq{" alt="image" />}; 206 - }; 207 - 208 - /quote/ && do { 209 - # For quote items, build description text 210 - my $quote_text = $data->{$item}->{'quote'} || ''; 211 - my $author_text = $data->{$item}->{'author'} || ''; 212 - # Build the description text - will be escaped in wrap() function 213 - $content = '"' . $quote_text . '" --' . $author_text; 214 - }; 104 + $c .= $self->wrap( 105 + wrapper => 'tumble_date', 106 + month => $date->{'mon'}, 107 + day => $date->{'day'}, 108 + date => $d 109 + ); 110 + } 215 111 } 216 112 113 + my %template_vars = ( 114 + wrapper => 'tumble_item_' . $processed->{'type'}, 115 + author => $processed->{'user'}, 116 + baseurl => $CONFIG->{'baseurl'}, 117 + %{$processed} 118 + ); 119 + 120 + # Add content or description depending on item type 217 121 # For XML/RSS feeds, wrap HTML content in CDATA sections (for ircLink and image items) 218 122 my $xml_content = $content; 219 - if ( $self->{'arg'}->{'dtype'} =~ /xml|rss/ && defined $content && $content ne '' && $data->{$item}->{'type'} ne 'quote' ) { 123 + if ( $self->{'arg'}->{'dtype'} =~ /xml|rss/ && defined $content && $content ne '' && $processed->{'type'} ne 'quote' ) { 220 124 # Wrap HTML content in CDATA for RSS descriptions (quote already has CDATA) 221 125 $xml_content = '<![CDATA[' . $content . ']]>'; 222 126 } 223 127 224 - my %template_vars = ( 225 - wrapper => 'tumble_item_' . $data->{$item}->{'type'}, 226 - author => $data->{$item}->{'user'}, 227 - baseurl => $CONFIG->{'baseurl'}, 228 - %{$data->{$item}} 229 - ); 230 - 231 - # Add content or description depending on item type 232 - if ( $data->{$item}->{'type'} eq 'quote' ) { 128 + if ( $processed->{'type'} eq 'quote' ) { 233 129 # For quote items, pass description (will be escaped in wrap() function) 234 130 $template_vars{'description'} = $content if defined $content; 235 131 } elsif ( defined $xml_content ) {

+144

htdocs/lib/tumble/Content.pm

package tumble::Content;

# Turns a raw tumble item (a hashref with keys such as `type`, `timestamp`,
# `url`, `title`, `user`) into a display-ready copy:
#
#   * `timestamp` is rewritten as an RFC 822 date (RSS pubDate format),
#   * `_date_components` is added for the per-day header logic,
#   * `content` is filled with the HTML/text body for ircLink, image and
#     quote items.

use strict;
use warnings;

use POSIX qw( strftime );
use Time::Local qw( timelocal timegm );

# LWP::Simple and JSON are loaded lazily (see _default_fetcher and
# _decode_json): most requests never look up a tweet, and callers that
# inject their own fetcher do not need LWP at all.

# Constructor.
#
# Named arguments:
#   config  - hashref; only `baseurl` is read (defaults to {}).
#   fetcher - coderef called as $fetcher->($url); must return the response
#             body or a false value. Defaults to LWP::Simple::get.
sub new {
    my ($class, %args) = @_;
    my $self = {
        config  => $args{config}  || {},
        fetcher => $args{fetcher} || \&_default_fetcher,
    };
    return bless $self, $class;
}

# Returns a processed shallow copy of $item_in; the caller's hash is never
# modified. A missing item yields a hashref with no keys set.
sub process_item {
    my ($self, $item_in) = @_;
    my $item = { %{ $item_in || {} } };

    $self->_process_dates($item);
    $self->_process_content($item);

    return $item;
}

# Default fetcher: plain HTTP GET through LWP::Simple, loaded on first use.
sub _default_fetcher {
    my ($url) = @_;
    require LWP::Simple;
    return LWP::Simple::get($url);
}

# Decodes a JSON text (character string). Uses JSON when it is installed and
# falls back to the core JSON::PP module otherwise. Dies on malformed input.
sub _decode_json {
    my ($text) = @_;
    my $class = eval { require JSON; 1 } ? 'JSON' : 'JSON::PP';
    require JSON::PP if $class eq 'JSON::PP';
    return $class->new->decode($text);
}

# Escapes double quotes so a value cannot terminate the double-quoted HTML
# attribute it is placed in. Undefined values become the empty string.
sub _escape_attr {
    my ($value) = @_;
    return '' unless defined $value;
    $value =~ s/"/&quot;/g;
    return $value;
}

# Rewrites a `YYYY-MM-DD HH:MM:SS` timestamp (interpreted in the server's
# local timezone) as an RFC 822 date and records `_date_components`
# (`day` = weekday abbreviation, `mon` = month abbreviation, `raw_day` =
# two-digit day of month). Items whose timestamp is missing, does not match,
# or is not a real calendar date are left untouched.
sub _process_dates {
    my ($self, $item) = @_;

    my $stamp = $item->{timestamp};
    return unless defined $stamp;

    my ($year, $month, $day, $hour, $minute, $second) =
        $stamp =~ /(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})/
        or return;

    # timelocal croaks on impossible dates (e.g. 0000-00-00 00:00:00 or
    # February 30th); one bad row must not abort the whole page.
    # The full four-digit year is passed so Time::Local does not apply its
    # two-digit "rolling century" guess.
    my ($epoch, @lt);
    my $parsed_ok = eval {
        $epoch = timelocal($second, $minute, $hour, $day, $month - 1, $year);
        @lt    = localtime($epoch);
        1;
    };
    return unless $parsed_ok;

    # Reading the local wall-clock fields as if they were UTC gives
    # epoch + offset, so the difference is the local offset east of UTC
    # (positive for e.g. +0200, negative for e.g. -0500).
    my $offset_seconds = timegm(@lt[0 .. 4], $lt[5] + 1900) - $epoch;
    my $tz_sign        = $offset_seconds < 0 ? '-' : '+';

    # Split the absolute value so negative half/quarter-hour zones do not
    # trip over the sign rules of int() and %.
    my $offset_minutes = int(abs($offset_seconds) / 60);
    my $tz_offset      = sprintf(
        '%s%02d%02d',
        $tz_sign,
        int($offset_minutes / 60),
        $offset_minutes % 60
    );

    $item->{timestamp} = strftime("%a, %d %b %Y %H:%M:%S $tz_offset", @lt);

    # Date parts used by the caller to emit one header per day.
    my @date_fields = (0, $minute, $hour, $day, $month - 1, $year - 1900);
    $item->{_date_components} = {
        day     => strftime('%a', @date_fields),
        mon     => strftime('%b', @date_fields),
        raw_day => $day,
    };

    return;
}

# Fills $item->{content} according to $item->{type}. Unknown or missing
# types leave the item without a `content` key.
sub _process_content {
    my ($self, $item) = @_;

    my $type = defined $item->{type} ? $item->{type} : '';

    if ($type eq 'ircLink') {
        $self->_process_irclink($item);
    }
    elsif ($type eq 'image') {
        $item->{content} =
            qq{<img src="} . _escape_attr($item->{url}) . qq{" alt="image" />};
    }
    elsif ($type eq 'quote') {
        # defined-checks (not ||) so a quote or author of "0" survives.
        my $quote_text  = defined $item->{quote}  ? $item->{quote}  : '';
        my $author_text = defined $item->{author} ? $item->{author} : '';
        $item->{content} = '"' . $quote_text . '" --' . $author_text;
    }

    return;
}

# Returns the responsive YouTube embed markup for $url, or undef when the
# URL is not a recognisable YouTube video link.
sub _youtube_embed {
    my ($url) = @_;

    return unless $url =~ /youtube\.com|youtu\.be/i;

    my ($video_id) = $url =~
        m{(?:youtube\.com/watch\?v=|youtube\.com/embed/|youtu\.be/)([a-zA-Z0-9_-]{11})}i;

    # `v=` may also appear after other query parameters.
    ($video_id) = $url =~ m{youtube\.com/watch\?.*[&?]v=([a-zA-Z0-9_-]{11})}i
        unless defined $video_id;

    return unless defined $video_id;

    return '<div class="youtube-embed-wrapper">' .
        '<iframe width="560" height="315" ' .
        'src="https://www.youtube.com/embed/' . $video_id . '?rel=0" ' .
        'frameborder="0" ' .
        'allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" ' .
        'allowfullscreen></iframe>' .
        '</div>';
}

# Looks up the oEmbed HTML for a tweet URL through the configured fetcher.
# Returns the HTML string, or undef when the URL is not a status permalink,
# the fetch returns nothing, or the response is unusable. Failures are
# reported with warn() and never propagate.
sub _fetch_tweet_html {
    my ($self, $url) = @_;

    # Only status permalinks carry a tweet id. Anything after the id other
    # than a query string or fragment (e.g. /photo/1) is deliberately
    # rejected so sub-resources do not trigger a lookup.
    my ($id) = $url =~ m{/status(?:es)?/(\d+)/?(?:[?#].*)?\z};
    return unless defined $id;

    # NOTE(review): this oEmbed endpoint is kept as it was; it may have been
    # retired upstream - confirm against the current Twitter/X documentation.
    my $tw_uri = "https://api.twitter.com/1/statuses/oembed.json?id=" . $id;

    my $html;
    my $ok = eval {
        my $body = $self->{fetcher}->($tw_uri);
        if ($body) {
            my $data = _decode_json($body);
            $html = $data->{html} if ref $data eq 'HASH';
        }
        1;
    };
    warn "tumble::Content: tweet embed lookup failed for $url: $@" unless $ok;

    return $html;
}

# Builds the content for an ircLink item: a standalone YouTube embed when
# the URL is a YouTube video, otherwise a link to the irclink redirector
# whose body is the title, an inline image, or embedded tweet HTML.
# Over-long bare-URL titles are shortened in place.
sub _process_irclink {
    my ($self, $item) = @_;

    my $url = defined $item->{url} ? $item->{url} : '';

    # Titles that are just a URL (http or https) are cut to 40 characters.
    if (   defined $item->{title}
        && $item->{title} =~ m{^(https?://.*)}
        && length($1) > 40)
    {
        $item->{title} = substr($1, 0, 40) . '...';
    }

    # YouTube embeds stand alone (not wrapped in a link), so nothing else
    # needs to be computed - in particular no network lookup.
    my $embed = _youtube_embed($url);
    if (defined $embed) {
        $item->{content} = $embed;
        return;
    }

    # NOTE(review): the title and the fetched tweet HTML are inserted
    # unescaped, as before; confirm titles are sanitised upstream.
    my $link_filler = defined $item->{title} ? $item->{title} : '';

    # Inline images only for known users that are not flagged nsfw/otd;
    # everything else keeps the plain text link.
    if (   ($item->{content_type} && $item->{content_type} =~ /image/)
        && ($item->{user} && $item->{user} !~ /nsfw|otd/))
    {
        $link_filler = '<img src="' . _escape_attr($url) . '">';
    }

    if ($url =~ /twitter/) {
        my $tweet_html = $self->_fetch_tweet_html($url);
        $link_filler = $tweet_html if $tweet_html;
    }

    my $baseurl = $self->{config}->{baseurl} || '';
    my $link_id = defined $item->{ircLinkID} ? $item->{ircLinkID} : '';

    $item->{content} = '<a href="http://' . $baseurl .
        qq{/irclink/?} .
        $link_id .
        qq{">} .
        $link_filler .
        qq{</a>};

    return;
}

1;

+2

htdocs/robots.txt

··· 1 + User-agent: * 2 + Disallow:

+86

t/content.t

# Unit tests for tumble::Content. All network access is replaced by
# mock_fetcher, so the suite runs offline.
use strict;
use warnings;
use Test::More;
use FindBin;
use lib "$FindBin::Bin/../htdocs/lib";

BEGIN { use_ok('tumble::Content') };

# Stand-in for the HTTP fetcher: answers Twitter oEmbed requests with a
# canned JSON document and returns nothing for any other URL.
sub mock_fetcher {
    my ($url) = @_;
    return '{"html": "<blockquote>Mock Tweet</blockquote>"}'
        if $url =~ /api\.twitter\.com/;
    return;
}

my $config    = { baseurl => 'tumble.example.com' };
my $processor = tumble::Content->new(
    config  => $config,
    fetcher => \&mock_fetcher,
);

subtest 'process_item: date formatting' => sub {
    my $item = {
        timestamp => '2023-10-27 10:00:00',
        type      => 'text',
        title     => 'Test Title',
    };

    my $processed = $processor->process_item($item);

    # The zone offset depends on the machine running the tests, so only its
    # shape is checked; the wall-clock part must round-trip unchanged.
    like(
        $processed->{timestamp},
        qr/\AFri, 27 Oct 2023 10:00:00 [+-]\d{4}\z/,
        'Timestamp is an RFC 822 date'
    );
    is(ref $processed->{_date_components}, 'HASH', 'Date components recorded');
    is($processed->{_date_components}{raw_day}, '27', 'Raw day of month kept');
    is($item->{timestamp}, '2023-10-27 10:00:00', 'Input item is not modified');
};

subtest 'process_item: youtube embed' => sub {
    my $item = {
        type  => 'ircLink',
        url   => 'https://www.youtube.com/watch?v=dQw4w9WgXcQ',
        title => 'Rick Roll',
    };

    my $processed = $processor->process_item($item);

    like($processed->{content}, qr/iframe/,      'Contains iframe for YouTube');
    like($processed->{content}, qr/dQw4w9WgXcQ/, 'Contains video ID');
    unlike($processed->{content}, qr/<a /,       'Embed is not wrapped in a link');
};

subtest 'process_item: twitter embed' => sub {
    my $item = {
        type      => 'ircLink',
        url       => 'https://twitter.com/user/status/1234567890',
        title     => 'Tweet',
        ircLinkID => 42,
    };

    my $processed = $processor->process_item($item);

    # The oEmbed HTML replaces the title as the body of the redirector link.
    is(
        $processed->{content},
        '<a href="http://tumble.example.com/irclink/?42"><blockquote>Mock Tweet</blockquote></a>',
        'Tweet HTML is wrapped in the irclink anchor'
    );
};

subtest 'process_item: normal link construction' => sub {
    my $item = {
        type      => 'ircLink',
        url       => 'http://example.com',
        title     => 'Example',
        ircLinkID => 123,
    };

    my $processed = $processor->process_item($item);

    is(
        $processed->{content},
        '<a href="http://tumble.example.com/irclink/?123">Example</a>',
        'Link uses baseurl, ircLinkID and title'
    );
};

subtest 'process_item: long URL titles are truncated' => sub {
    my $long_title = 'http://example.com/' . ('a' x 50);
    my $item = {
        type      => 'ircLink',
        url       => 'http://example.com/page',
        title     => $long_title,
        ircLinkID => 5,
    };

    my $processed = $processor->process_item($item);
    my $expected  = substr($long_title, 0, 40) . '...';

    is($processed->{title}, $expected, 'Title cut to 40 characters plus ellipsis');
    like($processed->{content}, qr/>\Q$expected\E<\/a>\z/, 'Link text is the short title');
    is($item->{title}, $long_title, 'Input title is not modified');
};

subtest 'process_item: image and quote items' => sub {
    my $image = $processor->process_item({
        type => 'image',
        url  => 'http://example.com/a.png',
    });
    is(
        $image->{content},
        '<img src="http://example.com/a.png" alt="image" />',
        'Image item renders an img tag'
    );

    my $quote = $processor->process_item({
        type   => 'quote',
        quote  => 'Hi',
        author => 'Bob',
    });
    is($quote->{content}, '"Hi" --Bob', 'Quote item renders text and author');
};

done_testing();

Configure Feed

Configure Feed