package NewsExtractor::SiteSpecificExtractor::www_setn_com;
use utf8;
use Moo;
extends 'NewsExtractor::GenericExtractor';

use Importer 'NewsExtractor::TextUtil' => 'normalize_whitespace';

# journalist()
#
# Looks for a byline in the article's content text and returns the captured
# journalist name(s), passed through normalize_whitespace().
#
# The patterns are tried in order and the first one that matches wins.
# Returns a false value (undef) when none of the patterns match.
#
# NOTE: this file is UTF-8 (see `use utf8` above). The CJK literals below,
# in particular the FULLWIDTH SOLIDUS "／" (U+FF0F) and the IDEOGRAPHIC
# COMMA "、" (U+3001), must be kept as real characters; if the file is
# re-saved through a single-byte encoding the patterns silently stop
# matching.
sub journalist {
    my ($self) = @_;

    my $content_text = $self->content_text;

    my @patterns = (
        # "記者 <names> ／ <place>報導" -- the names may be joined with "、".
        qr{\b記者\s*([\p{Letter}、]+?)\s*／\s*(?:\p{Letter}+?)報導\b},

        # "文／<names>"
        qr{\b文／([\p{Letter}、]+)\b},

        # "三立準氣象／<name>報導" or "<two letters>中心／<name>報導"
        qr{\b (?:三立準氣象 | \p{Letter}{2} 中心) ／ (\p{Letter}+?) 報導\b}x,

        # "健康醫療網記者<name>/報導" at the end of a line
        # (this byline uses an ASCII slash, not the fullwidth one).
        qr{\b健康醫療網記者(\p{Letter}+?)/報導\n}x,

        # "助理編輯／<name>" at the end of a line.
        qr{\b助理編輯／(\p{Letter}+?)\n}x,
    );

    my $name;
    for my $pat (@patterns) {
        # List assignment so that $name receives the first capture group
        # (or undef when the pattern does not match).
        ($name) = $content_text =~ $pat;
        last if defined $name;
    }

    return $name && normalize_whitespace($name);
}

1;