#!/usr/bin/perl
#
# Scan a CSV file and list every distinct link target that appears as
# "<→target>" in the second column of a record.
#
# Usage: perl SCRIPT <csv-file> <encoding>
#   <csv-file>  path of the CSV file to read
#   <encoding>  character encoding of that file (for example cp932)
#
# Output:
#   STDOUT - each distinct link target on its own line, UTF-8 encoded,
#            in sorted order
#   STDERR - a progress counter every 2000 records and, at the end, the
#            total number of records that contained a link
#
use strict;
use warnings;
use utf8;
use Encode;
use FileHandle;
use Text::CSV_XS;
use PerlIO::encoding;
use Encode qw(:fallbacks);
use HTML::Entities;

# Both arguments are mandatory; there is no default encoding.
die "please specify the name of file for input.\n" if @ARGV < 1;
die "please specify the encoding type.\n"          if @ARGV < 2;

my ($fileName, $charCodeOfCSV) = @ARGV;

# Low-precedence "or" is required here: with "||" the die would attach to
# $fileName (always true for a non-empty name) and a failed open would go
# unnoticed.
open my $fh, "<:encoding($charCodeOfCSV)", $fileName
    or die "error, Cannot open $fileName: $!\n";

# binary must be 1 when reading Japanese (non-ASCII) text.
# eol is deliberately left unset so the parser accepts "\n", "\r" and "\r\n".
my $csv = Text::CSV_XS->new({ binary => 1 });

my ($i, $numberOfLinked) = (0, 0);
my %linkedHash;    # link target => number of records that referenced it

ROW:
while (my $row = $csv->getline($fh)) {
    my @fields = @$row;

    # Skip a header record (fourth column is the literal 'level') as long
    # as no data record has been counted yet.
    next ROW if $i == 0 && defined $fields[3] && $fields[3] eq 'level';

    $i++;
    print STDERR "[$i/$numberOfLinked]\r" unless $i % 2000;

    # Records with fewer than two columns cannot carry a link.
    next ROW unless defined $fields[1];

    # Only the first "<→...>" marker of the column is taken into account.
    my ($linked) = $fields[1] =~ /<→(.*?)>/;
    next ROW unless defined $linked;

    $numberOfLinked++;
    $linkedHash{$linked}++;
}

# getline returns false at end of file AND on a malformed record; tell the
# two apart so that truncated input does not pass silently. The results
# gathered so far are still printed.
unless ($csv->eof) {
    warn "warning, CSV parse error in $fileName: "
        . scalar($csv->error_diag) . "\n";
}

close $fh;

# The summary is pure ASCII, so no explicit encoding step is needed.
print STDERR "\nnumber of Linked: $numberOfLinked\n";

# Sorted so that repeated runs over the same input give identical output.
for my $w (sort keys %linkedHash) {
    print encode('utf-8', "$w\n");
}