Skip to content
Snippets Groups Projects
unicode.pl 3.07 KiB
use strict;
use warnings;
use feature qw(:5.30 signatures);
no warnings qw(experimental::signatures);

use Encode;
use Unicode::UCD qw(charinfo namedseq);

use Irssi;

our $VERSION = 0.1;
our %IRSSI = (
    name        => 'unicode.pl',
    description => 'Look up information on Unicode codepoints.',
    author      => 'Síle Ekaterin Liszka',
    contact     => 'sheila@vulpine.house',
    license     => 'MIT',
);

sub _charinfo($str) {
    my @input = split //, $str;
    my @output = ();
    my $pos = 0;

    for my $char (@input) {
        my $code = ord($char);
        my $codepoint = sprintf('U+' . ($code < 0xffff ? '%04X' : '%06X'), $code);
        my $info = charinfo($codepoint);

		unless (defined($info)) {
			return "$codepoint is not a valid Unicode codepoint.";
		}
		unless (exists($info->{name}) and ($info->{name} ne '')) {
			return "$codepoint is in a PUA or not yet defined.";
		}
		if ($pos == 0) {
			push @output, output($info);
			$pos++;
		} else {
			unless ($info->{combining} > 0) {
				last;
			}
			push @output, output($info);
			$pos++;
		}
    }
    return encode('UTF-8', join('; ', @output));
}
sub output($info) {
    my $fmt = '$codepoint $code $name (XML: &#$decimal; Hex: $hex Oct: $oct)';

    my %data = (
        codepoint => 'U+' . $info->{code},
        code      => chr(hex($info->{code})),
        name      => $info->{name},
        decimal   => hex $info->{code},
        hex       => join(' ', map sprintf('%02x', $_), unpack('U0C*', chr hex $info->{code})),
        oct       => join(' ', map sprintf('%03o', $_), unpack('U0C*', chr hex $info->{code})),
    );

    $fmt =~ s/\$([a-z]+)/$data{$1}/g;

    return $fmt;
}

sub cmd_process($server, $trigger, $str, $nick, $target) {
    if ($trigger eq 'unicode') {
        if ($str =~ /^U\+([0-9A-F]+)$/i) {
            $str = chr(hex($1));
        } else {