#!/usr/bin/env perl

# Syntax highlight text using Vim
# simplified version to use it as a monolithic function within lesspipe
#
# This software is copyright (c) 2002-2006 by Geoff Richards.
#
# This software is copyright (c) 2011 by Randy Stauner.
#
# This software is copyright (c) 2021-2024 by Wolfgang Friebel.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
use warnings;
use strict;
use 5.9.3;
use IO::File;
use File::Copy;
use File::Temp;
use Term::ANSIColor qw(color colorvalid);
use IPC::Open3;
use Getopt::Std;

our ($opt_c, $opt_h, $opt_l, $opt_L);
&getopts('chl:L') || usage();
usage() if $opt_h;

# do a clean up if we get a CTRL-C
our ($tdir,$script_fh, $markup_fh);
$SIG{INT} = sub { if ($tdir) {
	close $script_fh if $script_fh;
	close $markup_fh if $markup_fh;
	unlink $tdir; print "\n"; exit 1 }};

$tdir = File::Temp->newdir('/tmp/vimcolorXXXX');
my $defaults = set_defaults();
languages(@ARGV) if $opt_L;

my ($file) = shift;
my $filetype = $opt_l;
my $report_colors = $opt_c;

if (! $file or $file eq '-') {
	$file = "$tdir/inputfile";
	File::Copy::syscopy(\*STDIN, $file);
}

my %ANSI_COLORS = (
	Comment		=> 'blue',
	Constant	=> 'red',
	Identifier	=> 'cyan',
	Statement	=> 'yellow',
	PreProc		=> 'magenta',
	Type		=> 'green',
	Special		=> 'bright_magenta',
	Underlined	=> 'underline',
	Ignore		=> 'bright_white',
	Error		=> 'on_red',
	Todo		=> 'on_cyan',
);

# get colors from the active color scheme
my $cmds = "$tdir/commands";
my $colorfile = "$tdir/colors";
open F, ">$cmds" or die "$!";
print F "redir! >$colorfile|colorscheme|";
print F "hi $_|" for keys %ANSI_COLORS;
print F "q!";
close F;
run($defaults->{vim_command}, $defaults->{vim_options}, '-S', $cmds, $colorfile);
open F, $colorfile or die $!;
my ($key, %attrs);
my %t = map {$_, $_} qw(bold underline reverse italic blink undercurl standout);
$t{undercurl} = 'underline';
$t{standout} = 'reverse';
while (<F>) {
	last if /^unknown/; # skip rest of file, no color support
	next if /^\s*$/;
	$key = $1 if /^(\w+)\s+\w/;
	$attrs{$key} .= ($1 =~ /(\d+)/ ? "ANSI$1 " : "$1 ") if /ctermfg=(\w+)/;
	if (/term=([\w,]+)/) {
		for my $what (split /,/, $1) {
			$attrs{$key} .= "$t{$what} " if exists $t{$what};
		}
	}
}
close F;
my %COLORS = %ANSI_COLORS;
for (keys %ANSI_COLORS) {
	$COLORS{$_} = $attrs{$_} if $attrs{$_} && $attrs{$_} !~ /\bnone\b/i; 
}

# allow the environment to overwrite:

if ($ENV{TEXT_VIMCOLOR_ANSI}) {
	my @ADD = split /\s*[=;]\s*/, $ENV{TEXT_VIMCOLOR_ANSI};
	if (@ADD % 2) {
		warn "### TEXT_VIMCOLOR_ANSI has wrong content: $ENV{TEXT_VIMCOLOR_ANSI}\n";
	} else {
		while (@ADD) {
			my ($k, $v) = splice @ADD, -2;
			$COLORS{$k} = $v;
		}
	}
}

# These extra syntax group are available but linked to the groups above by
# default in vim. They can get their own highlighting (all_syntax_groups => 1).
# Gets activated by setting a color for at least one of these members.
my %SYNTAX_LINKS;

$SYNTAX_LINKS{$_} = 'Constant' for qw(String Character Number Boolean Float);
$SYNTAX_LINKS{$_} = 'Identifier' for qw(Function);
$SYNTAX_LINKS{$_} = 'Statement' for qw(Conditional Repeat Label Operator Keyword Exception);
$SYNTAX_LINKS{$_} = 'PreProc' for qw(Include Define Macro PreCondit);
$SYNTAX_LINKS{$_} = 'Type' for qw(StorageClass Structure Typedef);
$SYNTAX_LINKS{$_} = 'Special' for qw(Tag SpecialChar Delimiter SpecialComment Debug);

my $all_groups = 0;
$all_groups = 1 if grep {defined $COLORS{$_}} keys %SYNTAX_LINKS;
# Copy ansi color for main group to all subgroups.
$COLORS{$_} ||= $COLORS{$SYNTAX_LINKS{$_}} for keys %SYNTAX_LINKS;

my %colors;
my $reset = color('reset');
for (keys (%COLORS)) {
	if (colorvalid($COLORS{$_})) {
		$colors{$_} = color($COLORS{$_});
	} else {
		warn "### invalid color name '$COLORS{$_}' (Term::ANSIColor) ###\n";
		$colors{$_} = $reset;
	}
}
# Build a lookup table to determine if a syntax exists.
my %SYNTAX_TYPE = map {$_, 1} keys %COLORS;

my $syntax = do_markup($defaults, $file, $filetype);

sub set_defaults {
	my $ex = 'vim';
	$ex = 'nvim' if grep{ -e "$_/nvim" } split /:/, $ENV{PATH};
	my @opts = $ex eq 'vim' ? qw(--not-a-term -XZ) : qw(headless);
	return {
		vim_command => $ex,
		vim_options => [@opts, qw(-R -i NONE -u NONE -N -n), "+set nomodeline"],
		all_syntax_groups => $all_groups,
		vim_let => {perl_include_pod => 1, 'b:is_bash' => 1},
	};
}

# Actually run Vim and turn the script's output into a datastructure.
sub do_markup {
	my ($defaults, $file, $filetype) = @_;

	(my $filename = $file) =~s/"/\\"/g;

	# Create a temp file to put the output in.
	my $out_file = File::Temp->new(TEMPLATE => 'vimcXXXX', DIR => '/tmp');
	# Create a temp file for the 'script', which is given to vim
	# with the -s option.
	my $script_file = "$tdir/scriptfile";
	open $script_fh, ">$script_file" or die "$!\n";
	my $markup_file = "$tdir/markupfile";
	open $markup_fh, ">$markup_file" or die "$!\n";
	my $filetype_set = defined $filetype ? ":set filetype=$filetype" : '';
	my $vim_let = $defaults->{vim_let};

	my $markscript= <<'EOF';
set report=1000000
if !strlen(&filetype)
	filetype detect
endif
syn on
new
set modifiable
set paste
set isprint+=9
wincmd p
let s:end = line("$")
let s:lnum = 1
while s:lnum <= s:end
	let s:line = getline(s:lnum)
	let s:len = strlen(s:line)
	let s:new = ""
	let s:col = 1
	while s:col <= s:len
		let s:startcol = s:col " The start column for processing text
		let s:id = synID(s:lnum, s:col, 1)
		let s:col = s:col + 1
		while s:col <= s:len && s:id == synID(s:lnum, s:col, 1) | let s:col = s:col + 1 | endwhile
		let s:id = synIDtrans(s:id)
		let s:name = synIDattr(s:id, 'name')
		let s:new = s:new . '>' . s:name . '>' . substitute(substitute(substitute(strpart(s:line, s:startcol - 1, s:col - s:startcol), '&', '\&a', 'g'), '<', '\&l', 'g'), '>', '\&g', 'g') . '<' . s:name . '<'
		if s:col > s:len
			break
		endif
	endwhile
	exe "normal \<C-W>pa" . strtrans(s:new) . "\n\e\<C-W>p"
	let s:lnum = s:lnum + 1
	+
endwhile
wincmd p
normal dd
EOF
	if ($defaults->{all_syntax_groups}) {
		my %a= (map {$_, 1} ('Normal', keys %ANSI_COLORS, keys %SYNTAX_LINKS));
		printf $markup_fh "hi %-16s ctermfg=7\n", $_ for keys %a;
	}
	print $markup_fh $markscript;
	close $markup_fh;
	my @script_lines = (
		map { "$_\n" }
			# do :edit before :let or the buffer variables may get reset
			":edit $filename",
			(
				map  { ":let $_=$vim_let->{$_}" }
				grep { defined	$vim_let->{$_} }
					keys %$vim_let
			),

			':filetype on',
			$filetype_set,
			":source $markup_file",
			":write! $out_file",
			':qall!',
	);

	print $script_fh @script_lines;
	close $script_fh;

	run(
		$defaults->{vim_command},
		@{$defaults->{vim_options}},
		$filename, ('-s' => "$script_file")
	);

	my $data = do { local $/; <$out_file> };

	# Given an array ref ($syntax), we add a new syntax chunk to it, unescaping
	# the text & making sure that consecutive chunks of the same type are merged.
	my ($syntax, $content, %types);
	$data =~ s/\x0D\x0A?/\n/g;
	$data =~ s/<(.*?)<>\1>//g; # repeating blocks of the same type
	for (keys %colors) {
		$types{$_} = 1 if $data =~ s/>$_>(.*?)<$_</$colors{$_}$1/g; #coloring
	}
	# first line without the reset sequence
	$data =~ s/^>(.*?)>(.*?)<\1</$2/; #no color
	$data =~ s/>(.*?)>(.*?)<\1</$reset$2/g; #no color
	# Unescape ampersands and pointies, restore escape
	my %s=(l=>'<',g=>'>',a=>'&');
	$data =~s/&([lga])/$s{$1}/g;
	$data =~ s/\^\[/\e/g;
	# get start and end of file correct
	$data .= $reset if $data !~ /\n$/;
	print $data if $data;

	return if ! $report_colors;
	print "\n### Report color settings:\n";
	print "$_ ($colors{$_}$COLORS{$_}$reset)\n" for keys %types;
}

# This is a subroutine which runs a program.
# It takes a list of the program name and arguments.
sub run {
	my ($prog, @args) = @_;

	{
		my ($in, $out) = (Symbol::gensym(), Symbol::gensym());
		my $err_fh = Symbol::gensym();

		my $pid = IPC::Open3::open3($in, $out, $err_fh, $prog => @args);

		# close these to avoid any ambiguity that might cause this to block
		# (see also the paragraph about "select" in IPC::Open3)
		close($in);
		close($out);

		# read handle before waitpid to avoid hanging on older systems
		my $errout = do { local $/; <$err_fh> };

		my $gotpid = waitpid($pid, 0);
		die "couldn't run the program '$prog'" if $gotpid == -1;
		my $error = $? >> 8;
		if ($error) {
			$errout =~ s/\n+\z//;
			my $details = $errout eq '' ? '' :
										"\n$prog wrote this error output:\n$errout\n";
			die "$prog returned an error code of '$error'$details";
		}
	}
}

sub languages {
	my ($pat) = @_;
	$pat ||= '';
	my $cmds = "$tdir/commands";
	my $typefile = "$tdir/types";
	open F, ">$cmds" or die "$!";
	print F "redir! >$typefile|echo getcompletion(\'$pat\', 'filetype')|q!";
	close F;
	run($defaults->{vim_command}, $defaults->{vim_options}, '-S', $cmds, $typefile);
	open F, $typefile or die $!;
	$_ = <F>; $_ = <F>;
	s/[\[\]',]//g;
	print "$_\n";
	close F;
	exit;
}

sub usage {
	print <<EOF;
usage: vimcolor [-c] [-l language] [filename]
usage: vimcolor -h
usage: vimcolor -L [pattern]

This program works by running the Vim text editor and getting it to apply its
excellent syntax highlighting (aka 'font-locking') to an input file, and mark
pieces of text according to whether it thinks they are comments, keywords,
strings, etc. The Perl code then reads back this markup and converts it
to text marked with ANSI escape sequences (using Term::ANSIColor)
based on the Vim syntax coloring of the input file.

OPTIONS:
    -c report color settings at the end of the program
    -h print this help and exit
    -l specify the type of file Vim should expect, if not recognized correctly
    -L list supported languages (file types) [that match pattern] and exit

ARGUMENT
    filename  name of the file to colorize. If not given or - then STDIN is used

A color scheme set in vim is respected. You can alter the color scheme using
the TEXT_VIMCOLOR_ANSI environment variable in the format of "SynGroup=color;"
For example:

    TEXT_VIMCOLOR_ANSI='Comment=green;Statement = magenta; '

By default only the following SynGroup syntax groups are used:

    Comment Constant Identifier Statement PreProc Type Special Underlined
    Ignore Error Todo

If you want to benefit from finer-grained syntax highlighting you can
request in TEXT_VIMCOLOR_ANSI additional syntax groups from the list

    Character Number Boolean Float Function Conditional Repeat Label Operator
    Keyword Exception Include Define Macro PreCondit StorageClass Structure
    Typedef Tag SpecialChar Delimiter SpecialComment Debug

and chose appropriate colors.
EOF
	exit;
}
