#!/bin/bash

# Prints the usage message to stderr and terminates the script with status 1.
# Takes no arguments.
function print_help_and_exit
{
cat >&2 << 'EOF'

'voronota-js-fast-iface-data-graph' script generates interface data graphs of protein complexes.

Options:
    --input                   string  *  input file path or '_list' to read file paths from stdin
    --restrict-input          string     query to restrict input atoms, default is '[]'
    --subselect-contacts      string     query to subselect inter-chain contacts, default is '[]'
    --with-reference          string     input reference complex structure file path, default is ''
    --output-data-prefix      string  *  output data files prefix
    --output-table-file       string     output table file path, default is '_stdout' to print to stdout
    --processors              number     maximum number of processors to run in parallel, default is 1
    --sbatch-parameters       string     sbatch parameters to run in parallel, default is ''
    --stdin-file              string     input file path to replace stdin
    --run-faspr               string     path to FASPR binary to rebuild side-chains
    --coarse-grained                     flag to output a coarse-grained graph
    --input-is-script                    flag to treat input file as vs script
    --as-assembly                        flag to treat input file as biological assembly
    --help | -h                          flag to display help message and exit

Standard output:
    space-separated table of generated file paths

Examples:

    voronota-js-fast-iface-data-graph --input model.pdb --output-data-prefix ./data_graphs/

    ls *.pdb | voronota-js-fast-iface-data-graph --input _list --processors 8 --output-data-prefix ./data_graphs/ | column -t

EOF
exit 1
}

# Copies input data to stdout.
# Arguments:
#   $1 - '_stream' to forward the current stdin, otherwise a file path to print
function cat_stdin
{
	# Declared local so the helper does not leave a stray global behind.
	local STDIN_SOURCE="$1"
	if [ "$STDIN_SOURCE" == "_stream" ]
	then
		cat
	else
		cat "$STDIN_SOURCE"
	fi
}

# Prints the name used to identify an input.
# Arguments:
#   $1 - main input path, or one of the special values '_stream' / '_list'
#   $2 - stdin replacement file path, or '_stream' when the real stdin is used
# Output:
#   basename of $2 when $1 is a special value and $2 is a real file,
#   otherwise basename of $1
function get_input_basename
{
	local INPUT_MAIN_BASENAME
	local INPUT_STDIN_FILE="$2"
	# The path is quoted so that names containing spaces are kept intact.
	INPUT_MAIN_BASENAME="$(basename "$1")"
	if [ "$INPUT_MAIN_BASENAME" == "_stream" ] || [ "$INPUT_MAIN_BASENAME" == "_list" ]
	then
		if [ "$INPUT_STDIN_FILE" != "_stream" ]
		then
			basename "$INPUT_STDIN_FILE"
			return 0
		fi
	fi
	printf '%s\n' "$INPUT_MAIN_BASENAME"
}

# Prints a file name template with the first '-BASENAME-' placeholder
# replaced by the basename of the given path.
# Arguments:
#   $1 - path whose basename is inserted
#   $2 - template, possibly containing '-BASENAME-'
function substitute_id_in_filename
{
	local SUBSTITUTE_BASENAME
	local SUBSTITUTE_TEMPLATE="$2"
	SUBSTITUTE_BASENAME="$(basename "$1")"
	
	# The template is cut around the first placeholder and re-joined, so the
	# inserted name is always literal: characters such as '&', '|' or '\'
	# cannot be misread as replacement syntax.
	if [[ "$SUBSTITUTE_TEMPLATE" == *-BASENAME-* ]]
	then
		printf '%s\n' "${SUBSTITUTE_TEMPLATE%%-BASENAME-*}${SUBSTITUTE_BASENAME}${SUBSTITUTE_TEMPLATE#*-BASENAME-}"
	else
		printf '%s\n' "$SUBSTITUTE_TEMPLATE"
	fi
}

# Remember how the script was invoked, so it can re-invoke itself later.
readonly ZEROARG=$0
ALLARGS=("$@")

# Without any arguments there is nothing to do except showing the usage.
if [ -z "$1" ]
then
	print_help_and_exit
fi

# When the script was called through a path, prepend its directory to PATH.
# The directory is resolved in a subshell, so the working directory of the
# script never changes, and PATH is left untouched if the lookup fails.
if [[ $ZEROARG == *"/"* ]]
then
	if ZEROARG_DIR="$(cd "$(dirname "$ZEROARG")" &> /dev/null && pwd)"
	then
		export PATH="${ZEROARG_DIR}:${PATH}"
	fi
fi

# Fixed locale for reproducible sorting and number formatting.
export LC_ALL=C

command -v voronota-js &> /dev/null || { echo >&2 "Error: 'voronota-js' executable not in binaries path"; exit 1; }

# Default values of all command line options.
INFILE=""
RESTRICT_INPUT="[]"
SUBSELECT_CONTACTS="[]"
WITH_REFERENCE=""
OUTPUT_DATA_PREFIX=""
OUTPUT_TABLE_FILE="_stdout"
MAX_PROCESSORS="1"
SBATCH_PARAMETERS=""
STDIN_FILE="_stream"
RUN_FASPR=""
COARSE_GRAINED="false"
INPUT_IS_SCRIPT="false"
AS_ASSEMBLY="false"
HELP_MODE="false"

# Consume the command line. Every iteration shifts the option name away;
# options that take a value shift once more. When an option is repeated,
# the last occurrence wins. Empty values are accepted on purpose: the script
# passes them to itself (e.g. --sbatch-parameters '').
while (( $# > 0 ))
do
	OPTION="$1"
	OPTARG="$2"
	shift
	case "$OPTION" in
	--input)
		INFILE="$OPTARG"
		shift
		;;
	--restrict-input)
		RESTRICT_INPUT="$OPTARG"
		shift
		;;
	--subselect-contacts)
		SUBSELECT_CONTACTS="$OPTARG"
		shift
		;;
	--with-reference)
		WITH_REFERENCE="$OPTARG"
		shift
		;;
	--output-data-prefix)
		OUTPUT_DATA_PREFIX="$OPTARG"
		shift
		;;
	--output-table-file)
		OUTPUT_TABLE_FILE="$OPTARG"
		shift
		;;
	--processors)
		MAX_PROCESSORS="$OPTARG"
		shift
		;;
	--sbatch-parameters)
		SBATCH_PARAMETERS="$OPTARG"
		shift
		;;
	--stdin-file)
		STDIN_FILE="$OPTARG"
		shift
		;;
	--run-faspr)
		RUN_FASPR="$OPTARG"
		shift
		;;
	--coarse-grained)
		COARSE_GRAINED="true"
		;;
	--input-is-script)
		INPUT_IS_SCRIPT="true"
		;;
	--as-assembly)
		AS_ASSEMBLY="true"
		;;
	-h|--help)
		HELP_MODE="true"
		;;
	*)
		echo >&2 "Error: invalid command line option '$OPTION'"
		exit 1
		;;
	esac
done

# A help request is served before any other validation.
if [[ "$HELP_MODE" == "true" ]]
then
	print_help_and_exit
fi

# The input and the output data prefix are both mandatory.
if [[ -z "$INFILE" ]]
then
	echo >&2 "Error: input file not provided"
	exit 1
fi

if [[ -z "$OUTPUT_DATA_PREFIX" ]]
then
	echo >&2 "Error: output data files prefix not provided"
	exit 1
fi

# Optional files, when given, must exist and be non-empty.
if [[ -n "$WITH_REFERENCE" && ! -s "$WITH_REFERENCE" ]]
then
	echo >&2 "Error: input reference file '$WITH_REFERENCE' does not exist"
	exit 1
fi

if [[ -n "$RUN_FASPR" && ! -s "$RUN_FASPR" ]]
then
	echo >&2 "Error: FASPR binary executable file '$RUN_FASPR' does not exist"
	exit 1
fi

# Script-line mode: the input value itself carries a one-line script, prefixed
# with '_scriptline_' and with spaces encoded as '_-_'. The line is decoded,
# stored in a temporary file, and the script re-invokes itself on that file.
if [[ $INFILE == "_scriptline_"* ]]
then
	EXTRACTED_SCRIPT_LINE="${INFILE#_scriptline_}"
	EXTRACTED_SCRIPT_LINE="${EXTRACTED_SCRIPT_LINE//_-_/ }"
	
	# The decoded string is checked directly: a file written with a trailing
	# newline is never empty, so a file size test cannot detect this case.
	if [ -z "$EXTRACTED_SCRIPT_LINE" ]
	then
		echo >&2 "Error: no input string line extracted"
		exit 1
	fi
	
	TMPLDIR="$(mktemp -d)" || { echo >&2 "Error: failed to create temporary directory"; exit 1; }
	readonly TMPLDIR
	trap 'rm -r -- "$TMPLDIR"' EXIT
	
	printf '%s\n' "$EXTRACTED_SCRIPT_LINE" > "$TMPLDIR/_extracted_script_line"
	
	# The appended options override the original --input given in ALLARGS,
	# because the last occurrence of an option wins during parsing.
	"$ZEROARG" "${ALLARGS[@]}" --input-is-script --input "$TMPLDIR/_extracted_script_line"
	
	exit 0
fi

# Anything other than the special values '_list' and '_stream' must name an
# existing, non-empty input file.
if [[ "$INFILE" != "_list" && "$INFILE" != "_stream" && ! -s "$INFILE" ]]
then
	echo >&2 "Error: input file '$INFILE' does not exist"
	exit 1
fi

# The same requirement applies to a file given as a replacement for stdin.
if [[ "$STDIN_FILE" != "_stream" && ! -s "$STDIN_FILE" ]]
then
	echo >&2 "Error: stdin replacement file '$STDIN_FILE' does not exist"
	exit 1
fi

# Stream mode: the structure data arrives on stdin (or from the stdin
# replacement file). It is saved to a temporary file and the script
# re-invokes itself with that file as the input.
if [ "$INFILE" == "_stream" ]
then
	TMPLDIR="$(mktemp -d)" || { echo >&2 "Error: failed to create temporary directory"; exit 1; }
	readonly TMPLDIR
	trap 'rm -r -- "$TMPLDIR"' EXIT
	
	cat_stdin "$STDIN_FILE" > "$TMPLDIR/_stream"
	
	if [ ! -s "$TMPLDIR/_stream" ]
	then
		echo >&2 "Error: no stdin data"
		exit 1
	fi
	
	# The appended --input overrides the original '--input _stream' from
	# ALLARGS, because the last occurrence of an option wins during parsing.
	"$ZEROARG" "${ALLARGS[@]}" --input "$TMPLDIR/_stream"
	
	exit 0
fi

# The number of processors must be a positive decimal integer. The value is
# matched against a digits-only pattern first, because a numeric test on a
# non-numeric string fails with status 2, which an '||' chain would silently
# treat as "valid".
if [[ ! "$MAX_PROCESSORS" =~ ^[0-9]+$ ]] || [ "$MAX_PROCESSORS" -lt "1" ]
then
	echo >&2 "Error: invalid number of processors '$MAX_PROCESSORS', must be a positive number"
	exit 1
fi

# Running through sbatch needs the Slurm tools, list input, and at least
# two processors.
if [ -n "$SBATCH_PARAMETERS" ]
then
	command -v sbatch &> /dev/null || { echo >&2 "Error: 'sbatch' executable not in binaries path"; exit 1; }
	command -v squeue &> /dev/null || { echo >&2 "Error: 'squeue' executable not in binaries path"; exit 1; }
	
	if [ "$INFILE" != "_list" ]
	then
		echo >&2 "Error: sbatch usage requested, but input is not '_list'"
		exit 1
	fi
	
	if [ "$MAX_PROCESSORS" -lt "2" ]
	then
		echo >&2 "Error: sbatch usage requested, but requested number of processors is less than 2"
		exit 1
	fi
fi

# Parallel list mode: the list of input paths is split into portions, every
# portion is processed by a child invocation of this script (locally through
# xargs, or as Slurm jobs through sbatch), and the tables written by the
# children are merged into a single output table.
if [ "$INFILE" == "_list" ] && [ "$MAX_PROCESSORS" -gt "1" ]
then
	TMPLDIR="$(mktemp -d)" || { echo >&2 "Error: failed to create temporary directory"; exit 1; }
	readonly TMPLDIR
	trap 'rm -r -- "$TMPLDIR"' EXIT
	
	# Non-empty, unique, sorted input lines.
	cat_stdin "$STDIN_FILE" | grep -E . | sort | uniq > "$TMPLDIR/input_list"
	
	if [ ! -s "$TMPLDIR/input_list" ]
	then
		echo >&2 "Error: no stdin data"
		exit 1
	fi
	
	# Portion size is the number of inputs divided by the number of
	# processors, rounded up, and capped at 19997 lines.
	NUM_OF_INPUTS="$(wc -l < "$TMPLDIR/input_list")"
	SIZE_OF_PORTION="$(echo "a=$NUM_OF_INPUTS; b=$MAX_PROCESSORS; if(a%b) a/b+1 else a/b" | bc)"
	
	if [ "$SIZE_OF_PORTION" -gt "19997" ]
	then
		SIZE_OF_PORTION="19997"
	fi
	
	mkdir -p "$TMPLDIR/portions"
	
	split -l "$SIZE_OF_PORTION" "$TMPLDIR/input_list" "$TMPLDIR/portions/portion_"
	
	mkdir -p "$TMPLDIR/children_tables"
	
	if [ -n "$SBATCH_PARAMETERS" ]
	then
		mkdir -p "$TMPLDIR/slurm_logs"
		
		# One sbatch submission per portion; the job ids are collected from
		# the 'Submitted batch job <id>' lines. $SBATCH_PARAMETERS is left
		# unquoted on purpose, so that it splits into separate arguments.
		find "$TMPLDIR/portions/" -type f -not -empty \
		| awk -v outdir="$TMPLDIR/children_tables" '{print "--stdin-file " $1 " --output-table-file " outdir "/" NR ".txt"}' \
		| xargs -L 1 sbatch -o "$TMPLDIR/slurm_logs/slurmjob-%j.out" -e "$TMPLDIR/slurm_logs/slurmjob-%j.err" $SBATCH_PARAMETERS "$ZEROARG" "${ALLARGS[@]}" --sbatch-parameters '' --processors 1 --input _list \
		| grep -E '^Submitted batch job ' \
		| awk '{print $4}' \
		> "$TMPLDIR/slurm_job_ids"
		
		# Poll the queue until none of the submitted jobs is listed anymore.
		sleep 1
		REMAINING_SLURM_JOBS="$(squeue | grep -f "$TMPLDIR/slurm_job_ids" | wc -l)"
		while [ "$REMAINING_SLURM_JOBS" -gt "0" ]
		do
			sleep 5
			REMAINING_SLURM_JOBS="$(squeue | grep -f "$TMPLDIR/slurm_job_ids" | wc -l)"
		done
		
		# Forward everything the jobs logged to stderr.
		find "$TMPLDIR/slurm_logs/" -type f -not -empty | xargs -L 1 cat >&2
	else
		# One local child process per portion, at most MAX_PROCESSORS at a time.
		find "$TMPLDIR/portions/" -type f -not -empty \
		| awk -v outdir="$TMPLDIR/children_tables" '{print "--stdin-file " $1 " --output-table-file " outdir "/" NR ".txt"}' \
		| xargs -L 1 -P "$MAX_PROCESSORS" "$ZEROARG" "${ALLARGS[@]}" --processors 1 --input _list
	fi
	
	# Merge the children tables, keeping only the first header line.
	find "$TMPLDIR/children_tables" -type f -not -empty \
	| sort \
	| xargs -L 1 cat \
	| awk '{if(NR==1 || $1!="input_name") print $0}' \
	> "$TMPLDIR/full_output_table"
	
	if [ -n "$OUTPUT_TABLE_FILE" ] && [ "$OUTPUT_TABLE_FILE" != "_stdout" ]
	then
		mkdir -p "$(dirname "$OUTPUT_TABLE_FILE")"
		cat "$TMPLDIR/full_output_table" > "$OUTPUT_TABLE_FILE"
	else
		cat "$TMPLDIR/full_output_table"
	fi
	
	exit 0
fi

# Resolve the '-BASENAME-' placeholder of the output table path from the
# input name. The arguments are quoted so that paths with spaces stay intact.
INFILE_BASENAME="$(get_input_basename "$INFILE" "$STDIN_FILE")"
OUTPUT_TABLE_FILE="$(substitute_id_in_filename "$INFILE_BASENAME" "$OUTPUT_TABLE_FILE")"

# Prints the argument as a double-quoted JavaScript string literal, with
# backslashes and double quotes escaped, so that arbitrary option values and
# file paths cannot break the generated script.
function js_string_literal
{
	local ESCAPED_VALUE
	ESCAPED_VALUE="$(printf '%s\n' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')"
	printf '"%s"' "$ESCAPED_VALUE"
}

# The JavaScript program is assembled from three parts and piped into
# voronota-js: (1) the parameters taken from the command line, (2) one array
# entry per input file, (3) the fixed processing code.
{
cat << EOF
var common_params={}
common_params.input_is_script=$(js_string_literal "$INPUT_IS_SCRIPT");
common_params.input_as_assembly=$(js_string_literal "$AS_ASSEMBLY");
common_params.restrict_input_atoms=$(js_string_literal "$RESTRICT_INPUT");
common_params.contacts_subselection=$(js_string_literal "$SUBSELECT_CONTACTS");
common_params.run_faspr=$(js_string_literal "$RUN_FASPR");
common_params.with_reference=$(js_string_literal "$WITH_REFERENCE");
common_params.coarse_grained=$(js_string_literal "$COARSE_GRAINED");
common_params.output_prefix=$(js_string_literal "$OUTPUT_DATA_PREFIX");
common_params.output_table_file=$(js_string_literal "$OUTPUT_TABLE_FILE");
var input_info_array=[];
EOF

{
if [ "$INFILE" == "_list" ]
then
	cat_stdin "$STDIN_FILE" | grep -E . | sort | uniq
else
	printf '%s\n' "$INFILE"
fi
} | while read -r SUBINFILE
do
SUBINFILE_BASENAME="$(get_input_basename "$SUBINFILE" "$STDIN_FILE")"
cat << EOF
input_info_array.push({"input_file": $(js_string_literal "$SUBINFILE"), "input_file_name": $(js_string_literal "$SUBINFILE_BASENAME")});
EOF
done

cat << 'EOF'
// Loads a structure (or runs an input script), renames the loaded object to
// the given title, restricts it to protein atoms near the inter-chain
// interface, and constructs and selects the inter-chain contacts.
prepare_object=function(input_file, title, run_faspr)
{
	if(input_file===undefined || input_file==="")
	{
		throw ("No input file");
	}
	
	if(title===undefined || title==="")
	{
		throw ("No object title");
	}
	
	voronota_delete_objects("-names", title);
	
	voronota_unpick_objects();
	
	if(common_params.input_is_script=="true")
	{
		voronota_source("-file", input_file);
		voronota_assert_partial_success("Failed when running provided input script");
	}
	else
	{
		voronota_import("-file", input_file, "-as-assembly", common_params.input_as_assembly);
		voronota_assert_partial_success("Failed to import file");
	}
	
	voronota_list_objects();
	voronota_assert_full_success("Failed to list objects");
	var num_of_objects=voronota_last_output().results[0].output.objects.length;
	var loaded_object_title=voronota_last_output().results[0].output.objects[num_of_objects-1].name;
	
	voronota_rename_object("-original", loaded_object_title, "-new", title);
	voronota_assert_full_success("Failed to rename object");
	
	voronota_pick_objects("-names", title);
	voronota_assert_full_success("Failed to pick new object");
	
	voronota_restrict_atoms("-use", common_params.restrict_input_atoms);
	voronota_assert_full_success("Failed to restrict input atoms by the input query");
	
	voronota_restrict_atoms("-use", "[-protein]");
	voronota_assert_full_success("Failed to restrict input atoms to protein only");
	
	if(run_faspr!="")
	{
		voronota_faspr("-lib-file", run_faspr);
		voronota_assert_full_success("Failed to run FASPR");
	}
	
	voronota_select_atoms_close_to_interchain_interface("-name", "actii");
	voronota_assert_full_success("Failed to select interface atoms");
	
	voronota_restrict_atoms("-use", "[actii]");
	voronota_assert_full_success("Failed to restrict input atoms to interface atoms");
	
	voronota_construct_contacts("-calculate-bounding-arcs -calculate-adjacencies -probe 1.4 -skip-sas -skip-same-chain -no-calculate-volumes -no-tag-peripherial");
	voronota_assert_full_success("Failed to construct inter-chain contacts");
	
	voronota_select_contacts("-use", "([-inter-chain] and "+common_params.contacts_subselection+")", "-name", "inter_chain_contacts");
	voronota_assert_full_success("Failed to select inter-chain contacts");
}

// Computes the per-contact adjuncts for the object named "model", exports
// the vertices and edges files, and returns the header and the row of the
// summary table as strings.
generate_model_data_graph_files=function(input_name)
{
	if(input_name===undefined || input_name==="")
	{
		throw ("No input name");
	}
	
	voronota_pick_objects("-names", "model");
	voronota_assert_full_success("Failed to pick model object");
	
	voronota_voromqa_global("-adj-contact-energy", "VE", "-adj-contact-energy-split-to-sas", "VESSn");
	voronota_assert_full_success("Failed to compute VoroMQA-light scores");
	
	voronota_mock_voromqa_local_contacts("-use", "[inter_chain_contacts]", "-adj-contact-energy", "MVE", "-adj-contact-energy-split-to-sas", "MVESSn");
	voronota_assert_full_success("Failed to compute mock VoroMQA-light scores");
	
	voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["VESSn_a", "area"], "-output-adjunct", "VESSa");
	voronota_assert_full_success("Failed multiply split sas energy adjuncts and areas");
	
	voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["VESSn_b", "area"], "-output-adjunct", "VESSb");
	voronota_assert_full_success("Failed multiply split sas energy adjuncts and areas");
	
	voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["MVESSn_a", "area"], "-output-adjunct", "MVESSa");
	voronota_assert_full_success("Failed multiply split sas energy adjuncts and areas");
	
	voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["MVESSn_b", "area"], "-output-adjunct", "MVESSb");
	voronota_assert_full_success("Failed multiply split sas energy adjuncts and areas");
	
	var adjuncts_to_output=["contact_index", "ir_contact_index", "area", "distance", "adjacency", "boundary", "VE", "VESSa", "VESSb", "MVE", "MVESSa", "MVESSb"];
	
	if(common_params.with_reference)
	{
		voronota_cad_score("-target", "model", "-model", "target", "-t-sel", "[inter_chain_contacts]", "-t-adj-inter-residue", "IRCADn_s0", "-t-adj-inter-residue-relevant-areas", "IRAS");
		voronota_assert_full_success("Failed to compute CAD-score");
		
		voronota_smooth_adjacent_contact_adjunct_values("-use", "[inter_chain_contacts]", "-adj-in", "IRCADn_s0", "-adj-out", "IRCADn_s1", "-iterations 1", "-default-value", 0);
		voronota_assert_full_success("Failed to smooth contact CAD-score values");
		
		voronota_smooth_adjacent_contact_adjunct_values("-use", "[inter_chain_contacts]", "-adj-in", "IRCADn_s0", "-adj-out", "IRCADn_s2", "-iterations 2", "-default-value", 0);
		voronota_assert_full_success("Failed to smooth contact CAD-score values");
		
		voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["IRCADn_s0", "area"], "-output-adjunct", "IRCADs0");
		voronota_assert_full_success("Failed multiply CAD-score values and areas");
		
		voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["IRCADn_s1", "area"], "-output-adjunct", "IRCADs1");
		voronota_assert_full_success("Failed multiply CAD-score values and areas");
		
		voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["IRCADn_s2", "area"], "-output-adjunct", "IRCADs2");
		voronota_assert_full_success("Failed multiply CAD-score values and areas");
		
		voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_divide", "-input-adjuncts", ["IRAS_m", "IRAS_t"], "-output-adjunct", "IRASn");
		voronota_assert_full_success("Failed divide model and target areas areas");
		
		voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_bound", "-input-adjuncts", ["IRASn"], "-parameters", [0, 1], "-output-adjunct", "IRASnb");
		voronota_assert_full_success("Failed divide model and target areas areas");
		
		voronota_set_adjunct_of_contacts_by_expression("-use", "[inter_chain_contacts]", "-expression", "_multiply", "-input-adjuncts", ["IRASnb", "area"], "-output-adjunct", "IRAS");
		voronota_assert_full_success("Failed divide model and target areas areas");
		
		adjuncts_to_output.push("IRCADs0");
		adjuncts_to_output.push("IRCADs1");
		adjuncts_to_output.push("IRCADs2");
		adjuncts_to_output.push("IRAS");
	}
	
	var summary={}
	
	summary.input_name=input_name;
	summary.output_vertices_file=common_params.output_prefix+""+input_name+"_vertices.txt";
	summary.output_edges_file=common_params.output_prefix+""+input_name+"_edges.txt";
	
	voronota_export_adjuncts_of_contacts("-inter-residue", common_params.coarse_grained, "-file", summary.output_vertices_file, "-contacts-use", "[inter_chain_contacts]", "-no-serial", "-adjuncts", adjuncts_to_output, "-adjacency-file", summary.output_edges_file);
	voronota_assert_full_success("Failed to export contacts data");
	
	var summary_table={}
	summary_table.header="";
	summary_table.row="";
	
	Object.keys(summary).forEach(function(key)
	{
		summary_table.header+=key+" ";
	});
	
	Object.keys(summary).forEach(function(key)
	{
		// Declared with 'var' to keep the value out of the global scope.
		var value=summary[key];
		if(typeof value === 'number')
		{
			summary_table.row+=parseFloat(value.toFixed(5))+" ";
		}
		else
		{
			summary_table.row+=value+" ";
		}
	});
	
	summary_table.header=summary_table.header.trim();
	summary_table.row=summary_table.row.trim();
	
	return summary_table;
}

if(input_info_array.length<1)
{
	throw ("No input file paths");
}

if(common_params.input_is_script===undefined || common_params.input_is_script==="")
{
	common_params.input_is_script="false";
}

if(common_params.input_as_assembly===undefined || common_params.input_as_assembly==="")
{
	common_params.input_as_assembly="false";
}

if(common_params.restrict_input_atoms===undefined || common_params.restrict_input_atoms==="")
{
	common_params.restrict_input_atoms='[]';
}

if(common_params.contacts_subselection===undefined || common_params.contacts_subselection==="")
{
	common_params.contacts_subselection='[]';
}

if(common_params.run_faspr===undefined)
{
	common_params.run_faspr="";
}

if(common_params.output_prefix===undefined || common_params.output_prefix==="")
{
	throw ("No output prefix");
}

if(common_params.output_table_file===undefined || common_params.output_table_file==="")
{
	common_params.output_table_file="_stdout";
}

voronota_setup_defaults("-no-load-alt-voromqa-potential -faster-load-voromqa-potentials");

if(common_params.with_reference)
{
	prepare_object(common_params.with_reference, "target", "");
}

if(common_params.output_prefix!=="./")
{
	// The path is quoted inside the command so that spaces do not split it.
	shell('mkdir -p "$(dirname "'+common_params.output_prefix+'file")"');
}

var full_summary_table="";

for(var i=0;i<input_info_array.length;i++)
{
	var subinput=input_info_array[i];
	
	try
	{
		prepare_object(subinput.input_file, "model", common_params.run_faspr);
		
		var summary_table=generate_model_data_graph_files(subinput.input_file_name);
		
		if(full_summary_table=="")
		{
			full_summary_table+=summary_table.header+"\n";
		}
		
		full_summary_table+=summary_table.row+"\n";
	}
	catch(error)
	{
		log("Failed with '"+subinput.input_file_name+"': "+error);
	}
	
	voronota_delete_objects("-names", "model");
}

if(common_params.output_table_file!=="_stdout")
{
	// The path is quoted inside the command so that spaces do not split it.
	shell('mkdir -p "$(dirname "'+common_params.output_table_file+'")"');
}

fwrite(common_params.output_table_file, full_summary_table);

EOF

} \
| voronota-js --no-setup-defaults

