#!/bin/busybox sh

#
# (c) 2014 freetz.org
#

# All text tools used by this script are invoked as busybox applets.
GETOPT="busybox getopt"
GREP="busybox grep"
SED="busybox sed"
TR="busybox tr"
# dos2unix is not available by default, so it is emulated by deleting the
# CR (octal 15) and Ctrl-Z (octal 32) characters. The octal escapes are passed
# verbatim to tr, which interprets them itself. $'...' must not be used here:
# it is not expanded within double quotes, so tr would receive the characters
# $ and ' as part of the set and delete them from the phonebook data as well.
# The variable is meant to be expanded unquoted (command + arguments).
DOS2UNIX="$TR -d \15\32"

# name the script was invoked under; it may imply the output format (*2tsv, *2csv, *2xml)
SELF=$(basename "$0")

# Prints the help text to stdout.
# The -t|--to option is only advertised if the output format is not already
# implied by the name the script was invoked under (*2tsv, *2csv, *2xml).
# The synopsis line lists every option that is described below it.
usage() {
	local to_usage= to_help=
	case "${SELF}" in
		*2tsv|*2csv|*2xml)
			;;
		*)
			to_usage=" [-t|--to <tsv|csv|xml>]"
			to_help=$'\n'"    -t|--to tsv|csv|xml                        output format, default: tsv"
			;;
	esac
cat << EOF
Usage: ${SELF} [-h|--help] [-b|--book <active|bookid1,bookid2,...|all>] [-e|--encoding] [-i|--internal] [-a|--anonymize] [-m|--method <pbd|cat|stdin|->]${to_usage}
    -h|--help                                  print this help and exit
    -b|--book active|bookid1,bookid2,...|all   phonebooks to export, default: $(default_book)
    -e|--encoding                              include encoding in the header line (tsv,csv only), default: no encoding included
    -i|--internal                              show internal contacts (tsv,csv only), default: do not show them
    -a|--anonymize                             apply a simple anonymization algorithm to the data, default: disabled
    -m|--method pbd|cat|stdin|-                phonebook retrieval method, default: $(default_retrieval_method)
                                                * pbd --exportbook <bookid>
                                                * cat /var/flash/phonebook
                                                * stdin
                                                * - same as stdin${to_help}
EOF
}

# Puts the whole input (stdin) on a single line.
#   $1 - optional replacement for the newline characters;
#        if it is empty or missing the newlines are simply deleted
linearize() {
	local replacement="$1"

	if [ -z "${replacement}" ]; then
		$TR -d $'\n'
		return
	fi

	$TR $'\n' "${replacement}"
}

# Normalizes the whitespaces of the data read from stdin.
# prerequisite: input is linearized
#
# The expressions are applied in this order:
#   1. replace tabs with spaces
#   2. replace multiple spaces with a single one (also within xml-element value, design decision)
#   3. remove an unnecessary space before/after the xml-element open tokens (<, </, <?)
#   4. remove an unnecessary space before/after the xml-element close tokens (>, />, ?>)
normalize_whitespaces() {
	$SED -r \
		-e 's,\t, ,g' \
		-e 's/[ ]{2,}/ /g' \
		-e 's, ?(<[/?]?) ?,\1,g' \
		-e 's, ?([/?]?>) ?,\1,g'
}

# Repairs the xml found in /var/flash/phonebook, which is not well-formed.
# prerequisite: input is linearized && whitespace-normalized && "unnecessary var flash only elements are removed"
#
# The expressions are applied in this order:
#   1. xml-declaration is wrong: <? ... > instead of <? ... ?>, add the missing ?
#   2. multiple root elements (<phonebook>): open a surrounding <phonebooks>
#      element right after the xml-declaration ...
#   3. ... and close it at the very end of the input
#      (AVM uses the same xml-element in the exported version)
# Input which is already well-formed does not match any expression and is
# passed through unchanged.
ensure_well_formed_xml() {
	$SED -r \
		-e 's,(<[?][^?>]+)(>),\1?\2,' \
		-e 's,([?]>)(<phonebook[ >]),\1<phonebooks>\2,' \
		-e 's,(</phonebook>)$,\1</phonebooks>,'
}

# The pbd retrieval method simply concatenates several xml documents without
# taking care of the proper xml structure (for reasons of simplicity).
# This filter turns the result into a single document again by dropping every
# "</phonebooks>, xml-declaration, <phonebooks>" sequence found at a joint.
# prerequisite: input is linearized && whitespace-normalized
fix_join_effects() {
	$SED -r 's,</phonebooks><[?][^>]+><phonebooks>,,g'
}

# Removes the elements which are not needed for the export:
#   - every <featureflags> element
#   - a <uniqueid> element located directly in front of a <phonebook> element
#     (<uniqueid> elements at other places are kept)
# prerequisite: input is linearized && whitespace-normalized
remove_unnecessary_var_flash_only_elements() {
	$SED -r \
		-e 's,<featureflags>[^<]*</featureflags>,,g' \
		-e 's,<uniqueid>[^<]*</uniqueid>(<phonebook[ >]),\1,g'
}

# Joins the arguments $2, $3, $4, ... into a single string and prints it
# without a trailing newline. The 1st character of $1 serves as delimiter,
# i.e. the output is $2${DELIMITER}$3${DELIMITER}$4... whereas DELIMITER=${1:0:1}
join() {
	# "$*" joins the positional parameters with the 1st character of IFS,
	# the subshell keeps the modified IFS away from the caller
	(
		IFS="$1"
		shift
		echo -n "$*"
	)
}

# Prints a regex or-pattern built from all arguments, i.e. $1|$2|$3|...
# The alternatives are not surrounded with parentheses, this is up to the caller.
regex_or_pattern() {
	join '|' "$@"
}

# Puts each xml-element with one of the given names on a separate line.
#   $@ - names of the xml-elements to be isolated
# prerequisite: input is whitespace-normalized
# tags like <element/> are not supported yet (just not necessary as of now)
xml_element_per_line() {
	local names break_before break_after
	names=$(regex_or_pattern "$@")

	# line break in front of each opening tag which is preceded by some character
	break_before='s,(.)(<('"${names}"')[ >]),\1\n\2,g'
	# line break behind each closing tag which is followed by some character
	break_after='s,(</('"${names}"')>)(.),\1\n\3,g'

	# two separate passes: the 2nd one has to see the lines produced by the 1st one
	$SED -r -e "${break_before}" \
	| $SED -r -e "${break_after}"
}

# Prints the xml read from stdin with a line break between adjacent tags.
# A ">" at the end of the input line gets an additional line break as well.
# prerequisite: input is linearized && whitespace-normalized && well-formed
pretty_print_xml() {
	# TODO: indentation?
	$SED -r \
		-e 's,>$,>\n,' \
		-e 's,><,>\n<,g'
}

# Prints (without a trailing newline) the extended regex matching the opening
# tag of every phonebook selected by ${BOOK}. The pattern is not anchored.
# prerequisite: BOOK variable is set and its value is valid, i.e. it is either
#               "all" or a space separated list of numeric book ids
filter_by_book_id_pattern() {
	local filter_pattern="<phonebook[ >]" # all pattern
	if [ "${BOOK}" != "all" ]; then
		# The alternatives must be grouped. Without the parentheses "1|2" would split
		# the whole expression into two independent halves (...owner="1 and 2"...>),
		# which also match e.g. owner="10" or any line containing 2">.
		# ${BOOK} is intentionally unquoted: one argument per book id
		filter_pattern="<phonebook[^>]* owner=\"($(regex_or_pattern ${BOOK}))\"[^>]*>"
		if echo ${BOOK} | $GREP -qE "(^| )0( |$)"; then
			# book with id 0 has no attributes
			filter_pattern="(${filter_pattern}|<phonebook>)"
		fi
	fi
	echo -n "${filter_pattern}"
}

# Removes all phonebooks not selected by ${BOOK} from the xml read from stdin.
# The output is linearized again.
# prerequisite: input is linearized && whitespace-normalized
filter_by_book_id() {
	if [ "${BOOK}" != "all" ]; then
		local wanted_books
		wanted_books=$(filter_by_book_id_pattern)

		# one phonebook per line, then
		#   - keep all non-phonebook-lines
		#   - keep all phonebook-lines with id contained in $BOOK
		xml_element_per_line phonebook \
		| $SED -r -n \
			-e '/^<phonebook[ >]/ !p' \
			-e "/^${wanted_books}/ p" \
		| linearize
	else
		# don't filter anything, just pass the input through
		cat -
	fi
}

# Replaces personal data with fixed placeholder values if ${ANONYMIZE} is not 0,
# otherwise the input is passed through unchanged.
# Affected are the non-empty values of the elements
#   realName, id, selfId, imageURL (any spelling of "image" and "url" supported by
#   the pattern below), number, email, mod_time
# as well as the non-empty value of each name="..." attribute.
# prerequisite: input is linearized && whitespace-normalized
anonymize() {
	if [ "${ANONYMIZE}" -eq 0 ]; then
		cat -
	else
		$SED -r \
			-e 's,(<(realName|id|selfId|[Ii]mage[Uu][Rr][Ll])( [^>]+)?>)[^<]+(</\2>),\1anonymized\4,g' \
			-e 's,(<(number)( [^>]+)?>)[^<]+(</\2>),\1\+49-000-1234567890\4,g' \
			-e 's,(<(email)( [^>]+)?>)[^<]+(</\2>),\1anonymized@server.com\4,g' \
			-e 's,(<(mod_time)( [^>]+)?>)[^<]+(</\2>),\11234567890\4,g' \
			-e 's,(name=")[^"]+("),\1anonymized\2,g'
	fi
}

# Replaces the five predefined xml entity references (&lt; &gt; &quot; &apos; &amp;)
# in the data read from stdin with the characters they stand for.
# see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
#
# &amp; has to be handled last. Otherwise an escaped ampersand followed by an
# entity name, e.g. "&amp;quot;" (which stands for the text "&quot;"), would be
# unescaped twice and end up as a double-quote sign.
unescape_xml_entity_references() {
	$SED -r -e '{
		s,&lt\;,<,g
		s,&gt\;,>,g
		s,&quot\;,",g
		s,&apos;,'"'"',g
		s,&amp\;,\&,g
	}'
}

# Converts the phonebook xml read from stdin to tsv (tab separated values):
# one header line followed by one line per phone number.
# prerequisite: input is linearized && whitespace-normalized && well-formed
#
# Globals read:
#   INCLUDE_ENCODING - if not 0, the encoding="..." attribute of the xml-declaration
#                      is appended (tab separated) to the header line
#   SHOW_INTERNAL    - if 0, contacts considered to be internal are omitted
#
# Each data line starts with the cells category, realName, number, type,
# quickdial, vanity (same order as the header columns); missing type, quickdial
# or vanity attributes result in empty cells.
# NOTE(review): whatever is left of the number attributes after the extraction
# seems to stay on the line as an additional trailing cell - confirm with real data.
to_tsv() {
	# header line
	# (not terminated yet, the encoding might have to be appended to it)
	echo -ne "VIP\tContactName\tNumber\tNumberType\tQuickDialNumber\tVanityNumber"

	# sed code which turns the xml-declaration line into <tab>encoding="...";
	# stays empty if no encoding is requested
	local encoding_related_code=
	if [ "${INCLUDE_ENCODING}" -eq 0 ]; then
		# finish header line, no encoding requested
		echo
	else
		# NOTE(review): in this branch the header line is terminated only by the
		# encoding line printed below; if the input has no xml-declaration with
		# an encoding attribute the first data line ends up on the header line
		encoding_related_code='/^<[?][^>]* encoding="[^"]*"[^>]*>$/ {s,^.* (encoding="[^"]*").*$,\t\1,p}'
	fi

	# negated sed address selecting the contacts to be processed;
	# stays empty (i.e. selects every contact) if internal contacts are to be shown
	local filter_internal_contacts_pattern=
	if [ "${SHOW_INTERNAL}" -eq 0 ]; then
		# the following contacts are considered to be internal:
		#  - containing at least one number with type "intern" or "memo"
		#  - containing at least one "@hd-telefonie"-number
		#  - having the following words in their realName: Anrufbeantworter, Wecker, Rundruf
		filter_internal_contacts_pattern='/(<number[^>]+type="(intern|memo)"|<number[^>]*>[^<]*@hd-telefonie|<realName[^>]*>[^<]*(Anrufbeantworter|Wecker|Rundruf))/ !'
	fi

	# pipeline stages:
	#  1. one line per <phonebooks>/<contact> element
	#  2. keep the encoding (if requested) and the contact lines only,
	#     strip everything irrelevant from the contact lines
	#  3. one line per <number> element; the category & realName elements of a
	#     contact are expected to end up on the line in front of its numbers
	#  4. convert to tsv: category & realName are remembered in the hold buffer
	#     and prepended to each number line of the contact
	#  5. unescape xml entity references within the resulting cells
	xml_element_per_line phonebooks contact \
	| $SED -r -n -e '{
		'"${encoding_related_code}"'                                                        # extract encoding and keep it if requested
		/^<contact[ >]/ {                                                                   # consider only contact lines
			'"${filter_internal_contacts_pattern}"'{                                    # filter internal contacts out if necessary
				s,<(category)/>,<\1></\1>,                                          # transform <category/> to <category></category>
				s,<([Ii]mage[Uu][Rr][Ll]|id|selfId|mod_time|uniqueid)>[^<]*</\1>,,g # remove
				s,<(services|setup)[^>]*>.*</\1>,,g                                 #        irrelevant
				s,<([Ii]mage[Uu][Rr][Ll]|services|setup)/>,,g                       #                   elements
				s,</?(contact|person|telephony)[^>]*>,,g                            #                            and
				s, (prio|id)="[^"]*",,g                                             #                                attributes
				s,<number[^>]*/>,,g                                                 # remove number elements
				s,<number[^>]*></number>,,g                                         #                        with no number
				s,(type="fax)_(home|work)",\1",g                                    # replace "fax_*" with just "fax" (cosmetic only)
				p
			}
		}
	}' \
	| xml_element_per_line number \
	| $SED -r -n -e '{                                                                          # convert xml to tsv (tab separated values)
		/^[ \t]*encoding="[^"]*"$/ p                                                        # keep encoding
		/^<(category)>[^<]*<\/\1><(realName)>[^<]*<\/\2>$/ {
			s,<(category|realName)>([^<]*)<\/\1>,\2\t,g                                 # keep category & realName in the hold buffer
			h
		}
		/^<number[ >]/ {                                                                    # and add it to each phone number line
			s,<(number)([^>]*)>([^<]*)<\/\1>,\3\t\2,g                                   # extract number, keep other number attributes
			#
			/ type="[^"]*"/ !{                                                          # if type-attribute does not exist
				s,(.*[\t])([^\t]*),\1\t\2,                                          # append empty cell
			}
			/ type="[^"]*"/ {                                                           # if type-attribute exists
				s,(.*[\t])([^\t]*) type="([^"]*)"(.*),\1\3\t\2\4,                   # extract it
			}
			#
			/ quickdial="[^"]*"/ !{                                                     # if quickdial-attribute does not exist
				s,(.*[\t])([^\t]*),\1\t\2,                                          # append empty cell
			}
			/ quickdial="[^"]*"/ {                                                      # if quickdial-attribute exists
				s,(.*[\t])([^\t]*) quickdial="([^"]*)"(.*),\1\3\t\2\4,              # extract it
			}
			#
			/ vanity="[^"]*"/ !{                                                        # if vanity-attribute does not exist
				s,(.*[\t])([^\t]*),\1\t\2,                                          # append empty cell
			}
			/ vanity="[^"]*"/ {                                                         # if vanity-attribute exists
				s,(.*[\t])([^\t]*) vanity="([^"]*)"(.*),\1\3\t\2\4,                 # extract it
			}
			#
			G                                                                           # append category & realName to the pattern space by getting them from the hold buffer
			s,([^\n]*)\n([^\n]*),\2\1,                                                  # put category & realName in front of the number and its attributes
			p
		}
	}' \
	| unescape_xml_entity_references
}

# Converts tsv (tab separated values) read from stdin to csv.
# The expressions are applied in this order:
#   1. escape double-quote sign according to RFC4180 2.7 (by doubling it)
#   2. escape each cell with " (i.e. surround it with double-quote signs)
#   3. replace tab with comma
tsv_to_csv() {
	$SED -r \
		-e 's,","",g' \
		-e 's,([^\t]*),"\1",g' \
		-e 's/\t/,/g'
}

# Prints the id of the active phonebook (without a trailing newline).
# Two sources are tried one after another, if both fail a warning is written
# to stderr and 0 is assumed.
determine_active_book_id() {
	local flash_configd="/var/flash/configd"
	local book_id=""

	# 1st source: <current_book> element of /var/flash/configd
	if [ -e "${flash_configd}" ] && ! checkempty "${flash_configd}" 2>/dev/null; then
		# This branch works for firmware versions >= 05.5x. Firmware versions 05.2x
		# do have non-empty /var/flash/configd (so we enter the branch) but don't
		# contain <current_book> element in it.

		# TODO: it should be possible to query the active book id using something like
		# "ctlmgr_ctl r configd settings/.../current_book" (key is unknown)
		# "ctlmgr_ctl r configd settings/PBDLUA/current_book" doesn't work unfortunately
		book_id=$(cat "${flash_configd}" | linearize ' ' | normalize_whitespaces | $SED -r -n -e 's,.*<(current_book)>([^<]+)</\1>.*,\2,p')
	fi

	# 2nd source: telcfg, this one seems to work for firmware versions < 05.5x
	[ -n "${book_id}" ] || book_id=$(ctlmgr_ctl r telcfg settings/Phonebook/Id 2>/dev/null)

	# last resort
	if [ -z "${book_id}" ]; then
		echo >&2 "Warning: failed to determine active-book-id, assuming 0"
		book_id="0"
	fi

	echo -n ${book_id}
}

# Succeeds if the script seems to be running on a FRITZ!Box.
# heuristic: the machine name reported by uname starts with "mips" (case-insensitive)
running_on_fritzbox() {
	case "$(uname -m)" in
		[Mm][Ii][Pp][Ss]*)
			return 0
			;;
		*)
			return 1
			;;
	esac
}

# default value of the -b|--book option:
# "active" when running on a FRITZ!Box, "all" otherwise
default_book() {
	if running_on_fritzbox; then
		echo -n active
	else
		echo -n all
	fi
}

# default phonebook retrieval method (-m|--method option):
# "pbd" when running on a FRITZ!Box, "stdin" otherwise
default_retrieval_method() {
	if running_on_fritzbox; then
		echo -n pbd
	else
		echo -n stdin
	fi
}

# Prints the value of an AVM configuration variable (without a trailing newline).
#   $1     - name of the variable, e.g. CONFIG_FONBOOK2
#   $2 ... - optional: files to search for an "export NAME=VALUE" line,
#            default: /etc/init.d/rc.conf /etc/init.d/rc.init
# A non-empty shell/environment variable of that name takes precedence over the
# files. The files are searched in the given order; the value may be unquoted or
# enclosed in single or double quotes.
# Returns 0 if a non-empty value was found, 1 otherwise.
get_avm_config_variable() {
	local name="$1" value="" f

	[ -n "${name}" ] || return 1

	# keep only the files to be searched in the positional parameters
	if [ "$#" -gt 1 ]; then
		shift
	else
		set -- /etc/init.d/rc.conf /etc/init.d/rc.init
	fi

	# indirect expansion; ${name} is expected to be a plain variable name
	# supplied by this script, never by untrusted input
	eval "value=\${${name}}"
	if [ -n "${value}" ]; then
		echo -n "${value}"
		return 0
	fi

	for f in "$@"; do
		[ -e "${f}" ] || continue

		# the repetition has to be part of the group, otherwise only the last
		# character of the value gets captured
		value=$($SED -r -n -e "s,^export ${name}=([\"']?)([^\"']+)\1$,\2,p" < "${f}")

		if [ -n "${value}" ]; then
			echo -n "${value}"
			return 0
		fi
	done

	return 1
}

# Prints the AVM firmware version as integer (i.e. with all dots removed).
# The version is taken from CONFIG_VERSION if set, from /etc/.version otherwise;
# if that file cannot be read 0000 is used.
get_avm_firmware_version() {
	local version="${CONFIG_VERSION}"

	if [ -z "${version}" ]; then
		version=$(cat /etc/.version 2>/dev/null || echo 0000)
	fi

	echo "${version//./}"
}

# Makes sure the box uses the XML-based phonebook, i.e. CONFIG_FONBOOK2 is "y".
# source: http://www.wehavemorefun.de/fritzbox/CONFIG_FONBOOK2
# Returns 0 if so; otherwise an error is written to stderr and the (sub)shell
# is terminated with exit code 1.
check_if_fritzbox_uses_xml_based_phonebook() {
	if [ "$(get_avm_config_variable CONFIG_FONBOOK2)" != "y" ]; then
		echo >&2 "Error: this box doesn't seem to use XML-based phonebook"
		exit 1
	fi

	return 0
}

# Retrieves the phonebooks using AVM's pbd and prints them to stdout.
# pbd is called once for all user defined books, once per online book (only if
# CONFIG_ONLINEPB is "y") and once for the internal book (only if
# ${SHOW_INTERNAL} is not 0). The files exported by the calls are simply joined
# with newlines, see fix_join_effects for the necessary cleanup.
# If pbd is missing or one of the calls fails the cat-method is used instead
# and its return code is returned.
retrieve_book_pbd() {
	check_if_fritzbox_uses_xml_based_phonebook

	if ! type pbd >/dev/null 2>&1; then
		echo >&2 "Warning: pbd not found, falling back to the cat-method"
		retrieve_book_cat
		return $?
	fi

	# AVM constants
	local pbd_export="/var/tmp/pbd.export"
	local online_book_ids="240 241 242 243 244 245 246 247 248 249 250 251 252 253 254"
	local internal_book_id=255

	local book_id cmd xml_content=""

	# "all_user_defined" is a placeholder for the single call exporting all user defined books
	local book_ids="all_user_defined"
	# source: http://www.wehavemorefun.de/fritzbox/Online-Telefonbuch
	if [ "$(get_avm_config_variable CONFIG_ONLINEPB)" = "y" ]; then
		book_ids="${book_ids} ${online_book_ids}"
	fi
	if [ "${SHOW_INTERNAL}" -ne 0 ]; then
		book_ids="${book_ids} ${internal_book_id}"
	fi

	for book_id in ${book_ids}; do
		# make sure a stale export is never mistaken for the result of this call
		rm -f "${pbd_export}"

		case "${book_id}" in
			all_user_defined)
				cmd="pbd --export"
				;;
			*)
				cmd="pbd --exportbook ${book_id}"
				;;
		esac

		if ! ${cmd} || [ ! -f "${pbd_export}" ]; then
			echo >&2 "Warning: failed while doing ${cmd}, falling back to the cat-method"
			rm -f "${pbd_export}"
			retrieve_book_cat
			return $?
		fi

		xml_content=$(join $'\n' "${xml_content}" "$(cat "${pbd_export}")")
		rm -f "${pbd_export}"
	done

	echo -n "${xml_content}"
}

# Retrieves the phonebooks by printing the content of /var/flash/phonebook.
# If that file doesn't exist or is empty (according to checkempty) an error is
# written to stderr and the (sub)shell is terminated with exit code 1.
retrieve_book_cat() {
	check_if_fritzbox_uses_xml_based_phonebook

	local flash_phonebook="/var/flash/phonebook"

	if [ ! -e "${flash_phonebook}" ]; then
		echo >&2 "Error: \"${flash_phonebook}\" doesn't exist"
		exit 1
	elif checkempty "${flash_phonebook}" 2>/dev/null; then
		echo >&2 "Error: \"${flash_phonebook}\" is empty"
		exit 1
	fi

	cat "${flash_phonebook}"
}

# Retrieves the phonebooks by copying stdin to stdout unchanged.
retrieve_book_stdin() {
	cat
}

# parse arguments

# defaults, may be overridden by the options processed below
BOOK=$(default_book)
INCLUDE_ENCODING=0
SHOW_INTERNAL=0
ANONYMIZE=0
METHOD=$(default_retrieval_method)
case "${SELF}" in
	*2tsv|*2csv|*2xml)
		# the output format is given by the last 3 characters of the script name,
		# the -t|--to option is not accepted in this case
		TO="${SELF:$((${#SELF}-3)):3}"
		ARGS=$($GETOPT -o hb:eiam:   --long help,book:,encoding,internal,anonymize,method:     -n "${SELF}" -- "$@")
		;;
	*)
		# the output format defaults to tsv and is selectable with -t|--to
		TO="tsv"
		ARGS=$($GETOPT -o hb:eiam:t: --long help,book:,encoding,internal,anonymize,method:,to: -n "${SELF}" -- "$@")
		;;
esac
# we assume getopt call is the last command in both case-branches above
# (so that $? still holds its exit code here; nothing may be inserted in between)
if [ "$?" -ne 0 ]; then
	usage
	exit 1
fi

# replace the positional parameters with the command line as normalized by getopt
eval set -- "${ARGS}"

# consume the options one by one until the "--" marker is reached;
# options taking a parameter consume two positional parameters
while true; do
	case "$1" in
		-h|--help)
			usage
			exit 0
			;;
		-b|--book)
			case "$2" in
				active|all)
					BOOK="$2"
					;;
				*)
					# anything else must be a list of numeric book ids
					# separated by commas and/or spaces
					if ! echo $2 | $GREP -qE "^[0-9]+([, ][ ]*[0-9]+)*$"; then
						echo >&2 "Error: invalid $1 parameter \"$2\""
						usage
						exit 1
					fi
					# the list is stored space separated
					BOOK=$(echo $2 | $TR "," " ")
					;;
			esac
			shift 2
			;;
		-i|--internal)
			SHOW_INTERNAL=1
			shift
			;;
		-a|--anonymize)
			ANONYMIZE=1
			shift
			;;
		-e|--encoding)
			INCLUDE_ENCODING=1
			shift
			;;
		-m|--method)
			# ${METHOD} is used later on as suffix of the retrieve_book_* function to be called
			case "$2" in
				pbd|cat|stdin)
					METHOD="$2"
					;;
				-)
					# "-" is just an alias for stdin
					METHOD="stdin"
					;;
				*)
					echo >&2 "Error: invalid $1 parameter \"$2\""
					usage
					exit 1
					;;
			esac
			shift 2
			;;
		-t|--to)
			case "$2" in
				tsv|csv|xml)
					TO="$2"
					;;
				*)
					echo >&2 "Error: invalid $1 parameter \"$2\""
					usage
					exit 1
					;;
			esac
			shift 2
			;;
		--)
			# end of options, the remaining parameters (if any) are positional ones
			shift
			break
			;;
		*)
			# not expected to happen: an option which is not handled above
			echo >&2 "Error: internal error!"
			exit 1
			;;
	esac
done

# the script doesn't take any positional parameters
if [ "$#" -ne 0 ]; then
	echo >&2 "Error: unexpected parameter(s) $@"
	usage
	exit 1
fi

# replace the keyword "active" with the id of the currently active phonebook
if [ "${BOOK}" = "active" ]; then
	BOOK="$(determine_active_book_id)"
fi

# process
set -o pipefail

# ${METHOD} selects one of the retrieve_book_pbd/_cat/_stdin functions
pb_content=$(retrieve_book_${METHOD}) # might fail, thus no direct piping
rc=$?; [ $rc -ne 0 ] && exit $rc

# The stages establish the prerequisites of their successors step by step:
# strip CR/Ctrl-Z characters, linearize, normalize whitespaces, repair the xml
# structure, select the requested phonebooks, optionally anonymize and finally
# convert to the requested output format.
echo -n "${pb_content}" | $DOS2UNIX | linearize ' ' | normalize_whitespaces | fix_join_effects | remove_unnecessary_var_flash_only_elements | ensure_well_formed_xml | filter_by_book_id | anonymize | {
	case "${TO}" in
		tsv)
			to_tsv
			;;
		csv)
			# csv is derived from the tsv output
			to_tsv | tsv_to_csv
			;;
		xml)
			pretty_print_xml
			;;
	esac
}