#!/bin/bash

# pptx to markdown converter
# Revan Sopher 2014
# Creates folder pptimages and md file with unformatted text and images
# TODO don't destroy formatting

# Exactly one argument (the .pptx file to convert) is required.
# The usage text is a diagnostic, so it goes to stderr and never pollutes
# stdout of a caller that captures or pipes this script's output.
if [[ $# -ne 1 ]]; then
	echo "Usage $0 <file.pptx>" >&2
	exit 1
fi

# Working directory for the extracted archive and the intermediate files.
# mktemp gives every run a private, freshly created directory, so leftovers
# from an interrupted run or a concurrent invocation cannot mix into the
# output. The template is pinned to /tmp so the resulting path never contains
# whitespace.
temp=$(mktemp -d /tmp/pptx2md.XXXXXX) || {
	echo "Failed to create temporary directory." >&2
	exit 1
}
# Remove the working directory on every exit path, including early failures.
trap 'rm -rf -- "$temp"' EXIT

# output  : raw text dump of all slides (intermediate)
# output2 : final markdown with image links; this is the file delivered to
#           the current directory
# The argument is quoted so input names containing spaces are handled.
output="$temp/$(basename -- "$1").md"
output2="$temp/$(basename -- "$1")2.md"

#extract file
# A .pptx is a zip archive; unpack it into the working directory.
# -o overwrites without prompting, -q keeps unzip quiet.
# Both paths are quoted so names with spaces are passed through intact, and
# the command is tested directly instead of inspecting $? afterwards.
if ! unzip -oq -d "$temp" "$1"; then
	echo "Failed to extract." >&2
	exit 1
fi

#replace <> with \n, images with placeholder, strip style.visibility

# Filter: slide XML on stdin -> rough text on stdout.
#   1. a <p:cNvPr ... id="N" name="Picture N"/> element becomes the
#      placeholder text "pptx2md PICTURE GOES HERE"
#   2. every run of consecutive tags collapses into two newlines
#   3. "style.visibility" followed by a newline (text left behind once the
#      tags are stripped) is removed
# Relies on GNU sed extensions (\+ in patterns, \n in the replacement).
# NOTE(review): the .\+ in the first expression is greedy and slide XML is
# usually a single long line -- confirm it cannot swallow text between tags.
pptx2md_slide_to_text() {
	sed -e "s/<p:cNvPr .\+ id=\"[0-9]\+\" name=\"Picture [0-9]\+\"\/>/pptx2md PICTURE GOES HERE/g" \
		-e 's/\(<[^>]*>\)\+/\n\n/g' \
		-e 's/style.visibility\n//g'
}

# Let the shell expand the glob instead of parsing ls output, then order the
# slides with a version sort so slide10.xml comes after slide2.xml.
slides=("$temp"/ppt/slides/*.xml)
if [[ -e "${slides[0]}" ]]; then
	# The whole loop writes through a single truncating redirect, so text
	# left in $output by an earlier run is never appended to.
	while IFS= read -r filename; do
		pptx2md_slide_to_text < "$filename"
	done < <(printf '%s\n' "${slides[@]}" | sort --version-sort -f) > "$output"
else
	echo "No slides found in $temp/ppt/slides" >&2
fi

#next, we replace placeholder text with an incrementing image link

# Filter: text on stdin -> text on stdout.
# Any line containing the picture placeholder is replaced wholesale by a
# markdown image link; links are numbered 1, 2, 3, ... in order of
# appearance. All other lines pass through unchanged.
# NOTE(review): links always point at pptimages/image<N>.jpeg; whether the
# extracted media files really follow that name and extension is not checked
# here -- confirm against real decks.
pptx2md_number_images() {
	local line
	local imageNum=1
	# IFS= and -r keep leading whitespace and backslashes intact; the ||
	# clause also handles a final line that lacks a trailing newline.
	while IFS= read -r line || [[ -n "$line" ]]; do
		if [[ "$line" == *"pptx2md PICTURE GOES HERE"* ]]; then
			printf '%s\n' "![image](pptimages/image${imageNum}.jpeg)"
			imageNum=$((imageNum+1))
		else
			# printf, not echo: a line such as "-n" must be printed verbatim.
			printf '%s\n' "$line"
		fi
	done
}

# Truncate rather than append so leftovers from an earlier run cannot leak in.
pptx2md_number_images < "$output" > "$output2"

#copy final files, clean up
# Copy the *contents* of the extracted media folder (note the trailing "/.")
# so the images always land directly in ./pptimages/. Copying the folder
# itself would nest it as ./pptimages/media/ whenever ./pptimages already
# exists, which breaks the generated image links.
# A presentation without pictures has no media folder; skip the copy then.
if [[ -d "$temp/ppt/media" ]]; then
	mkdir -p ./pptimages
	cp -rf "$temp/ppt/media/." ./pptimages/
fi

# Deliver the final markdown file next to the images.
cp "$output2" . || echo "Failed to copy $output2 to the current directory." >&2

# Remove the working directory. ${temp:?} aborts instead of expanding to an
# empty path if the variable is ever unset.
rm -rf -- "${temp:?}"
