From 7d9899a2a9748a4a6896cd60306fcf32925de6d9 Mon Sep 17 00:00:00 2001
From: Gerd von Egidy
Date: Tue, 1 Feb 2011 10:35:36 +0100
Subject: [PATCH] - switch from pdftotext to pdftohtml - explicit charset not needed for all converters

---
 mnogosearch-distconfig.patch | 6 +++---
 mnogosearch.spec | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/mnogosearch-distconfig.patch b/mnogosearch-distconfig.patch
index 2f72758..396313b 100644
--- a/mnogosearch-distconfig.patch
+++ b/mnogosearch-distconfig.patch
@@ -1,6 +1,6 @@
 diff -r -u mnogosearch-3.3.11.orig/etc/indexer.conf-dist mnogosearch-3.3.11/etc/indexer.conf-dist
 --- mnogosearch-3.3.11.orig/etc/indexer.conf-dist 2011-01-31 14:02:07.000000000 +0100
-+++ mnogosearch-3.3.11/etc/indexer.conf-dist 2011-02-01 09:49:45.000000000 +0100
++++ mnogosearch-3.3.11/etc/indexer.conf-dist 2011-02-01 10:33:30.000000000 +0100
 @@ -1,4 +1,4 @@
 -#!/usr/local/mnogosearch/sbin/indexer -d
 +#!/usr/bin/indexer -d
@@ -60,7 +60,7 @@ diff -r -u mnogosearch-3.3.11.orig/etc/indexer.conf-dist mnogosearch-3.3.11/etc/
  #Mime application/x-troff-man text/plain "deroff"
  #Mime text/x-postscript text/plain "ps2ascii"
 -#Mime application/pdf text/plain "pdftotext $1 -"
-+Mime application/pdf "text/plain; charset=utf-8" "pdftotext $1 -"
++Mime application/pdf text/html "pdftohtml -noframes -enc UTF-8 -i -stdout $1"
  #Mime application/vnd.ms-excel text/plain "xls2csv $1"
 -#Mime application/vnd.ms-excel text/html "xlhtml $1"
 +Mime application/vnd.ms-excel "text/html; charset=utf-8" "xlhtml $1"
@@ -68,7 +68,7 @@ diff -r -u mnogosearch-3.3.11.orig/etc/indexer.conf-dist mnogosearch-3.3.11/etc/
  #Mime "text/rtf*" text/xml "rtfx -w $1 2>/dev/null"
 -#Mime "text/rtf*" text/html "unrtf --html $1"
 -#Mime application/vnd.ms-powerpoint "text/html; charset=utf-8" "pptohtml $1"
-+Mime "text/rtf*" "text/html; charset=utf-8" "unrtf --html $1"
++Mime "text/rtf*" text/html "unrtf --html $1"
 +Mime application/vnd.ms-powerpoint "text/html; charset=utf-8" "ppthtml $1"
  #Mime application/vnd.ms-powerpoint text/html "ppthtml $1"
diff --git a/mnogosearch.spec b/mnogosearch.spec
index a7166d9..fe509a3 100644
--- a/mnogosearch.spec
+++ b/mnogosearch.spec
@@ -36,7 +36,7 @@ Requires: %{name}-libs = %{version}-%{release}
 Requires: /usr/bin/wvHtml
 Requires: /usr/bin/xlhtml
 Requires: /usr/bin/ppthtml
-Requires: /usr/bin/pdftotext
+Requires: /usr/bin/pdftohtml
 Requires: /usr/bin/unrtf
 
 Requires(pre): shadow-utils
@@ -310,6 +310,8 @@ service httpd condrestart > /dev/null 2>&1 || :
 %changelog
 * Mon Feb 1 2011 Gerd v. Egidy 3.3.11-4
 - don't use stopwords by default anymore
+- switch from pdftotext to pdftohtml
+- explicit charset not needed for all converters
 
 * Mon Jan 31 2011 Gerd v. Egidy 3.3.11-3
 - include upstream-patch to make adapting the config more easy
-- 
1.7.1