- switch from pdftotext to pdftohtml
author Gerd von Egidy <gerd.von.egidy@intra2net.com>
Tue, 1 Feb 2011 09:35:36 +0000 (10:35 +0100)
committer Gerd von Egidy <gerd.von.egidy@intra2net.com>
Tue, 1 Feb 2011 09:35:36 +0000 (10:35 +0100)
- explicit charset not needed for all converters

mnogosearch-distconfig.patch
mnogosearch.spec

index 2f72758..396313b 100644 (file)
@@ -1,6 +1,6 @@
 diff -r -u mnogosearch-3.3.11.orig/etc/indexer.conf-dist mnogosearch-3.3.11/etc/indexer.conf-dist
 --- mnogosearch-3.3.11.orig/etc/indexer.conf-dist      2011-01-31 14:02:07.000000000 +0100
-+++ mnogosearch-3.3.11/etc/indexer.conf-dist   2011-02-01 09:49:45.000000000 +0100
++++ mnogosearch-3.3.11/etc/indexer.conf-dist   2011-02-01 10:33:30.000000000 +0100
 @@ -1,4 +1,4 @@
 -#!/usr/local/mnogosearch/sbin/indexer -d
 +#!/usr/bin/indexer -d
@@ -60,7 +60,7 @@ diff -r -u mnogosearch-3.3.11.orig/etc/indexer.conf-dist mnogosearch-3.3.11/etc/
  #Mime application/x-troff-man  text/plain                 "deroff"
  #Mime text/x-postscript        text/plain                 "ps2ascii"
 -#Mime application/pdf          text/plain                 "pdftotext $1 -"
-+Mime application/pdf          "text/plain; charset=utf-8" "pdftotext $1 -"
++Mime application/pdf           text/html                  "pdftohtml -noframes -enc UTF-8 -i -stdout $1"
  #Mime application/vnd.ms-excel text/plain                 "xls2csv $1"
 -#Mime application/vnd.ms-excel text/html                  "xlhtml $1"
 +Mime application/vnd.ms-excel "text/html; charset=utf-8"  "xlhtml $1"
@@ -68,7 +68,7 @@ diff -r -u mnogosearch-3.3.11.orig/etc/indexer.conf-dist mnogosearch-3.3.11/etc/
  #Mime "text/rtf*"              text/xml                   "rtfx -w $1 2>/dev/null"
 -#Mime "text/rtf*"              text/html                  "unrtf --html $1"
 -#Mime application/vnd.ms-powerpoint "text/html; charset=utf-8" "pptohtml $1"
-+Mime "text/rtf*"              "text/html; charset=utf-8"  "unrtf --html $1"
++Mime "text/rtf*"               text/html                  "unrtf --html $1"
 +Mime application/vnd.ms-powerpoint "text/html; charset=utf-8" "ppthtml $1"
  #Mime application/vnd.ms-powerpoint text/html             "ppthtml $1"
  
index a7166d9..fe509a3 100644 (file)
@@ -36,7 +36,7 @@ Requires:       %{name}-libs = %{version}-%{release}
 Requires:       /usr/bin/wvHtml
 Requires:       /usr/bin/xlhtml
 Requires:       /usr/bin/ppthtml
-Requires:       /usr/bin/pdftotext
+Requires:       /usr/bin/pdftohtml
 Requires:       /usr/bin/unrtf
 Requires(pre):  shadow-utils
 
@@ -310,6 +310,8 @@ service httpd condrestart > /dev/null 2>&1 || :
 %changelog
 * Mon Feb 1 2011 Gerd v. Egidy <gerd@egidy.de> 3.3.11-4
 - don't use stopwords by default anymore
+- switch from pdftotext to pdftohtml
+- explicit charset not needed for all converters
 
 * Mon Jan 31 2011 Gerd v. Egidy <gerd@egidy.de> 3.3.11-3
 - include upstream-patch to make adapting the config more easy