ports/124770: [NEW PORT] www/jericho-html: A java library to analyse and manipulate HTML

Marcin Cieslak saper at SYSTEM.PL
Thu Jun 19 23:50:06 UTC 2008


>Number:         124770
>Category:       ports
>Synopsis:       [NEW PORT] www/jericho-html: A java library to analyse and manipulate HTML
>Confidential:   no
>Severity:       non-critical
>Priority:       high
>Responsible:    freebsd-ports-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Thu Jun 19 23:50:05 UTC 2008
>Closed-Date:
>Last-Modified:
>Originator:     Marcin Cieslak
>Release:        FreeBSD 7.0-STABLE amd64
>Organization:
>Environment:
System: FreeBSD radziecki.saper.info 7.0-STABLE FreeBSD 7.0-STABLE #5: Thu May  8 23:14:51 CEST
>Description:
Jericho HTML Parser is a simple but powerful Java library allowing
analysis and manipulation of parts of an HTML document, including
some common server-side tags, while reproducing verbatim any
unrecognised or invalid HTML.

It also provides high-level HTML form manipulation functions.

WWW:	http://jerichohtml.sourceforge.net/doc/index.html

Generated with FreeBSD Port Tools 0.77
>How-To-Repeat:
>Fix:

--- jericho-html-2.5.shar begins here ---
# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	jericho-html
#	jericho-html/Makefile
#	jericho-html/distinfo
#	jericho-html/pkg-descr
#	jericho-html/files
#	jericho-html/files/patch-encoding
#
# Create the port directory; mkdir output and errors are discarded.
echo c - jericho-html
mkdir -p jericho-html > /dev/null 2>&1
# Extract the Makefile: sed strips the leading "X" guard character from
# every line of the here-document and writes the result to the file.
echo x - jericho-html/Makefile
sed 's/^X//' >jericho-html/Makefile << 'END-of-jericho-html/Makefile'
X# New ports collection makefile for:	jerichohtml
X# Date created:		2008-06-17
X# Whom:			Marcin Cieslak <saper at SYSTEM.PL>
X#
X# $FreeBSD$
X#
X
XPORTNAME=	jericho-html
XPORTVERSION=	2.5
XCATEGORIES=	www java
XMASTER_SITES=	${MASTER_SITE_SOURCEFORGE}
X# The :S,-,, modifier drops the hyphen, so the subdirectory is "jerichohtml".
XMASTER_SITE_SUBDIR=	${PORTNAME:S,-,,}
X
XMAINTAINER=	saper at SYSTEM.PL
XCOMMENT=	A java library to analyse and manipulate HTML
X
XUSE_ZIP=	yes
XUSE_JAVA=	1.3+
X
X# Colon-separated classpath of the logging API jars used while compiling
X# and while generating javadoc.  The paths are relative to ${WRKSRC},
X# which is where do-build changes directory before using them.
X# The line continuations below leave blanks inside the quoted value, so
X# every use site expands it as ${INTERFACES:S, ,,g} to strip them.
XINTERFACES:=	"compile-time-dependencies/slf4j-api-1.4.3.jar:\
X	compile-time-dependencies/commons-logging-api-1.1.jar:\
X	compile-time-dependencies/log4j-api-1.2.14.jar"
X
X# Only the generated API documentation is installed as port docs.
XPORTDOCS=	api
XPLIST_FILES+=	%%JAVAJARDIR%%/${PORTNAME}.jar
X
X# Build steps:
X#   1. empty ${WRKSRC}/classes and compile both source packages into it;
X#   2. pack the compiled classes into lib/${PORTNAME}.jar;
X#   3. unless NOPORTDOCS is defined, empty ${WRKSRC}/doc and regenerate
X#      the javadoc tree under doc/api.
X# ${INTERFACES:S, ,,g} is the dependency classpath with its blanks removed.
Xdo-build:
X	cd ${WRKSRC} && ${RM} -rf classes/* && \
X		${JAVAC} -classpath ${INTERFACES:S, ,,g} -d classes \
X			src/java/au/id/jericho/lib/html/*.java \
X			src/java/au/id/jericho/lib/html/nodoc/*.java
X	${JAR} -cf ${WRKSRC}/lib/${PORTNAME}.jar -C ${WRKSRC}/classes .
X.if !defined(NOPORTDOCS)
X	cd ${WRKSRC} && ${RM} -rf doc/* && \
X		${JAVADOC} -quiet \
X			-windowtitle "Jericho HTML Parser ${PORTVERSION}" \
X			-classpath ${INTERFACES:S, ,,g}:src/java:classes \
X			-use -d ${WRKSRC}/doc/api \
X			-subpackages au.id.jericho.lib.html \
X			-exclude au.id.jericho.lib.html.nodoc \
X			-noqualifier au.id.jericho.lib.html \
X			-group "Core package" au.id.jericho.lib.html
X.endif
X
X# Install the jar and, unless NOPORTDOCS is defined, the API documentation
X# generated by do-build.
Xdo-install:
X# The destination file is named explicitly: with a bare directory operand
X# a missing ${JAVAJARDIR} would be created as a regular file instead of
X# causing an error.
X	${INSTALL_DATA} ${WRKSRC}/lib/${PORTNAME}.jar \
X		${JAVAJARDIR}/${PORTNAME}.jar
X.if !defined(NOPORTDOCS)
X	${MKDIR} ${DOCSDIR}
X# COPYTREE_SHARE is the ports framework macro for copying a directory
X# tree; unlike a raw find | cpio it also applies the shared-data owner,
X# group and modes to what it installs.
X	(cd ${WRKSRC}/doc && ${COPYTREE_SHARE} api ${DOCSDIR})
X.endif
X
X# Nothing in this Makefile sits between the pre and post halves of the
X# ports framework, so the single combined include is used.
X.include <bsd.port.mk>
END-of-jericho-html/Makefile
# Extract distinfo (distfile checksums and size), stripping the "X" guard.
echo x - jericho-html/distinfo
sed 's/^X//' >jericho-html/distinfo << 'END-of-jericho-html/distinfo'
XMD5 (jericho-html-2.5.zip) = 64306d0eb82608e50496a680b319182d
XSHA256 (jericho-html-2.5.zip) = 212b9e8b72f9787dfafd046e8716f0d04365afcd3f4d2fb293e69d5b90e456b4
XSIZE (jericho-html-2.5.zip) = 1456664
END-of-jericho-html/distinfo
# Extract the package description, stripping the "X" guard from each line.
echo x - jericho-html/pkg-descr
sed 's/^X//' >jericho-html/pkg-descr << 'END-of-jericho-html/pkg-descr'
XJericho HTML Parser is a simple but powerful Java library allowing
Xanalysis and manipulation of parts of an HTML document, including
Xsome common server-side tags, while reproducing verbatim any
Xunrecognised or invalid HTML.
X
XIt also provides high-level HTML form manipulation functions.
X
XWWW:	http://jerichohtml.sourceforge.net/doc/index.html
END-of-jericho-html/pkg-descr
# Create the files/ subdirectory; mkdir output and errors are discarded.
echo c - jericho-html/files
mkdir -p jericho-html/files > /dev/null 2>&1
# Extract the source patch; the quoted here-document delimiter keeps the
# shell from expanding anything inside the diff text.
echo x - jericho-html/files/patch-encoding
sed 's/^X//' >jericho-html/files/patch-encoding << 'END-of-jericho-html/files/patch-encoding'
X--- src/java/au/id/jericho/lib/html/StreamEncodingDetector.java.orig	2008-06-17 21:01:53.890292905 +0200
X+++ src/java/au/id/jericho/lib/html/StreamEncodingDetector.java	2008-06-17 21:02:43.940300330 +0200
X@@ -203,9 +203,9 @@
X 		// Assume the more likely case of four 8-bit characters <= U+00FF.
X 		// Check whether it fits some common EBCDIC strings that might be found at the start of a document:
X 		if (b1==0x4C) { // first character is EBCDIC '<' (ASCII 'L'), check a couple more characters before assuming EBCDIC encoding:
X-			if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC ("Lo§”" in Windows-1252)
X-			if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC ("LZÄÖ" in Windows-1252)
X-			if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" ("LÈãÔ" in Windows-1252), or "<htm" ("Lˆ£”" in Windows-1252)
X+			if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC 
X+			if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC 
X+			if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all of the 3 bytes after the '<' have the high-order bit set, indicating EBCDIC letters such as "<HTM" or "<htm" 
X 			// although this is not an exhaustive check for EBCDIC, it is safer to assume a more common preliminary encoding if none of these conditions are met.
X 		}
X 		// Now confident that it is not EBCDIC, but some other 8-bit encoding.
END-of-jericho-html/files/patch-encoding
# Stop here so that any text following the archive is never read by sh.
exit
--- jericho-html-2.5.shar ends here ---

>Release-Note:
>Audit-Trail:
>Unformatted:



More information about the freebsd-ports-bugs mailing list