Sophie: file-5.16-1.6.mga4 src

file-5.16-1.6.mga4.src.rpm

https://bugzilla.redhat.com/show_bug.cgi?id=1098222

https://github.com/file/file/commit/758e066df72fb1ac08d2eea91ddc3973d259e991
https://github.com/file/file/commit/74cafd7de9ec99a14f4480927580e501c8f852c3
https://github.com/file/file/commit/71a8b6c0d758acb0f73e2e51421a711b5e9d6668
https://github.com/file/file/commit/69a5a43b3b71f53b0577f41264a073f495799610
https://github.com/file/file/commit/4a284c89d6ef11aca34da65da7d673050a5ea320

diff -Naurp file-5.16/doc/magic.man file-5.16.oden/doc/magic.man
--- file-5.16/doc/magic.man	2013-04-22 17:30:10.000000000 +0200
+++ file-5.16.oden/doc/magic.man	2014-07-29 11:10:18.000000000 +0200
@@ -228,13 +228,25 @@ Regular expressions can take exponential
 performance is hard to predict, so their use is discouraged.
 When used in production environments, their performance
 should be carefully checked.
-The type specification can be optionally followed by
-.Dv /[c][s] .
+The size of the string to search should also be limited by specifying
+.Dv /<length> ,
+to avoid performance issues scanning long files.
+The type specification can also be optionally followed by
+.Dv /[c][s][l] .
 The
 .Dq c
 flag makes the match case insensitive, while the
 .Dq s
 flag update the offset to the start offset of the match, rather than the end.
+The
+.Dq l
+modifier changes the length limit to mean a number of lines instead of a
+byte count.
+Lines are delimited by the platform's native line delimiter.
+When a line count is specified, an implicit byte count is also computed, assuming
+each line is 80 characters long.
+If neither a byte nor a line count is specified, the search is limited automatically
+to 8KiB.
 The regular expression is tested against line
 .Dv N + 1
 onwards, where
@@ -409,6 +421,9 @@ is octal, and
 .Dv 0x13
 is hexadecimal.
 .Pp
+Numeric operations are not performed on date types; instead, the numeric
+value is interpreted as an offset.
+.Pp
 For string values, the string from the
 file must match the specified string.
 The operators
diff -Naurp file-5.16/magic/Magdir/commands file-5.16.oden/magic/Magdir/commands
--- file-5.16/magic/Magdir/commands	2013-03-25 15:06:55.000000000 +0100
+++ file-5.16.oden/magic/Magdir/commands	2014-07-29 11:10:18.000000000 +0200
@@ -49,7 +49,7 @@
 !:mime	text/x-awk
 0	string/wt	#!\ /usr/bin/awk	awk script text executable
 !:mime	text/x-awk
-0	regex		=^\\s{0,100}BEGIN\\s{0,100}[{]	awk script text
+0	regex/4096	=^\\s{0,100}BEGIN\\s{0,100}[{]	awk script text
 
 # AT&T Bell Labs' Plan 9 shell
 0	string/wt	#!\ /bin/rc	Plan 9 rc shell script text executable
diff -Naurp file-5.16/magic/Magdir/fortran file-5.16.oden/magic/Magdir/fortran
--- file-5.16/magic/Magdir/fortran	2013-06-08 15:50:40.000000000 +0200
+++ file-5.16.oden/magic/Magdir/fortran	2014-07-29 11:10:18.000000000 +0200
@@ -2,6 +2,6 @@
 #------------------------------------------------------------------------------
 # $File: fortran,v 1.7 2012/06/21 01:55:02 christos Exp $
 # FORTRAN source
-0	regex/100	\^[Cc][\ \t]	FORTRAN program
+0	regex/100l	\^[Cc][\ \t]	FORTRAN program
 !:mime	text/x-fortran
 !:strength - 5
diff -Naurp file-5.16/magic/Magdir/graphviz file-5.16.oden/magic/Magdir/graphviz
--- file-5.16/magic/Magdir/graphviz	2009-09-19 18:28:09.000000000 +0200
+++ file-5.16.oden/magic/Magdir/graphviz	2014-07-29 11:10:18.000000000 +0200
@@ -6,7 +6,7 @@
 # FIXME: These patterns match too generally. For example, the first
 # line matches a LaTeX file containing the word "graph" (with a {
 # following later) and the second line matches this file.
-#0	regex/100	[\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{	graphviz graph text
+#0	regex/100l	[\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{	graphviz graph text
 #!:mime	text/vnd.graphviz
-#0	regex/100	[\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{	graphviz digraph text
+#0	regex/100l	[\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{	graphviz digraph text
 #!:mime	text/vnd.graphviz
diff -Naurp file-5.16/magic/Magdir/marc21 file-5.16.oden/magic/Magdir/marc21
--- file-5.16/magic/Magdir/marc21	2011-09-08 23:58:42.000000000 +0200
+++ file-5.16.oden/magic/Magdir/marc21	2014-07-29 11:10:18.000000000 +0200
@@ -12,17 +12,17 @@
 20	string	45	
 
 # leader starts with 5 digits, followed by codes specific to MARC format
->0	regex/1	(^[0-9]{5})[acdnp][^bhlnqsu-z]	MARC21 Bibliographic
+>0	regex/1l	(^[0-9]{5})[acdnp][^bhlnqsu-z]	MARC21 Bibliographic
 !:mime	application/marc
->0	regex/1	(^[0-9]{5})[acdnosx][z]	MARC21 Authority
+>0	regex/1l	(^[0-9]{5})[acdnosx][z]	MARC21 Authority
 !:mime	application/marc
->0	regex/1	(^[0-9]{5})[cdn][uvxy]	MARC21 Holdings
+>0	regex/1l	(^[0-9]{5})[cdn][uvxy]	MARC21 Holdings
 !:mime	application/marc
-0	regex/1	(^[0-9]{5})[acdn][w]	MARC21 Classification
+0	regex/1l	(^[0-9]{5})[acdn][w]	MARC21 Classification
 !:mime	application/marc
->0	regex/1	(^[0-9]{5})[cdn][q]	MARC21 Community
+>0	regex/1l	(^[0-9]{5})[cdn][q]	MARC21 Community
 !:mime	application/marc
 
 # leader position 22-23, should be "00" but is it?
->0	regex/1	(^.{21})([^0]{2})	(non-conforming)
+>0	regex/1l	(^.{21})([^0]{2})	(non-conforming)
 !:mime	application/marc
diff -Naurp file-5.16/magic/Magdir/scientific file-5.16.oden/magic/Magdir/scientific
--- file-5.16/magic/Magdir/scientific	2010-09-20 21:19:17.000000000 +0200
+++ file-5.16.oden/magic/Magdir/scientific	2014-07-29 11:10:18.000000000 +0200
@@ -91,12 +91,12 @@
 # uppercase letters. However, examples have been seen without the date string,
 # e.g., the example on the chemime site.
 0	string	HEADER\ \ \ \ 
->&0	regex/1	\^.{40}
->>&0	regex/1	[0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
->>>&0	regex/1s	[A-Z0-9]{4}.{14}$
->>>>&0	regex/1	[A-Z0-9]{4}	Protein Data Bank data, ID Code %s
+>&0	regex/1l	\^.{40}
+>>&0	regex/1l	[0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
+>>>&0	regex/1ls	[A-Z0-9]{4}.{14}$
+>>>>&0	regex/1l	[A-Z0-9]{4}	Protein Data Bank data, ID Code %s
 !:mime	chemical/x-pdb
->>>>0	regex/1	[0-9]{2}-[A-Z]{3}-[0-9]{2}	\b, %s
+>>>>0	regex/1l	[0-9]{2}-[A-Z]{3}-[0-9]{2}	\b, %s
 
 # Type:	GDSII Stream file
 0	belong	0x00060002	GDSII Stream file
diff -Naurp file-5.16/magic/Magdir/troff file-5.16.oden/magic/Magdir/troff
--- file-5.16/magic/Magdir/troff	2009-09-19 18:28:12.000000000 +0200
+++ file-5.16.oden/magic/Magdir/troff	2014-07-29 11:10:18.000000000 +0200
@@ -16,9 +16,9 @@
 !:mime	text/troff
 0	search/1	'''		troff or preprocessor input text
 !:mime	text/troff
-0	regex/20	\^\\.[A-Za-z0-9][A-Za-z0-9][\ \t]	troff or preprocessor input text
+0	regex/20l	\^\\.[A-Za-z0-9][A-Za-z0-9][\ \t]	troff or preprocessor input text
 !:mime	text/troff
-0	regex/20	\^\\.[A-Za-z0-9][A-Za-z0-9]$	troff or preprocessor input text
+0	regex/20l	\^\\.[A-Za-z0-9][A-Za-z0-9]$	troff or preprocessor input text
 !:mime	text/troff
 
 # ditroff intermediate output text
diff -Naurp file-5.16/src/apprentice.c file-5.16.oden/src/apprentice.c
--- file-5.16/src/apprentice.c	2013-11-19 22:01:12.000000000 +0100
+++ file-5.16.oden/src/apprentice.c	2014-07-29 11:10:18.000000000 +0200
@@ -1317,7 +1317,8 @@ string_modifier_check(struct magic_set *
 	if ((ms->flags & MAGIC_CHECK) == 0)
 		return 0;
 
-	if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
+	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
+	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
 		file_magwarn(ms,
 		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
 		return -1;
@@ -1810,8 +1811,13 @@ parse(struct magic_set *ms, struct magic
 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
 					break;
 				case CHAR_PSTRING_4_LE:
-					if (m->type != FILE_PSTRING)
+					switch (m->type) {
+					case FILE_PSTRING:
+					case FILE_REGEX:
+						break;
+					default:
 						goto bad;
+					}
 					m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
 					break;
 				case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
diff -Naurp file-5.16/src/file.h file-5.16.oden/src/file.h
--- file-5.16/src/file.h	2014-07-29 11:10:02.000000000 +0200
+++ file-5.16.oden/src/file.h	2014-07-29 11:10:18.000000000 +0200
@@ -321,6 +321,7 @@ struct magic {
 #define PSTRING_2_LE				BIT(9)
 #define PSTRING_4_BE				BIT(10)
 #define PSTRING_4_LE				BIT(11)
+#define REGEX_LINE_COUNT			BIT(11)
 #define PSTRING_LEN	\
     (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
 #define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
diff -Naurp file-5.16/src/softmagic.c file-5.16.oden/src/softmagic.c
--- file-5.16/src/softmagic.c	2014-07-29 11:10:02.000000000 +0200
+++ file-5.16.oden/src/softmagic.c	2014-07-29 11:10:46.000000000 +0200
@@ -53,7 +53,7 @@ private int32_t mprint(struct magic_set
 private int32_t moffset(struct magic_set *, struct magic *);
 private void mdebug(uint32_t, const char *, size_t);
 private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
-    const unsigned char *, uint32_t, size_t, size_t);
+    const unsigned char *, uint32_t, size_t, struct magic *);
 private int mconvert(struct magic_set *, struct magic *, int);
 private int print_sep(struct magic_set *, int);
 private int handle_annotation(struct magic_set *, struct magic *);
@@ -500,7 +500,7 @@ mprint(struct magic_set *ms, struct magi
 	case FILE_BEDATE:
 	case FILE_LEDATE:
 	case FILE_MEDATE:
-		if (file_printf(ms, m->desc, file_fmttime(p->l, FILE_T_LOCAL,
+		if (file_printf(ms, m->desc, file_fmttime(p->l + m->num_mask, FILE_T_LOCAL,
 		    tbuf)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint32_t);
@@ -510,7 +510,7 @@ mprint(struct magic_set *ms, struct magi
 	case FILE_BELDATE:
 	case FILE_LELDATE:
 	case FILE_MELDATE:
-		if (file_printf(ms, m->desc, file_fmttime(p->l, 0, tbuf)) == -1)
+		if (file_printf(ms, m->desc, file_fmttime(p->l + m->num_mask, 0, tbuf)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint32_t);
 		break;
@@ -518,7 +518,7 @@ mprint(struct magic_set *ms, struct magi
 	case FILE_QDATE:
 	case FILE_BEQDATE:
 	case FILE_LEQDATE:
-		if (file_printf(ms, m->desc, file_fmttime(p->q, FILE_T_LOCAL,
+		if (file_printf(ms, m->desc, file_fmttime(p->q + m->num_mask, FILE_T_LOCAL,
 		    tbuf)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint64_t);
@@ -527,7 +527,7 @@ mprint(struct magic_set *ms, struct magi
 	case FILE_QLDATE:
 	case FILE_BEQLDATE:
 	case FILE_LEQLDATE:
-		if (file_printf(ms, m->desc, file_fmttime(p->q, 0, tbuf)) == -1)
+		if (file_printf(ms, m->desc, file_fmttime(p->q + m->num_mask, 0, tbuf)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint64_t);
 		break;
@@ -535,7 +535,7 @@ mprint(struct magic_set *ms, struct magi
 	case FILE_QWDATE:
 	case FILE_BEQWDATE:
 	case FILE_LEQWDATE:
-		if (file_printf(ms, m->desc, file_fmttime(p->q, FILE_T_WINDOWS,
+		if (file_printf(ms, m->desc, file_fmttime(p->q + m->num_mask, FILE_T_WINDOWS,
 		    tbuf)) == -1)
 			return -1;
 		t = ms->offset + sizeof(uint64_t);
@@ -871,8 +871,9 @@ private int
 mconvert(struct magic_set *ms, struct magic *m, int flip)
 {
 	union VALUETYPE *p = &ms->ms_value;
+	uint8_t type;
 
-	switch (cvt_flip(m->type, flip)) {
+	switch (type = cvt_flip(m->type, flip)) {
 	case FILE_BYTE:
 		cvt_8(p, m);
 		return 1;
@@ -924,7 +925,8 @@ mconvert(struct magic_set *ms, struct ma
 	case FILE_BELDATE:
 		p->l = (int32_t)
 		    ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3]));
-		cvt_32(p, m);
+		if (type == FILE_BELONG)
+			cvt_32(p, m);
 		return 1;
 	case FILE_BEQUAD:
 	case FILE_BEQDATE:
@@ -935,7 +937,8 @@ mconvert(struct magic_set *ms, struct ma
 		     ((uint64_t)p->hq[2]<<40)|((uint64_t)p->hq[3]<<32)|
 		     ((uint64_t)p->hq[4]<<24)|((uint64_t)p->hq[5]<<16)|
 		     ((uint64_t)p->hq[6]<<8)|((uint64_t)p->hq[7]));
-		cvt_64(p, m);
+		if (type == FILE_BEQUAD)
+			cvt_64(p, m);
 		return 1;
 	case FILE_LESHORT:
 		p->h = (short)((p->hs[1]<<8)|(p->hs[0]));
@@ -946,7 +949,8 @@ mconvert(struct magic_set *ms, struct ma
 	case FILE_LELDATE:
 		p->l = (int32_t)
 		    ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0]));
-		cvt_32(p, m);
+		if (type == FILE_LELONG)
+			cvt_32(p, m);
 		return 1;
 	case FILE_LEQUAD:
 	case FILE_LEQDATE:
@@ -957,14 +961,16 @@ mconvert(struct magic_set *ms, struct ma
 		     ((uint64_t)p->hq[5]<<40)|((uint64_t)p->hq[4]<<32)|
 		     ((uint64_t)p->hq[3]<<24)|((uint64_t)p->hq[2]<<16)|
 		     ((uint64_t)p->hq[1]<<8)|((uint64_t)p->hq[0]));
-		cvt_64(p, m);
+		if (type == FILE_LEQUAD)
+			cvt_64(p, m);
 		return 1;
 	case FILE_MELONG:
 	case FILE_MEDATE:
 	case FILE_MELDATE:
 		p->l = (int32_t)
 		    ((p->hl[1]<<24)|(p->hl[0]<<16)|(p->hl[3]<<8)|(p->hl[2]));
-		cvt_32(p, m);
+		if (type == FILE_MELONG)
+			cvt_32(p, m);
 		return 1;
 	case FILE_FLOAT:
 		cvt_float(p, m);
@@ -1021,7 +1027,7 @@ mdebug(uint32_t offset, const char *str,
 
 private int
 mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
-    const unsigned char *s, uint32_t offset, size_t nbytes, size_t linecnt)
+    const unsigned char *s, uint32_t offset, size_t nbytes, struct magic *m)
 {
 	/*
 	 * Note: FILE_SEARCH and FILE_REGEX do not actually copy
@@ -1041,15 +1047,29 @@ mcopy(struct magic_set *ms, union VALUET
 			const char *last;	/* end of search region */
 			const char *buf;	/* start of search region */
 			const char *end;
-			size_t lines;
+			size_t lines, linecnt, bytecnt;
 
 			if (s == NULL) {
 				ms->search.s_len = 0;
 				ms->search.s = NULL;
 				return 0;
 			}
+
+			if (m->str_flags & REGEX_LINE_COUNT) {
+				linecnt = m->str_range;
+				bytecnt = linecnt * 80;
+			} else {
+				linecnt = 0;
+				bytecnt = m->str_range;
+			}
+
+			if (bytecnt == 0)
+				bytecnt = 8192;
+			if (bytecnt > nbytes)
+				bytecnt = nbytes;
+
 			buf = RCAST(const char *, s) + offset;
-			end = last = RCAST(const char *, s) + nbytes;
+			end = last = RCAST(const char *, s) + bytecnt;
 			/* mget() guarantees buf <= last */
 			for (lines = linecnt, b = buf; lines && b < end &&
 			     ((b = CAST(const char *,
@@ -1062,7 +1082,7 @@ mcopy(struct magic_set *ms, union VALUET
 					b++;
 			}
 			if (lines)
-				last = RCAST(const char *, s) + nbytes;
+				last = RCAST(const char *, s) + bytecnt;
 
 			ms->search.s = buf;
 			ms->search.s_len = last - buf;
@@ -1133,7 +1153,6 @@ mget(struct magic_set *ms, const unsigne
     int *need_separator, int *returnval)
 {
 	uint32_t soffset, offset = ms->offset;
-	uint32_t count = m->str_range;
 	int rv, oneed_separator, in_type;
 	char *sbuf, *rbuf;
 	union VALUETYPE *p = &ms->ms_value;
@@ -1145,13 +1164,12 @@ mget(struct magic_set *ms, const unsigne
 	}
 
 	if (mcopy(ms, p, m->type, m->flag & INDIR, s, (uint32_t)(offset + o),
-	    (uint32_t)nbytes, count) == -1)
+	    (uint32_t)nbytes, m) == -1)
 		return -1;
 
 	if ((ms->flags & MAGIC_DEBUG) != 0) {
 		fprintf(stderr, "mget(type=%d, flag=%x, offset=%u, o=%zu, "
-		    "nbytes=%zu, count=%u)\n", m->type, m->flag, offset, o,
-		    nbytes, count);
+		    "nbytes=%zu)\n", m->type, m->flag, offset, o, nbytes);
 		mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
 #ifndef COMPILE_ONLY
 		file_mdump(m);
@@ -1647,7 +1665,7 @@ mget(struct magic_set *ms, const unsigne
 			if ((ms->flags & MAGIC_DEBUG) != 0)
 				fprintf(stderr, "indirect +offs=%u\n", offset);
 		}
-		if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1)
+		if (mcopy(ms, p, m->type, 0, s, offset, nbytes, m) == -1)
 			return -1;
 		ms->offset = offset;
 
@@ -2003,7 +2021,8 @@ magiccheck(struct magic_set *ms, struct
 			if (slen + idx > ms->search.s_len)
 				break;
 
-			v = file_strncmp(m->value.s, ms->search.s + idx, slen, m->str_flags);
+			v = file_strncmp(m->value.s, ms->search.s + idx, slen,
+			    m->str_flags);
 			if (v == 0) {	/* found match */
 				ms->search.offset += idx;
 				break;
@@ -2031,14 +2050,17 @@ magiccheck(struct magic_set *ms, struct
 		}
 		else {
 			regmatch_t pmatch[1];
+			size_t slen = ms->search.s_len;
 #ifndef REG_STARTEND
 #define	REG_STARTEND	0
-			size_t l = ms->search.s_len - 1;
-			char c = ms->search.s[l];
-			((char *)(intptr_t)ms->search.s)[l] = '\0';
+			char c;
+			if (slen != 0)
+				slen--;
+			c = ms->search.s[slen];
+			((char *)(intptr_t)ms->search.s)[slen] = '\0';
 #else
 			pmatch[0].rm_so = 0;
-			pmatch[0].rm_eo = ms->search.s_len;
+			pmatch[0].rm_eo = slen;
 #endif
 			rc = regexec(&rx, (const char *)ms->search.s,
 			    1, pmatch, REG_STARTEND);