https://bugzilla.redhat.com/show_bug.cgi?id=1098222 https://github.com/file/file/commit/758e066df72fb1ac08d2eea91ddc3973d259e991 https://github.com/file/file/commit/74cafd7de9ec99a14f4480927580e501c8f852c3 https://github.com/file/file/commit/71a8b6c0d758acb0f73e2e51421a711b5e9d6668 https://github.com/file/file/commit/69a5a43b3b71f53b0577f41264a073f495799610 https://github.com/file/file/commit/4a284c89d6ef11aca34da65da7d673050a5ea320 diff -Naurp file-5.16/doc/magic.man file-5.16.oden/doc/magic.man --- file-5.16/doc/magic.man 2013-04-22 17:30:10.000000000 +0200 +++ file-5.16.oden/doc/magic.man 2014-07-29 11:10:18.000000000 +0200 @@ -228,13 +228,25 @@ Regular expressions can take exponential performance is hard to predict, so their use is discouraged. When used in production environments, their performance should be carefully checked. -The type specification can be optionally followed by -.Dv /[c][s] . +The size of the string to search should also be limited by specifying +.Dv /<length> , +to avoid performance issues scanning long files. +The type specification can also be optionally followed by +.Dv /[c][s][l] . The .Dq c flag makes the match case insensitive, while the .Dq s flag update the offset to the start offset of the match, rather than the end. +The +.Dq l +modifier changes the limit of length to mean number of lines instead of a +byte count. +Lines are delimited by the platform's native line delimiter. +When a line count is specified, an implicit byte count is also computed assuming +each line is 80 characters long. +If neither a byte nor line count is specified, the search is limited automatically +to 8KiB. The regular expression is tested against line .Dv N + 1 onwards, where @@ -409,6 +421,9 @@ is octal, and .Dv 0x13 is hexadecimal. .Pp +Numeric operations are not performed on date types; instead the numeric +value is interpreted as an offset. +.Pp For string values, the string from the file must match the specified string. 
The operators diff -Naurp file-5.16/magic/Magdir/commands file-5.16.oden/magic/Magdir/commands --- file-5.16/magic/Magdir/commands 2013-03-25 15:06:55.000000000 +0100 +++ file-5.16.oden/magic/Magdir/commands 2014-07-29 11:10:18.000000000 +0200 @@ -49,7 +49,7 @@ !:mime text/x-awk 0 string/wt #!\ /usr/bin/awk awk script text executable !:mime text/x-awk -0 regex =^\\s{0,100}BEGIN\\s{0,100}[{] awk script text +0 regex/4096 =^\\s{0,100}BEGIN\\s{0,100}[{] awk script text # AT&T Bell Labs' Plan 9 shell 0 string/wt #!\ /bin/rc Plan 9 rc shell script text executable diff -Naurp file-5.16/magic/Magdir/fortran file-5.16.oden/magic/Magdir/fortran --- file-5.16/magic/Magdir/fortran 2013-06-08 15:50:40.000000000 +0200 +++ file-5.16.oden/magic/Magdir/fortran 2014-07-29 11:10:18.000000000 +0200 @@ -2,6 +2,6 @@ #------------------------------------------------------------------------------ # $File: fortran,v 1.7 2012/06/21 01:55:02 christos Exp $ # FORTRAN source -0 regex/100 \^[Cc][\ \t] FORTRAN program +0 regex/100l \^[Cc][\ \t] FORTRAN program !:mime text/x-fortran !:strength - 5 diff -Naurp file-5.16/magic/Magdir/graphviz file-5.16.oden/magic/Magdir/graphviz --- file-5.16/magic/Magdir/graphviz 2009-09-19 18:28:09.000000000 +0200 +++ file-5.16.oden/magic/Magdir/graphviz 2014-07-29 11:10:18.000000000 +0200 @@ -6,7 +6,7 @@ # FIXME: These patterns match too generally. For example, the first # line matches a LaTeX file containing the word "graph" (with a { # following later) and the second line matches this file. 
-#0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text +#0 regex/100l [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text #!:mime text/vnd.graphviz -#0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text +#0 regex/100l [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text #!:mime text/vnd.graphviz diff -Naurp file-5.16/magic/Magdir/marc21 file-5.16.oden/magic/Magdir/marc21 --- file-5.16/magic/Magdir/marc21 2011-09-08 23:58:42.000000000 +0200 +++ file-5.16.oden/magic/Magdir/marc21 2014-07-29 11:10:18.000000000 +0200 @@ -12,17 +12,17 @@ 20 string 45 # leader starts with 5 digits, followed by codes specific to MARC format ->0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic +>0 regex/1l (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic !:mime application/marc ->0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority +>0 regex/1l (^[0-9]{5})[acdnosx][z] MARC21 Authority !:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings +>0 regex/1l (^[0-9]{5})[cdn][uvxy] MARC21 Holdings !:mime application/marc -0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification +0 regex/1l (^[0-9]{5})[acdn][w] MARC21 Classification !:mime application/marc ->0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community +>0 regex/1l (^[0-9]{5})[cdn][q] MARC21 Community !:mime application/marc # leader position 22-23, should be "00" but is it? ->0 regex/1 (^.{21})([^0]{2}) (non-conforming) +>0 regex/1l (^.{21})([^0]{2}) (non-conforming) !:mime application/marc diff -Naurp file-5.16/magic/Magdir/scientific file-5.16.oden/magic/Magdir/scientific --- file-5.16/magic/Magdir/scientific 2010-09-20 21:19:17.000000000 +0200 +++ file-5.16.oden/magic/Magdir/scientific 2014-07-29 11:10:18.000000000 +0200 @@ -91,12 +91,12 @@ # uppercase letters. However, examples have been seen without the date string, # e.g., the example on the chemime site. 
0 string HEADER\ \ \ \ ->&0 regex/1 \^.{40} ->>&0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3} ->>>&0 regex/1s [A-Z0-9]{4}.{14}$ ->>>>&0 regex/1 [A-Z0-9]{4} Protein Data Bank data, ID Code %s +>&0 regex/1l \^.{40} +>>&0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3} +>>>&0 regex/1ls [A-Z0-9]{4}.{14}$ +>>>>&0 regex/1l [A-Z0-9]{4} Protein Data Bank data, ID Code %s !:mime chemical/x-pdb ->>>>0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s +>>>>0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s # Type: GDSII Stream file 0 belong 0x00060002 GDSII Stream file diff -Naurp file-5.16/magic/Magdir/troff file-5.16.oden/magic/Magdir/troff --- file-5.16/magic/Magdir/troff 2009-09-19 18:28:12.000000000 +0200 +++ file-5.16.oden/magic/Magdir/troff 2014-07-29 11:10:18.000000000 +0200 @@ -16,9 +16,9 @@ !:mime text/troff 0 search/1 ''' troff or preprocessor input text !:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text +0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text !:mime text/troff -0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text +0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text !:mime text/troff # ditroff intermediate output text diff -Naurp file-5.16/src/apprentice.c file-5.16.oden/src/apprentice.c --- file-5.16/src/apprentice.c 2013-11-19 22:01:12.000000000 +0100 +++ file-5.16.oden/src/apprentice.c 2014-07-29 11:10:18.000000000 +0200 @@ -1317,7 +1317,8 @@ string_modifier_check(struct magic_set * if ((ms->flags & MAGIC_CHECK) == 0) return 0; - if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { + if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) && + (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) { file_magwarn(ms, "'/BHhLl' modifiers are only allowed for pascal strings\n"); return -1; @@ -1810,8 +1811,13 @@ parse(struct magic_set *ms, struct magic m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; break; case 
CHAR_PSTRING_4_LE: - if (m->type != FILE_PSTRING) + switch (m->type) { + case FILE_PSTRING: + case FILE_REGEX: + break; + default: goto bad; + } m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; break; case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: diff -Naurp file-5.16/src/file.h file-5.16.oden/src/file.h --- file-5.16/src/file.h 2014-07-29 11:10:02.000000000 +0200 +++ file-5.16.oden/src/file.h 2014-07-29 11:10:18.000000000 +0200 @@ -321,6 +321,7 @@ struct magic { #define PSTRING_2_LE BIT(9) #define PSTRING_4_BE BIT(10) #define PSTRING_4_LE BIT(11) +#define REGEX_LINE_COUNT BIT(11) #define PSTRING_LEN \ (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE) #define PSTRING_LENGTH_INCLUDES_ITSELF BIT(12) diff -Naurp file-5.16/src/softmagic.c file-5.16.oden/src/softmagic.c --- file-5.16/src/softmagic.c 2014-07-29 11:10:02.000000000 +0200 +++ file-5.16.oden/src/softmagic.c 2014-07-29 11:10:46.000000000 +0200 @@ -53,7 +53,7 @@ private int32_t mprint(struct magic_set private int32_t moffset(struct magic_set *, struct magic *); private void mdebug(uint32_t, const char *, size_t); private int mcopy(struct magic_set *, union VALUETYPE *, int, int, - const unsigned char *, uint32_t, size_t, size_t); + const unsigned char *, uint32_t, size_t, struct magic *); private int mconvert(struct magic_set *, struct magic *, int); private int print_sep(struct magic_set *, int); private int handle_annotation(struct magic_set *, struct magic *); @@ -500,7 +500,7 @@ mprint(struct magic_set *ms, struct magi case FILE_BEDATE: case FILE_LEDATE: case FILE_MEDATE: - if (file_printf(ms, m->desc, file_fmttime(p->l, FILE_T_LOCAL, + if (file_printf(ms, m->desc, file_fmttime(p->l + m->num_mask, FILE_T_LOCAL, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint32_t); @@ -510,7 +510,7 @@ mprint(struct magic_set *ms, struct magi case FILE_BELDATE: case FILE_LELDATE: case FILE_MELDATE: - if (file_printf(ms, m->desc, file_fmttime(p->l, 0, tbuf)) == -1) + if (file_printf(ms, m->desc, 
file_fmttime(p->l + m->num_mask, 0, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint32_t); break; @@ -518,7 +518,7 @@ mprint(struct magic_set *ms, struct magi case FILE_QDATE: case FILE_BEQDATE: case FILE_LEQDATE: - if (file_printf(ms, m->desc, file_fmttime(p->q, FILE_T_LOCAL, + if (file_printf(ms, m->desc, file_fmttime(p->q + m->num_mask, FILE_T_LOCAL, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint64_t); @@ -527,7 +527,7 @@ mprint(struct magic_set *ms, struct magi case FILE_QLDATE: case FILE_BEQLDATE: case FILE_LEQLDATE: - if (file_printf(ms, m->desc, file_fmttime(p->q, 0, tbuf)) == -1) + if (file_printf(ms, m->desc, file_fmttime(p->q + m->num_mask, 0, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint64_t); break; @@ -535,7 +535,7 @@ mprint(struct magic_set *ms, struct magi case FILE_QWDATE: case FILE_BEQWDATE: case FILE_LEQWDATE: - if (file_printf(ms, m->desc, file_fmttime(p->q, FILE_T_WINDOWS, + if (file_printf(ms, m->desc, file_fmttime(p->q + m->num_mask, FILE_T_WINDOWS, tbuf)) == -1) return -1; t = ms->offset + sizeof(uint64_t); @@ -871,8 +871,9 @@ private int mconvert(struct magic_set *ms, struct magic *m, int flip) { union VALUETYPE *p = &ms->ms_value; + uint8_t type; - switch (cvt_flip(m->type, flip)) { + switch (type = cvt_flip(m->type, flip)) { case FILE_BYTE: cvt_8(p, m); return 1; @@ -924,7 +925,8 @@ mconvert(struct magic_set *ms, struct ma case FILE_BELDATE: p->l = (int32_t) ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3])); - cvt_32(p, m); + if (type == FILE_BELONG) + cvt_32(p, m); return 1; case FILE_BEQUAD: case FILE_BEQDATE: @@ -935,7 +937,8 @@ mconvert(struct magic_set *ms, struct ma ((uint64_t)p->hq[2]<<40)|((uint64_t)p->hq[3]<<32)| ((uint64_t)p->hq[4]<<24)|((uint64_t)p->hq[5]<<16)| ((uint64_t)p->hq[6]<<8)|((uint64_t)p->hq[7])); - cvt_64(p, m); + if (type == FILE_BEQUAD) + cvt_64(p, m); return 1; case FILE_LESHORT: p->h = (short)((p->hs[1]<<8)|(p->hs[0])); @@ -946,7 +949,8 @@ mconvert(struct magic_set *ms, struct ma case 
FILE_LELDATE: p->l = (int32_t) ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0])); - cvt_32(p, m); + if (type == FILE_LELONG) + cvt_32(p, m); return 1; case FILE_LEQUAD: case FILE_LEQDATE: @@ -957,14 +961,16 @@ mconvert(struct magic_set *ms, struct ma ((uint64_t)p->hq[5]<<40)|((uint64_t)p->hq[4]<<32)| ((uint64_t)p->hq[3]<<24)|((uint64_t)p->hq[2]<<16)| ((uint64_t)p->hq[1]<<8)|((uint64_t)p->hq[0])); - cvt_64(p, m); + if (type == FILE_LEQUAD) + cvt_64(p, m); return 1; case FILE_MELONG: case FILE_MEDATE: case FILE_MELDATE: p->l = (int32_t) ((p->hl[1]<<24)|(p->hl[0]<<16)|(p->hl[3]<<8)|(p->hl[2])); - cvt_32(p, m); + if (type == FILE_MELONG) + cvt_32(p, m); return 1; case FILE_FLOAT: cvt_float(p, m); @@ -1021,7 +1027,7 @@ mdebug(uint32_t offset, const char *str, private int mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, - const unsigned char *s, uint32_t offset, size_t nbytes, size_t linecnt) + const unsigned char *s, uint32_t offset, size_t nbytes, struct magic *m) { /* * Note: FILE_SEARCH and FILE_REGEX do not actually copy @@ -1041,15 +1047,29 @@ mcopy(struct magic_set *ms, union VALUET const char *last; /* end of search region */ const char *buf; /* start of search region */ const char *end; - size_t lines; + size_t lines, linecnt, bytecnt; if (s == NULL) { ms->search.s_len = 0; ms->search.s = NULL; return 0; } + + if (m->str_flags & REGEX_LINE_COUNT) { + linecnt = m->str_range; + bytecnt = linecnt * 80; + } else { + linecnt = 0; + bytecnt = m->str_range; + } + + if (bytecnt == 0) + bytecnt = 8192; + if (bytecnt > nbytes) + bytecnt = nbytes; + buf = RCAST(const char *, s) + offset; - end = last = RCAST(const char *, s) + nbytes; + end = last = RCAST(const char *, s) + bytecnt; /* mget() guarantees buf <= last */ for (lines = linecnt, b = buf; lines && b < end && ((b = CAST(const char *, @@ -1062,7 +1082,7 @@ mcopy(struct magic_set *ms, union VALUET b++; } if (lines) - last = RCAST(const char *, s) + nbytes; + last = RCAST(const char *, s) + 
bytecnt; ms->search.s = buf; ms->search.s_len = last - buf; @@ -1133,7 +1153,6 @@ mget(struct magic_set *ms, const unsigne int *need_separator, int *returnval) { uint32_t soffset, offset = ms->offset; - uint32_t count = m->str_range; int rv, oneed_separator, in_type; char *sbuf, *rbuf; union VALUETYPE *p = &ms->ms_value; @@ -1145,13 +1164,12 @@ mget(struct magic_set *ms, const unsigne } if (mcopy(ms, p, m->type, m->flag & INDIR, s, (uint32_t)(offset + o), - (uint32_t)nbytes, count) == -1) + (uint32_t)nbytes, m) == -1) return -1; if ((ms->flags & MAGIC_DEBUG) != 0) { fprintf(stderr, "mget(type=%d, flag=%x, offset=%u, o=%zu, " - "nbytes=%zu, count=%u)\n", m->type, m->flag, offset, o, - nbytes, count); + "nbytes=%zu)\n", m->type, m->flag, offset, o, nbytes); mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE)); #ifndef COMPILE_ONLY file_mdump(m); @@ -1647,7 +1665,7 @@ mget(struct magic_set *ms, const unsigne if ((ms->flags & MAGIC_DEBUG) != 0) fprintf(stderr, "indirect +offs=%u\n", offset); } - if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1) + if (mcopy(ms, p, m->type, 0, s, offset, nbytes, m) == -1) return -1; ms->offset = offset; @@ -2003,7 +2021,8 @@ magiccheck(struct magic_set *ms, struct if (slen + idx > ms->search.s_len) break; - v = file_strncmp(m->value.s, ms->search.s + idx, slen, m->str_flags); + v = file_strncmp(m->value.s, ms->search.s + idx, slen, + m->str_flags); if (v == 0) { /* found match */ ms->search.offset += idx; break; @@ -2031,14 +2050,17 @@ magiccheck(struct magic_set *ms, struct } else { regmatch_t pmatch[1]; + size_t slen = ms->search.s_len; #ifndef REG_STARTEND #define REG_STARTEND 0 - size_t l = ms->search.s_len - 1; - char c = ms->search.s[l]; - ((char *)(intptr_t)ms->search.s)[l] = '\0'; + char c; + if (slen != 0) + slen--; + c = ms->search.s[slen]; + ((char *)(intptr_t)ms->search.s)[slen] = '\0'; #else pmatch[0].rm_so = 0; - pmatch[0].rm_eo = ms->search.s_len; + pmatch[0].rm_eo = slen; #endif rc = regexec(&rx, (const 
char *)ms->search.s, 1, pmatch, REG_STARTEND);