From bffbaabfbb139ef1596d8b20f0275293cecd9297 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 30 Jul 2014 08:35:16 +0200 Subject: libclamav: use libmspack This patch provides support for upstream / external libmspack version libmspack 0.4 (current). The old in-tree version of libmspack is removed as it is no longer used. BTS: #675558 clamav: https://bugzilla.clamav.net/show_bug.cgi?id=11062 Signed-off-by: Sebastian Andrzej Siewior --- configure.ac | 3 + libclamav/Makefile.am | 11 +- libclamav/cab.c | 684 ----------------- libclamav/cab.h | 81 -- libclamav/chmunpack.h | 122 --- libclamav/libmspack.c | 525 +++++++++++++ libclamav/libmspack.h | 7 + libclamav/mspack.c | 2026 ------------------------------------------------- libclamav/mspack.h | 294 ------- libclamav/scanners.c | 146 +--- 10 files changed, 541 insertions(+), 3358 deletions(-) delete mode 100644 libclamav/cab.c delete mode 100644 libclamav/cab.h delete mode 100644 libclamav/chmunpack.h create mode 100644 libclamav/libmspack.c create mode 100644 libclamav/libmspack.h delete mode 100644 libclamav/mspack.c delete mode 100644 libclamav/mspack.h diff --git a/configure.ac b/configure.ac index e230f939c2d3..41043753fc92 100644 --- a/configure.ac +++ b/configure.ac @@ -174,6 +174,9 @@ if test "$enable_llvm" = "yes" && test "$subdirfailed" != "no"; then fi AM_CONDITIONAL([ENABLE_LLVM], [test "$subdirfailed" != "yes" && test "$enable_llvm" != "no"]) + +PKG_CHECK_MODULES([LIBMSPACK], [libmspack]) + no_recursion="yes"; AC_OUTPUT([libclamav/Makefile]) diff --git a/libclamav/Makefile.am b/libclamav/Makefile.am index 1aab51bd6ccc..538e83dcdd03 100644 --- a/libclamav/Makefile.am +++ b/libclamav/Makefile.am @@ -147,6 +147,9 @@ if VERSIONSCRIPT libclamav_la_LDFLAGS += -Wl,@VERSIONSCRIPTFLAG@,@top_srcdir@/libclamav/libclamav.map endif +libclamav_la_CFLAGS += $(LIBMSPACK_CFLAGS) +libclamav_la_LDFLAGS += $(LIBMSPACK_LIBS) + include_HEADERS = clamav.h libclamav_la_SOURCES = \ @@ -204,8 +207,8 @@ 
libclamav_la_SOURCES = \ upx.h \ htmlnorm.c \ htmlnorm.h \ - chmunpack.c \ - chmunpack.h \ + libmspack.c \ + libmspack.h \ rebuildpe.c \ rebuildpe.h \ petite.c \ @@ -283,10 +286,6 @@ libclamav_la_SOURCES = \ regex_list.h \ regex_suffix.c \ regex_suffix.h \ - mspack.c \ - mspack.h \ - cab.c \ - cab.h \ entconv.c \ entconv.h \ entitylist.h \ diff --git a/libclamav/cab.c b/libclamav/cab.c deleted file mode 100644 index 6d2eade7d4ea..000000000000 --- a/libclamav/cab.c +++ /dev/null @@ -1,684 +0,0 @@ -/* - * Copyright (C) 2007-2008 Sourcefire, Inc. - * - * Authors: Tomasz Kojm - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. 
- */ - -#if HAVE_CONFIG_H -#include "clamav-config.h" -#endif - -#include -#include -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "clamav.h" -#include "cltypes.h" -#include "others.h" -#include "mspack.h" -#include "cab.h" - -#define EC32(x) cli_readint32(&x) /* Convert little endian to host */ -#define EC16(x) cli_readint16(&x) - -/* hard limits */ -#define CAB_FOLDER_LIMIT 5000 -#define CAB_FILE_LIMIT 5000 - -/* Cabinet format data structures */ - -struct cab_hdr { - uint32_t signature; /* file signature */ - uint32_t res1; /* reserved */ - uint32_t cbCabinet; /* size of cabinet file */ - uint32_t res2; /* reserved */ - uint32_t coffFiles; /* offset of the first file entry */ - uint32_t res3; /* reserved */ - uint8_t versionMinor; /* file format version, minor */ - uint8_t versionMajor; /* file format version, major */ - uint16_t cFolders; /* number of folder entries */ - uint16_t cFiles; /* number of file entries */ - uint16_t flags; /* option flags */ - uint16_t setID; /* multiple cabs related */ - uint16_t iCabinet; /* multiple cabs related */ -}; - -struct cab_hdr_opt { - uint16_t cbCFHeader; /* size of reserved header area */ - uint8_t cbCFFolder; /* size of reserved folder area */ - uint8_t cbCFData; /* size of reserved block area */ -}; - -struct cab_folder_hdr -{ - uint32_t coffCabStart; /* offset of the first data block */ - uint16_t cCFData; /* number of data blocks */ - uint16_t typeCompress; /* compression type */ -}; - -struct cab_file_hdr -{ - uint32_t cbFile; /* uncompressed size */ - uint32_t uoffFolderStart; /* uncompressed offset of file in folder */ - uint16_t iFolder; /* folder index */ - uint16_t date; /* date stamp */ - uint16_t time; /* time stamp */ - uint16_t attribs; /* attribute flags */ -}; - -struct cab_block_hdr -{ - uint32_t csum; /* data block checksum */ - uint16_t cbData; /* number of compressed bytes */ - uint16_t cbUncomp; /* number of uncompressed bytes */ -}; - -static char 
*cab_readstr(fmap_t *map, off_t *offset, int *ret) -{ - int i; - const char *str; - char *retstr; - - if(!(str = fmap_need_offstr(map, *offset, 256))) { - *ret = CL_EFORMAT; - return NULL; - } - - i = strlen(str) + 1; - if(i>=255) { - fmap_unneed_ptr(map, str, i); - *ret = CL_EFORMAT; - return NULL; - } - - *offset += i; - if((retstr = cli_malloc(i))) - memcpy(retstr, str, i); - fmap_unneed_ptr(map, str, i); - - if(!retstr) { - *ret = CL_EMEM; - return NULL; - } - - *ret = CL_SUCCESS; - return retstr; -} - -static int cab_chkname(char *name, int san) -{ - size_t i, len = strlen(name); - - - for(i = 0; i < len; i++) { - if(!san && (strchr("%/*?|\\\"+=<>;:\t ", name[i]) || !isascii(name[i]))) { - cli_dbgmsg("cab_chkname: File name contains disallowed characters\n"); - return 1; - } else if(san && !isalnum(name[i])) { - name[i] = '*'; - } - } - - return 0; -} - -void cab_free(struct cab_archive *cab) -{ - struct cab_folder *folder; - struct cab_file *file; - - - if(cab->state) { - if(cab->state->stream) { - switch(cab->state->cmethod & 0x000f) { - case 0x0001: - mszip_free(cab->state->stream); - break; - case 0x0002: - qtm_free(cab->state->stream); - break; - case 0x0003: - lzx_free(cab->state->stream); - } - } - free(cab->state); - } - - while(cab->folders) { - folder = cab->folders; - cab->folders = cab->folders->next; - free(folder); - } - - while(cab->files) { - file = cab->files; - cab->files = cab->files->next; - free(file->name); - free(file); - } -} - -int cab_open(fmap_t *map, off_t offset, struct cab_archive *cab) -{ - unsigned int i, folders = 0; - struct cab_file *file, *lfile = NULL; - struct cab_folder *folder, *lfolder = NULL; - const struct cab_hdr *hdr; - const struct cab_hdr_opt *hdr_opt; - uint16_t fidx; - uint32_t coffFiles; - char *pt; - int ret; - off_t resfold = 0, rsize, cur_offset = offset; - - if(!(hdr=fmap_need_off_once(map, cur_offset, sizeof(*hdr)))) { - cli_dbgmsg("cab_open: Can't read cabinet header\n"); - return CL_EFORMAT; /* most 
likely a corrupted file */ - } - cur_offset += sizeof(*hdr); - - if(EC32(hdr->signature) != 0x4643534d) { - cli_dbgmsg("cab_open: Incorrect CAB signature\n"); - return CL_EFORMAT; - } else { - cli_dbgmsg("CAB: -------------- Cabinet file ----------------\n"); - } - - rsize = map->len; - - memset(cab, 0, sizeof(struct cab_archive)); - - cab->length = EC32(hdr->cbCabinet); - cli_dbgmsg("CAB: Cabinet length: %u\n", cab->length); - if((off_t) cab->length > rsize) { - cli_dbgmsg("CAB: Truncating file size from %lu to %lu\n", (unsigned long int) cab->length, (unsigned long int) rsize); - cab->length = (uint32_t) rsize; - } - - cab->nfolders = EC16(hdr->cFolders); - if(!cab->nfolders) { - cli_dbgmsg("cab_open: No folders in cabinet (fake cab?)\n"); - return CL_EFORMAT; - } else { - cli_dbgmsg("CAB: Folders: %u\n", cab->nfolders); - if(cab->nfolders > CAB_FOLDER_LIMIT) { - cab->nfolders = CAB_FOLDER_LIMIT; - cli_dbgmsg("CAB: *** Number of folders limited to %u ***\n", cab->nfolders); - } - } - - cab->nfiles = EC16(hdr->cFiles); - if(!cab->nfiles) { - cli_dbgmsg("cab_open: No files in cabinet (fake cab?)\n"); - return CL_EFORMAT; - } else { - cli_dbgmsg("CAB: Files: %u\n", cab->nfiles); - if(cab->nfiles > CAB_FILE_LIMIT) { - cab->nfiles = CAB_FILE_LIMIT; - cli_dbgmsg("CAB: *** Number of files limited to %u ***\n", cab->nfiles); - } - } - - cli_dbgmsg("CAB: File format version: %u.%u\n", hdr->versionMajor, hdr->versionMinor); - - cab->flags = EC16(hdr->flags); - coffFiles = EC16(hdr->coffFiles); - - if(cab->flags & 0x0004) { - if(!(hdr_opt = fmap_need_off_once(map, cur_offset, sizeof(*hdr_opt)))) { - cli_dbgmsg("cab_open: Can't read file header (fake cab?)\n"); - return CL_EFORMAT; /* most likely a corrupted file */ - } - - cab->reshdr = EC16(hdr_opt->cbCFHeader); - resfold = hdr_opt->cbCFFolder; - cab->resdata = hdr_opt->cbCFData; - - cur_offset += sizeof(*hdr_opt) + cab->reshdr; - if(cab->reshdr) { - if(cab->reshdr >= rsize) { - cli_dbgmsg("cab_open: Can't lseek to %u 
(fake cab?)\n", cab->reshdr); - return CL_EFORMAT; /* most likely a corrupted file */ - } - } - } - - if(cab->flags & 0x0001) { /* preceding cabinet */ - /* name */ - pt = cab_readstr(map, &cur_offset, &ret); - if(ret) - return ret; - if(cab_chkname(pt, 0)) - cli_dbgmsg("CAB: Invalid name of preceding cabinet\n"); - else - cli_dbgmsg("CAB: Preceding cabinet name: %s\n", pt); - free(pt); - /* info */ - pt = cab_readstr(map, &cur_offset, &ret); - if(ret) - return ret; - if(cab_chkname(pt, 0)) - cli_dbgmsg("CAB: Invalid info for preceding cabinet\n"); - else - cli_dbgmsg("CAB: Preceding cabinet info: %s\n", pt); - free(pt); - } - - if(cab->flags & 0x0002) { /* next cabinet */ - /* name */ - pt = cab_readstr(map, &cur_offset, &ret); - if(ret) - return ret; - if(cab_chkname(pt, 0)) - cli_dbgmsg("CAB: Invalid name of next cabinet\n"); - else - cli_dbgmsg("CAB: Next cabinet name: %s\n", pt); - free(pt); - /* info */ - pt = cab_readstr(map, &cur_offset, &ret); - if(ret) - return ret; - if(cab_chkname(pt, 0)) - cli_dbgmsg("CAB: Invalid info for next cabinet\n"); - else - cli_dbgmsg("CAB: Next cabinet info: %s\n", pt); - free(pt); - } - - /* folders */ - for(i = 0; i < cab->nfolders; i++) { - const struct cab_folder_hdr *folder_hdr; - - if(!(folder_hdr = fmap_need_off_once(map, cur_offset, sizeof(*folder_hdr)))) { - cli_dbgmsg("cab_open: Can't read header for folder %u\n", i); - break; - } - - cur_offset += sizeof(*folder_hdr) + resfold; - - if(EC32(folder_hdr->coffCabStart) + offset > rsize) { - cli_dbgmsg("CAB: Folder out of file\n"); - continue; - } - - if((EC16(folder_hdr->typeCompress) & 0x000f) > 3) { - cli_dbgmsg("CAB: Unknown compression method\n"); - continue; - } - - folder = (struct cab_folder *) cli_calloc(1, sizeof(struct cab_folder)); - if(!folder) { - cli_errmsg("cab_open: Can't allocate memory for folder\n"); - cab_free(cab); - return CL_EMEM; - } - - folder->cab = (struct cab_archive *) cab; - folder->offset = (off_t) EC32(folder_hdr->coffCabStart) + offset; 
- folder->nblocks = EC16(folder_hdr->cCFData); - folder->cmethod = EC16(folder_hdr->typeCompress); - - cli_dbgmsg("CAB: Folder record %u\n", i); - cli_dbgmsg("CAB: Folder offset: %u\n", (unsigned int) folder->offset); - cli_dbgmsg("CAB: Folder compression method: %d\n", folder->cmethod); - - if(!lfolder) - cab->folders = folder; - else - lfolder->next = folder; - - lfolder = folder; - folders++; - } - cli_dbgmsg("CAB: Recorded folders: %u\n", folders); - - /* files */ - if(cab->nfolders != folders) { - if(coffFiles >= rsize) { - cli_dbgmsg("cab_open: Can't lseek to hdr.coffFiles\n"); - cab_free(cab); - return CL_EFORMAT; - } - cur_offset = coffFiles; - } - for(i = 0; i < cab->nfiles; i++) { - const struct cab_file_hdr *file_hdr; - - if(!(file_hdr = fmap_need_off_once(map, cur_offset, sizeof(*file_hdr)))) { - cli_dbgmsg("cab_open: Can't read file %u header\n", i); - break; - } - cur_offset += sizeof(*file_hdr); - - file = (struct cab_file *) cli_calloc(1, sizeof(struct cab_file)); - if(!file) { - cli_errmsg("cab_open: Can't allocate memory for file\n"); - cab_free(cab); - return CL_EMEM; - } - - file->cab = cab; - cab->map = map; - file->offset = EC32(file_hdr->uoffFolderStart); - file->length = EC32(file_hdr->cbFile); - file->attribs = EC16(file_hdr->attribs); - fidx = EC16(file_hdr->iFolder); - file->error = CL_SUCCESS; - - file->name = cab_readstr(map, &cur_offset, &ret); - if(ret) { - free(file); - continue; - } - cab_chkname(file->name, 1); - - cli_dbgmsg("CAB: File record %u\n", i); - cli_dbgmsg("CAB: File name: %s\n", file->name); - cli_dbgmsg("CAB: File offset: %u\n", (unsigned int) file->offset); - cli_dbgmsg("CAB: File folder index: %u\n", fidx); - cli_dbgmsg("CAB: File attribs: 0x%x\n", file->attribs); - if(file->attribs & 0x01) - cli_dbgmsg("CAB: * file is read-only\n"); - if(file->attribs & 0x02) - cli_dbgmsg("CAB: * file is hidden\n"); - if(file->attribs & 0x04) - cli_dbgmsg("CAB: * file is a system file\n"); - if(file->attribs & 0x20) - 
cli_dbgmsg("CAB: * file modified since last backup\n"); - if(file->attribs & 0x40) - cli_dbgmsg("CAB: * file to be run after extraction\n"); - if(file->attribs & 0x80) - cli_dbgmsg("CAB: * file name contains UTF\n"); - - /* folder index */ - if(fidx < 0xfffd) { - if(fidx > cab->nfolders) { - cli_dbgmsg("cab_open: File %s is not associated with any folder\n", file->name); - free(file->name); - free(file); - continue; - } - - file->folder = cab->folders; - while(file->folder && fidx--) - file->folder = file->folder->next; - - if(!file->folder) { - cli_dbgmsg("cab_open: Folder not found for file %s\n", file->name); - free(file->name); - free(file); - continue; - } - - } else { - cli_dbgmsg("CAB: File is split *skipping*\n"); - free(file->name); - free(file); - continue; - } - - if(!lfile) - cab->files = file; - else - lfile->next = file; - - lfile = file; - - } - - return CL_SUCCESS; -} - -static int cab_read_block(struct cab_file *file) -{ - const struct cab_block_hdr *block_hdr; - struct cab_state *state = file->cab->state; - - if(!(block_hdr = fmap_need_off_once(file->cab->map, file->cab->cur_offset, sizeof(*block_hdr)))) { - cli_dbgmsg("cab_read_block: Can't read block header\n"); - return CL_EFORMAT; /* most likely a corrupted file */ - } - - file->cab->cur_offset += sizeof(*block_hdr) + file->cab->resdata; - state->blklen = EC16(block_hdr->cbData); - state->outlen = EC16(block_hdr->cbUncomp); - - if(fmap_readn(file->cab->map, state->block, file->cab->cur_offset, state->blklen) != state->blklen) { - cli_dbgmsg("cab_read_block: Can't read block data\n"); - return CL_EFORMAT; /* most likely a corrupted file */ - } - - file->cab->cur_offset += state->blklen; - state->pt = state->end = state->block; - state->end += state->blklen; - - return CL_SUCCESS; -} - -static int cab_read(struct cab_file *file, unsigned char *buffer, int bytes) -{ - uint16_t todo, left; - - - if((file->cab->state->blknum > file->folder->nblocks) && !file->lread) { - file->error = CL_BREAK; - 
return -1; - } - - todo = bytes; - while(todo > 0) { - left = file->cab->state->end - file->cab->state->pt; - - if(left) { - if(left > todo) - left = todo; - - memcpy(buffer, file->cab->state->pt, left); - file->cab->state->pt += left; - buffer += left; - todo -= left; - - } else { - if(file->cab->state->blknum++ >= file->folder->nblocks) - break; - - file->error = cab_read_block(file); - if(file->error) - return -1; - - if((file->folder->cmethod & 0x000f) == 0x0002) /* Quantum hack */ - *file->cab->state->end++ = 0xff; - - if(file->cab->state->blknum >= file->folder->nblocks) { - if((file->folder->cmethod & 0x000f) == 0x0003) { /* LZX hack */ - lzx_set_output_length(file->cab->state->stream, (off_t) ((file->cab->state->blknum - 1) * 32768 + file->cab->state->outlen)); - } - } else { - if(file->cab->state->outlen != 32768) { - cli_dbgmsg("cab_read: WARNING: partial data block\n"); - } - } - } - } - - return file->lread = bytes - todo; -} - -static int cab_unstore(struct cab_file *file) -{ - int todo, bread, bytes = file->length; - unsigned char buff[4096]; - - - if(bytes < 0) { - cli_dbgmsg("cab_unstore: bytes < 0\n"); - return CL_EFORMAT; - } - - todo = MIN((unsigned int) bytes, file->max_size); - - while(1) { - - if((unsigned int) todo <= sizeof(buff)) - bread = todo; - else - bread = sizeof(buff); - - if((bread = cab_read(file, buff, bread)) == -1) { - cli_dbgmsg("cab_unstore: cab_read failed\n"); - return file->error; - } else if(cli_writen(file->ofd, buff, bread) != bread) { - cli_warnmsg("cab_unstore: Can't write %d bytes to descriptor %d\n", bread, file->ofd); - return CL_EWRITE; - } - - todo -= bread; - - if(!bread || todo <= 0) - break; - } - - return CL_SUCCESS; -} - -#define CAB_CHGFOLDER \ - if(!file->cab->actfol || (file->folder != file->cab->actfol) \ - || (file->cab->state && file->cab->state->cmethod != file->folder->cmethod)) { \ - if(file->cab->state) { \ - if(file->cab->state->stream) { \ - switch(file->cab->state->cmethod & 0x000f) { \ - case 
0x0001: \ - mszip_free(file->cab->state->stream); \ - break; \ - case 0x0002: \ - qtm_free(file->cab->state->stream); \ - break; \ - case 0x0003: \ - lzx_free(file->cab->state->stream); \ - } \ - } \ - free(file->cab->state); \ - file->cab->state = NULL; \ - } \ - file->cab->cur_offset = file->folder->offset; \ - file->cab->state = (struct cab_state *) cli_calloc(1, sizeof(struct cab_state)); \ - if(!file->cab->state) { \ - cli_errmsg("cab_extract: Can't allocate memory for internal state\n"); \ - close(file->ofd); \ - return CL_EMEM; \ - } \ - file->cab->state->cmethod = file->folder->cmethod; \ - switch(file->folder->cmethod & 0x000f) { \ - case 0x0001: \ - file->cab->state->stream = (struct mszip_stream *) mszip_init(file->ofd, 4096, 1, file, &cab_read); \ - break; \ - case 0x0002: \ - file->cab->state->stream = (struct qtm_stream *) qtm_init(file->ofd, (int) (file->folder->cmethod >> 8) & 0x1f, 4096, file, &cab_read); \ - break; \ - case 0x0003: \ - file->cab->state->stream = (struct lzx_stream *) lzx_init(file->ofd, (int) (file->folder->cmethod >> 8) & 0x1f, 0, 4096, 0, file, &cab_read); \ - } \ - if((file->folder->cmethod & 0x000f) && !file->cab->state->stream) { \ - close(file->ofd); \ - return CL_EUNPACK; \ - } \ - file->cab->actfol = file->folder; \ - } else { \ - if(file->cab->state && file->cab->state->stream) { \ - switch(file->cab->state->cmethod & 0x000f) { \ - case 0x0001: \ - ((struct mszip_stream *) file->cab->state->stream)->ofd = file->ofd; \ - break; \ - case 0x0002: \ - ((struct qtm_stream *) file->cab->state->stream)->ofd = file->ofd; \ - break; \ - case 0x0003: \ - ((struct lzx_stream *) file->cab->state->stream)->ofd = file->ofd; \ - break; \ - } \ - } \ - } - - -int cab_extract(struct cab_file *file, const char *name) -{ - int ret; - - - if(!file || !name) { - cli_errmsg("cab_extract: !file || !name\n"); - return CL_ENULLARG; - } - - if(!file->folder) { - cli_errmsg("cab_extract: file->folder == NULL\n"); - return CL_ENULLARG; - } - - 
file->ofd = open(name, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU); - if(file->ofd == -1) { - cli_errmsg("cab_extract: Can't open file %s in write mode\n", name); - return CL_ECREAT; - } - - switch(file->folder->cmethod & 0x000f) { - case 0x0000: /* STORE */ - cli_dbgmsg("CAB: Compression method: STORED\n"); - CAB_CHGFOLDER; - if(file->length > file->cab->length) { - cli_dbgmsg("cab_extract: Stored file larger than archive itself, trimming down\n"); - file->length = file->cab->length; - } - ret = cab_unstore(file); - break; - - case 0x0001: /* MSZIP */ - cli_dbgmsg("CAB: Compression method: MSZIP\n"); - CAB_CHGFOLDER; - ret = mszip_decompress(file->cab->state->stream, file->length); - break; - - case 0x0002: /* QUANTUM */ - cli_dbgmsg("CAB: Compression method: QUANTUM\n"); - CAB_CHGFOLDER; - ret = qtm_decompress(file->cab->state->stream, file->length); - break; - - case 0x0003: /* LZX */ - cli_dbgmsg("CAB: Compression method: LZX\n"); - CAB_CHGFOLDER; - ret = lzx_decompress(file->cab->state->stream, file->length); - break; - - default: - cli_dbgmsg("CAB: Not supported compression method: 0x%x\n", file->folder->cmethod & 0x000f); - ret = CL_EFORMAT; - } - - close(file->ofd); - - if(ret == CL_BREAK) - ret = CL_SUCCESS; - - return ret; -} diff --git a/libclamav/cab.h b/libclamav/cab.h deleted file mode 100644 index 795900d555e9..000000000000 --- a/libclamav/cab.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2007-2008 Sourcefire, Inc. - * - * Authors: Tomasz Kojm - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#ifndef __CAB_H -#define __CAB_H - -#include -#include "cltypes.h" -#include "fmap.h" - -#define CAB_BLOCKMAX 65535 -#define CAB_INPUTMAX (CAB_BLOCKMAX + 6144) - -struct cab_archive { - struct cab_folder *folders, *actfol; - struct cab_file *files; - struct cab_state *state; - fmap_t *map; - off_t cur_offset; - uint32_t length; - uint16_t nfolders; - uint16_t nfiles; - uint16_t flags; - uint16_t reshdr; - uint8_t resdata; -}; - -struct cab_state { - unsigned char *pt, *end; - void *stream; - unsigned char block[CAB_INPUTMAX]; - uint16_t blklen; - uint16_t outlen; - uint16_t blknum; - uint16_t cmethod; -}; - -struct cab_file { - off_t offset; - char *name; - uint32_t length; - int error; - int lread; - int ofd; - struct cab_folder *folder; - struct cab_file *next; - struct cab_archive *cab; - uint16_t attribs; - uint64_t max_size, written_size; -}; - -struct cab_folder { - struct cab_archive *cab; - off_t offset; - struct cab_folder *next; - uint16_t cmethod; - uint16_t nblocks; -}; - -int cab_open(fmap_t *map, off_t offset, struct cab_archive *cab); -int cab_extract(struct cab_file *file, const char *name); -void cab_free(struct cab_archive *cab); - -#endif diff --git a/libclamav/chmunpack.h b/libclamav/chmunpack.h deleted file mode 100644 index 7864386b921f..000000000000 --- a/libclamav/chmunpack.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Extract component parts of MS CHM files - * - * Copyright (C) 2007-2008 Sourcefire, Inc. - * - * Authors: Trog - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#ifndef __CHM_UNPACK_H -#define __CHM_UNPACK_H - -#if HAVE_CONFIG_H -#include "clamav-config.h" -#endif - -#include "cltypes.h" -#include "others.h" -#include "fmap.h" - -#ifndef HAVE_ATTRIB_PACKED -#define __attribute__(x) -#endif - -#ifdef HAVE_PRAGMA_PACK -#pragma pack(1) -#endif - -#ifdef HAVE_PRAGMA_PACK_HPPA -#pragma pack 1 -#endif - -#define CHM_ITSF_MIN_LEN (0x60) -typedef struct chm_itsf_header_tag -{ - unsigned char signature[4]; - int32_t version __attribute__ ((packed)); - int32_t header_len __attribute__ ((packed)); - uint32_t unknown __attribute__ ((packed)); - uint32_t last_modified __attribute__ ((packed)); - uint32_t lang_id __attribute__ ((packed)); - unsigned char dir_clsid[16]; - unsigned char stream_clsid[16]; - uint64_t sec0_offset __attribute__ ((packed)); - uint64_t sec0_len __attribute__ ((packed)); - uint64_t dir_offset __attribute__ ((packed)); - uint64_t dir_len __attribute__ ((packed)); - uint64_t data_offset __attribute__ ((packed)); -} chm_itsf_header_t; - -#define CHM_ITSP_LEN (0x54) -typedef struct chm_itsp_header_tag -{ - unsigned char signature[4]; - int32_t version __attribute__ ((packed)); - int32_t header_len __attribute__ ((packed)); - int32_t unknown1 __attribute__ ((packed)); - uint32_t block_len __attribute__ ((packed)); - int32_t blockidx_intvl __attribute__ ((packed)); - int32_t index_depth __attribute__ ((packed)); - int32_t index_root __attribute__ ((packed)); - int32_t index_head __attribute__ ((packed)); - int32_t index_tail __attribute__ 
((packed)); - int32_t unknown2 __attribute__ ((packed)); - uint32_t num_blocks __attribute__ ((packed)); - uint32_t lang_id __attribute__ ((packed)); - unsigned char system_clsid[16]; - unsigned char unknown4[16]; -} chm_itsp_header_t; - -#ifdef HAVE_PRAGMA_PACK -#pragma pack() -#endif - -#ifdef HAVE_PRAGMA_PACK_HPPA -#pragma pack -#endif - -typedef struct chm_sys_entry_tag -{ - uint64_t offset; - uint64_t length; -} chm_sys_entry_t; - -typedef struct chm_metadata_tag { - uint64_t file_length; - uint64_t file_offset; - chm_sys_entry_t sys_control; - chm_sys_entry_t sys_content; - chm_sys_entry_t sys_reset; - off_t m_length; - chm_itsf_header_t itsf_hdr; - chm_itsp_header_t itsp_hdr; - int ufd; - int ofd; - uint32_t num_chunks; - off_t chunk_offset; - const char *chunk_data; - const char *chunk_current; - const char *chunk_end; - fmap_t *map; - uint16_t chunk_entries; -} chm_metadata_t; - -int cli_chm_open(const char *dirname, chm_metadata_t *metadata, cli_ctx *ctx); -int cli_chm_prepare_file(chm_metadata_t *metadata); -int cli_chm_extract_file(char *dirname, chm_metadata_t *metadata, cli_ctx *ctx); -void cli_chm_close(chm_metadata_t *metadata); -#endif diff --git a/libclamav/libmspack.c b/libclamav/libmspack.c new file mode 100644 index 000000000000..e94312e6dad3 --- /dev/null +++ b/libclamav/libmspack.c @@ -0,0 +1,525 @@ +/* + * Glue code for libmspack handling. 
+ * Author: 웃 Sebastian Andrzej Siewior + * ✉ sebastian @ breakpoint ̣cc + */ + +#include +#include +#include +#include + +#include + +#include "clamav.h" +#include "fmap.h" +#include "scanners.h" +#include "others.h" + +enum mspack_type { + FILETYPE_DUNNO, + FILETYPE_FMAP, + FILETYPE_FILENAME, +}; + +struct mspack_name { + fmap_t *fmap; + off_t org; +}; + +struct mspack_system_ex { + struct mspack_system ops; + off_t max_size; +}; + +struct mspack_handle { + enum mspack_type type; + + fmap_t *fmap; + off_t org; + off_t offset; + + FILE *f; + off_t max_size; +}; + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1: __min2; }) + +static struct mspack_file *mspack_fmap_open(struct mspack_system *self, + const char *filename, int mode) +{ + struct mspack_name *mspack_name; + struct mspack_handle *mspack_handle; + struct mspack_system_ex *self_ex; + const char *fmode; + + if (!filename) { + cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__); + return NULL; + } + mspack_handle = malloc(sizeof(*mspack_handle)); + if (!mspack_handle) { + cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__); + return NULL; + } + switch (mode) { + case MSPACK_SYS_OPEN_READ: + mspack_handle->type = FILETYPE_FMAP; + + mspack_name = (struct mspack_name *)filename; + mspack_handle->fmap = mspack_name->fmap; + mspack_handle->org = mspack_name->org; + mspack_handle->offset = 0; + + return (struct mspack_file *)mspack_handle; + + case MSPACK_SYS_OPEN_WRITE: + fmode = "wb"; + break; + case MSPACK_SYS_OPEN_UPDATE: + fmode = "r+b"; + break; + case MSPACK_SYS_OPEN_APPEND: + fmode = "ab"; + break; + default: + cli_dbgmsg("%s() wrong mode\n", __func__); + goto out_err; + } + + mspack_handle->type = FILETYPE_FILENAME; + + mspack_handle->f = fopen(filename, fmode); + if (!mspack_handle->f) 
{
+		cli_dbgmsg("%s() failed %d\n", __func__, __LINE__);
+		goto out_err;
+	}
+	self_ex = container_of(self, struct mspack_system_ex, ops);
+	mspack_handle->max_size = self_ex->max_size;
+	return (struct mspack_file *)mspack_handle;
+
+out_err:
+	free(mspack_handle);
+	return NULL;
+}
+
+/*
+ * libmspack close callback: releases a handle returned by the open callback.
+ * Only handles of type FILETYPE_FILENAME own a FILE that has to be closed.
+ * A NULL handle is ignored.
+ */
+static void mspack_fmap_close(struct mspack_file *file)
+{
+	struct mspack_handle *mspack_handle = (struct mspack_handle *)file;
+
+	if (!mspack_handle)
+		return;
+
+	if (mspack_handle->type == FILETYPE_FILENAME) {
+		/* fclose() flushes buffered data, so it can fail; log it */
+		if (fclose(mspack_handle->f))
+			cli_dbgmsg("%s() err %d\n", __func__, __LINE__);
+	}
+	free(mspack_handle);
+}
+
+/*
+ * libmspack read callback.
+ *
+ * Reads up to @bytes bytes into @buffer, either from the fmap (at the
+ * handle's current offset plus its origin) or from the handle's FILE.
+ *
+ * Returns the number of bytes actually read, which may be less than
+ * requested (0 at end of input), or -1 on error or invalid arguments.
+ */
+static int mspack_fmap_read(struct mspack_file *file, void *buffer, int bytes)
+{
+	struct mspack_handle *mspack_handle = (struct mspack_handle *)file;
+	off_t offset;
+	size_t count;
+	int ret;
+
+	if (bytes < 0) {
+		cli_dbgmsg("%s() %d\n", __func__, __LINE__);
+		return -1;
+	}
+	if (!mspack_handle) {
+		cli_dbgmsg("%s() %d\n", __func__, __LINE__);
+		return -1;
+	}
+
+	if (mspack_handle->type == FILETYPE_FMAP) {
+		offset = mspack_handle->offset + mspack_handle->org;
+
+		ret = fmap_readn(mspack_handle->fmap, buffer, offset, bytes);
+		if (ret != bytes)
+			cli_dbgmsg("%s() %d %d, %d\n", __func__, __LINE__, bytes, ret);
+		if (ret < 0)
+			return -1;
+
+		/*
+		 * Advance by what was really delivered, so that a short read
+		 * leaves the cursor (and tell()) consistent with the data.
+		 */
+		mspack_handle->offset += ret;
+		return ret;
+	}
+
+	/*
+	 * Read byte-wise items: a short read at end of file is then reported
+	 * as a byte count instead of being mistaken for an error.
+	 */
+	count = fread(buffer, 1, bytes, mspack_handle->f);
+	if (count < (size_t)bytes) {
+		cli_dbgmsg("%s() %d %d, %zu\n", __func__, __LINE__, bytes, count);
+		if (ferror(mspack_handle->f))
+			return -1;
+	}
+	return (int)count;
+}
+
+/*
+ * libmspack write callback.
+ *
+ * Only FILE backed handles can be written; fmap handles are rejected.
+ * The handle's max_size is the number of bytes that may still be written:
+ * data beyond it is silently dropped while the full @bytes is reported as
+ * written, so extraction of an oversized member does not fail.
+ *
+ * Returns @bytes on success (including the truncated case) or -1 on error.
+ */
+static int mspack_fmap_write(struct mspack_file *file, void *buffer, int bytes)
+{
+	struct mspack_handle *mspack_handle = (struct mspack_handle *)file;
+	size_t count;
+	off_t quota;
+	off_t chunk;
+
+	if (bytes < 0 || !mspack_handle) {
+		cli_dbgmsg("%s() err %d\n", __func__, __LINE__);
+		return -1;
+	}
+
+	if (mspack_handle->type == FILETYPE_FMAP) {
+		cli_dbgmsg("%s() err %d\n", __func__, __LINE__);
+		return -1;
+	}
+
+	if (!bytes)
+		return 0;
+
+	quota = mspack_handle->max_size;
+	if (!quota)
+		return bytes;
+
+	chunk = min_t(off_t, quota, bytes);
+	mspack_handle->max_size -= chunk;
+
+	count = fwrite(buffer, chunk, 1, mspack_handle->f);
+	if (count < 1) {
+		cli_dbgmsg("%s() err %m <%zu %d>\n", __func__, count, bytes);
+		return -1;
+	}
+
+	return bytes;
+}
+
+/*
+ * libmspack seek callback.
+ *
+ * For fmap handles the new position is computed from @mode and stored in the
+ * handle; it must lie within [0, fmap->len]. For FILE handles the mode is
+ * translated to the stdio constant and passed to fseeko().
+ *
+ * Returns 0 on success, non-zero on failure.
+ *
+ * NOTE(review): the fmap bound and MSPACK_SYS_SEEK_END use fmap->len without
+ * taking the handle's org into account, although reads add org to the
+ * offset - confirm this is intended when org is non-zero.
+ */
+static int mspack_fmap_seek(struct mspack_file *file, off_t offset, int mode)
+{
+	struct mspack_handle *mspack_handle = (struct mspack_handle *)file;
+
+	if (!mspack_handle) {
+		cli_dbgmsg("%s() err %d\n", __func__, __LINE__);
+		return -1;
+	}
+
+	if (mspack_handle->type == FILETYPE_FMAP) {
+		off_t new_pos;
+
+		switch (mode) {
+		case MSPACK_SYS_SEEK_START:
+			new_pos = offset;
+			break;
+		case MSPACK_SYS_SEEK_CUR:
+			new_pos = mspack_handle->offset + offset;
+			break;
+		case MSPACK_SYS_SEEK_END:
+			new_pos = mspack_handle->fmap->len + offset;
+			break;
+		default:
+			cli_dbgmsg("%s() err %d\n", __func__, __LINE__);
+			return -1;
+		}
+		if (new_pos < 0 || new_pos > mspack_handle->fmap->len) {
+			cli_dbgmsg("%s() err %d\n", __func__, __LINE__);
+			return -1;
+		}
+
+		mspack_handle->offset = new_pos;
+		return 0;
+	}
+
+	switch (mode) {
+	case MSPACK_SYS_SEEK_START:
+		mode = SEEK_SET;
+		break;
+	case MSPACK_SYS_SEEK_CUR:
+		mode = SEEK_CUR;
+		break;
+	case MSPACK_SYS_SEEK_END:
+		mode = SEEK_END;
+		break;
+	default:
+		cli_dbgmsg("%s() err %d\n", __func__, __LINE__);
+		return -1;
+	}
+
+	return fseeko(mspack_handle->f, offset, mode);
+}
+
+/*
+ * libmspack tell callback: returns the current position of the handle (the
+ * stored offset for fmap handles, ftello() otherwise) or -1 for a NULL
+ * handle.
+ */
+static off_t mspack_fmap_tell(struct mspack_file *file)
+{
+	struct mspack_handle *mspack_handle = (struct mspack_handle *)file;
+
+	if (!mspack_handle)
+		return -1;
+
+	if (mspack_handle->type == FILETYPE_FMAP)
+		return mspack_handle->offset;
+
+	return ftello(mspack_handle->f);
+}
+
+/*
+ * libmspack message callback: forwards the message to the debug log.
+ *
+ * NOTE(review): only the raw format string is logged; the variadic arguments
+ * are not expanded.
+ */
+static void mspack_fmap_message(struct mspack_file *file, const char *fmt, ...)
+{
+	cli_dbgmsg("%s() %s\n", __func__, fmt);
+}
+
+/* libmspack alloc callback: plain malloc(), @self is unused. */
+static void *mspack_fmap_alloc(struct mspack_system *self, size_t num)
+{
+	return malloc(num);
+}
+
+/* libmspack free callback: plain free(). */
+static void mspack_fmap_free(void *mem)
+{
+	free(mem);
+}
+
+/* libmspack copy callback: note the (src, dst) argument order. */
+static void mspack_fmap_copy(void *src, void *dst, size_t num)
+{
+	memcpy(dst, src, num);
+}
+
+/* I/O and memory callbacks handed to the libmspack decompressors. */
+static struct mspack_system mspack_sys_fmap_ops = {
+	.open		= mspack_fmap_open,
+	.close		= mspack_fmap_close,
+	.read		= mspack_fmap_read,
+	.write		= mspack_fmap_write,
+	.seek		= mspack_fmap_seek,
+	.tell		= mspack_fmap_tell,
+	.message	= mspack_fmap_message,
+	.alloc		= mspack_fmap_alloc,
+	.free		= mspack_fmap_free,
+	.copy		= mspack_fmap_copy,
+};
+
+/*
+ * Open @filename read-only and scan it with cli_magic_scandesc().
+ * Returns CL_EOPEN if the file cannot be opened, otherwise the scan result.
+ */
+static int cli_scanfile(const char *filename, cli_ctx *ctx)
+{
+	int fd, ret;
+
+	/* internal version of cl_scanfile with arec/mrec preserved */
+	fd = safe_open(filename, O_RDONLY|O_BINARY);
+	if (fd < 0)
+		return CL_EOPEN;
+
+	ret = cli_magic_scandesc(fd, ctx);
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * Scan the members of the MS cabinet that starts at @sfx_offset within
+ * ctx->fmap.
+ *
+ * Each member is first checked with cli_matchmeta(), then extracted into a
+ * temporary file (limited by the engine's maxscansize / maxfilesize) and
+ * scanned. The temporary file is removed unless the engine's keeptmp is set.
+ *
+ * Returns CL_VIRUS if at least one detection was made, otherwise the last
+ * status code (CL_EUNPACK / CL_EFORMAT if the archive could not be opened).
+ */
+int cli_scanmscab(cli_ctx *ctx, off_t sfx_offset)
+{
+	struct mscab_decompressor *cab_d;
+	struct mscabd_cabinet *cab_h;
+	struct mscabd_file *cab_f;
+	int ret;
+	int files;
+	int virus_num = 0;
+	struct mspack_name mspack_fmap = {
+		.fmap	= *ctx->fmap,
+		.org	= sfx_offset,
+	};
+	struct mspack_system_ex ops_ex = {
+		.ops	= mspack_sys_fmap_ops,
+	};
+
+	MSPACK_SYS_SELFTEST(ret);
+	if (ret) {
+		cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
+		return CL_EUNPACK;
+	}
+
+	cab_d = mspack_create_cab_decompressor(&ops_ex.ops);
+	if (!cab_d) {
+		cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
+		return CL_EUNPACK;
+	}
+
+	cab_h = cab_d->open(cab_d, (char *)&mspack_fmap);
+	if (!cab_h) {
+		ret = CL_EFORMAT;
+		cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
+		goto out_dest;
+	}
+	files = 0;
+	for (cab_f = cab_h->files; cab_f; cab_f = cab_f->next) {
+		off_t max_size;
+		char *tmp_fname;
+
+		ret = cli_matchmeta(ctx, cab_f->filename, 0, cab_f->length, 0,
+				files, 0, NULL);
+		if (ret) {
+			if (ret == CL_VIRUS) {
+				virus_num++;
+				if (!SCAN_ALL)
+					break;
+			}
+			goto out_close;
+		}
+
+		if (ctx->engine->maxscansize) {
+			if (ctx->scansize >= ctx->engine->maxscansize) {
+				ret = CL_CLEAN;
+				break;
+			}
+		}
+
+		/* never extract more than what is left of the scan budget */
+		if (ctx->engine->maxscansize &&
+				ctx->scansize + ctx->engine->maxfilesize >=
+				ctx->engine->maxscansize)
+			max_size = ctx->engine->maxscansize -
+				ctx->scansize;
+		else
+			max_size = ctx->engine->maxfilesize ?
+				ctx->engine->maxfilesize :
+				0xffffffff;
+
+		tmp_fname = cli_gentemp(ctx->engine->tmpdir);
+		if (!tmp_fname) {
+			ret = CL_EMEM;
+			break;
+		}
+
+		ops_ex.max_size = max_size;
+		/* scan */
+		ret = cab_d->extract(cab_d, cab_f, tmp_fname);
+		if (ret) {
+			/*
+			 * Failed to extract. ret is a libmspack error code
+			 * here, which is unrelated to the CL_* status codes,
+			 * so it must neither be compared against CL_VIRUS
+			 * nor returned to the caller: skip this member.
+			 */
+			cli_dbgmsg("%s() failed to extract %d\n", __func__, ret);
+			ret = CL_CLEAN;
+		} else {
+			ret = cli_scanfile(tmp_fname, ctx);
+			if (ret == CL_VIRUS)
+				virus_num++;
+		}
+		if (!ctx->engine->keeptmp) {
+			if (!access(tmp_fname, R_OK) && cli_unlink(tmp_fname)) {
+				free(tmp_fname);
+				ret = CL_EUNLINK;
+				break;
+			}
+		}
+		free(tmp_fname);
+		files++;
+		if (ret == CL_VIRUS && SCAN_ALL)
+			continue;
+		if (ret)
+			break;
+	}
+
+out_close:
+	cab_d->close(cab_d, cab_h);
+out_dest:
+	mspack_destroy_cab_decompressor(cab_d);
+	if (virus_num)
+		return CL_VIRUS;
+	return ret;
+}
+
+/*
+ * Scan the members of the CHM file in ctx->fmap.
+ *
+ * Works like cli_scanmscab(): each member is checked with cli_matchmeta(),
+ * extracted into a size limited temporary file and scanned; the temporary
+ * file is removed unless the engine's keeptmp is set.
+ *
+ * Returns CL_VIRUS if at least one detection was made, otherwise the last
+ * status code (CL_EUNPACK / CL_EFORMAT if the file could not be opened).
+ */
+int cli_scanmschm(cli_ctx *ctx)
+{
+	struct mschm_decompressor *mschm_d;
+	struct mschmd_header *mschm_h;
+	struct mschmd_file *mschm_f;
+	int ret;
+	int files;
+	int virus_num = 0;
+	struct mspack_name mspack_fmap = {
+		.fmap = *ctx->fmap,
+	};
+	struct mspack_system_ex ops_ex = {
+		.ops = mspack_sys_fmap_ops,
+	};
+
+	MSPACK_SYS_SELFTEST(ret);
+	if (ret) {
+		cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
+		return CL_EUNPACK;
+	}
+
+	mschm_d = mspack_create_chm_decompressor(&ops_ex.ops);
+	if (!mschm_d) {
+		cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
+		return CL_EUNPACK;
+	}
+
+	mschm_h = mschm_d->open(mschm_d, (char *)&mspack_fmap);
+	if (!mschm_h) {
+		ret = CL_EFORMAT;
+		cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
+		goto out_dest;
+	}
+	files = 0;
+	for (mschm_f = mschm_h->files; mschm_f; mschm_f = mschm_f->next) {
+		off_t max_size;
+		char *tmp_fname;
+
+		ret = cli_matchmeta(ctx, mschm_f->filename, 0, mschm_f->length,
+				0, files, 0, NULL);
+		if (ret) {
+			if (ret == CL_VIRUS) {
+				virus_num++;
+				if (!SCAN_ALL)
+					break;
+			}
+			goto out_close;
+		}
+
+		if (ctx->engine->maxscansize) {
+			if (ctx->scansize >= ctx->engine->maxscansize) {
+				ret = CL_CLEAN;
+				break;
+			}
+		}
+
+		/* never extract more than what is left of the scan budget */
+		if (ctx->engine->maxscansize &&
+				ctx->scansize + ctx->engine->maxfilesize >=
+				ctx->engine->maxscansize)
+			max_size = ctx->engine->maxscansize -
+				ctx->scansize;
+		else
+			max_size = ctx->engine->maxfilesize ?
+				ctx->engine->maxfilesize :
+				0xffffffff;
+
+		ops_ex.max_size = max_size;
+
+		tmp_fname = cli_gentemp(ctx->engine->tmpdir);
+		if (!tmp_fname) {
+			ret = CL_EMEM;
+			break;
+		}
+
+		/* scan */
+		ret = mschm_d->extract(mschm_d, mschm_f, tmp_fname);
+		if (ret) {
+			/*
+			 * Failed to extract. ret is a libmspack error code
+			 * here, which is unrelated to the CL_* status codes,
+			 * so it must neither be compared against CL_VIRUS
+			 * nor returned to the caller: skip this member.
+			 */
+			cli_dbgmsg("%s() failed to extract %d\n", __func__, ret);
+			ret = CL_CLEAN;
+		} else {
+			ret = cli_scanfile(tmp_fname, ctx);
+			if (ret == CL_VIRUS)
+				virus_num++;
+		}
+		if (!ctx->engine->keeptmp) {
+			if (!access(tmp_fname, R_OK) && cli_unlink(tmp_fname)) {
+				free(tmp_fname);
+				ret = CL_EUNLINK;
+				break;
+			}
+		}
+		free(tmp_fname);
+		files++;
+		if (ret == CL_VIRUS && SCAN_ALL)
+			continue;
+		if (ret)
+			break;
+	}
+
+out_close:
+	mschm_d->close(mschm_d, mschm_h);
+out_dest:
+	mspack_destroy_chm_decompressor(mschm_d);
+	if (virus_num)
+		return CL_VIRUS;
+	return ret;
+}
diff --git a/libclamav/libmspack.h b/libclamav/libmspack.h
new file mode 100644
index 000000000000..07a9442bf253
--- /dev/null
+++ b/libclamav/libmspack.h
@@ -0,0 +1,7 @@
+#ifndef __LIBMSPACK_H__
+#define __LIBMSPACK_H__
+
+int cli_scanmscab(cli_ctx *ctx, off_t sfx_offset);
+int cli_scanmschm(cli_ctx *ctx);
+
+#endif
diff --git a/libclamav/mspack.c b/libclamav/mspack.c
deleted file mode 100644
index cc4c0a59fc9a..000000000000
--- a/libclamav/mspack.c
+++ /dev/null
@@ -1,2026
+0,0 @@ -/* - * This file includes code from libmspack adapted for libclamav by - * tkojm@clamav.net and draynor@sourcefire.com - * - * Copyright (C) 2003-2004 Stuart Caie - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1 as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 - * USA - */ - -#if HAVE_CONFIG_H -#include "clamav-config.h" -#endif - -#include -#include - -#include "others.h" -#include "clamav.h" -#include "mspack.h" - -#if HAVE_LIMITS_H -# include -#endif -#ifndef CHAR_BIT -# define CHAR_BIT (8) -#endif - - -/*************************************************************************** - * MS-ZIP decompression implementation - *************************************************************************** - * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted - * by Microsoft Corporation. - * - * The deflate method was created by Phil Katz. MSZIP is equivalent to the - * deflate method. - * - */ - -/* match lengths for literal codes 257.. 285 */ -static const unsigned short mszip_lit_lengths[29] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, - 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258 -}; - -/* match offsets for distance codes 0 .. 
29 */ -static const unsigned short mszip_dist_offsets[30] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, - 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 -}; - -/* extra bits required for literal codes 257.. 285 */ -static const unsigned char mszip_lit_extrabits[29] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, - 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 -}; - -/* extra bits required for distance codes 0 .. 29 */ -static const unsigned char mszip_dist_extrabits[30] = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 -}; - -/* the order of the bit length Huffman code lengths */ -static const unsigned char mszip_bitlen_order[19] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 -}; - -/* ANDing with mszip_bit_mask[n] masks the lower n bits */ -static const unsigned short mszip_bit_mask_tab[17] = { - 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, - 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff -}; - -#define MSZIP_STORE_BITS do { \ - zip->i_ptr = i_ptr; \ - zip->i_end = i_end; \ - zip->bit_buffer = bit_buffer; \ - zip->bits_left = bits_left; \ -} while (0) - -#define MSZIP_RESTORE_BITS do { \ - i_ptr = zip->i_ptr; \ - i_end = zip->i_end; \ - bit_buffer = zip->bit_buffer; \ - bits_left = zip->bits_left; \ -} while (0) - -#define MSZIP_ENSURE_BITS(nbits) do { \ - while (bits_left < (nbits)) { \ - if (i_ptr >= i_end) { \ - if (mszip_read_input(zip)) return zip->error; \ - i_ptr = zip->i_ptr; \ - i_end = zip->i_end; \ - if(i_ptr == i_end) break; \ - } \ - bit_buffer |= *i_ptr++ << bits_left; bits_left += 8; \ - } \ -} while (0) - -#define MSZIP_PEEK_BITS(nbits) (bit_buffer & ((1<<(nbits))-1)) -#define MSZIP_PEEK_BITS_T(nbits) (bit_buffer & mszip_bit_mask_tab[(nbits)]) - -#define MSZIP_REMOVE_BITS(nbits) ((bit_buffer >>= (nbits)), (bits_left -= (nbits))) - -#define MSZIP_READ_BITS(val, nbits) do { \ - 
MSZIP_ENSURE_BITS(nbits); (val) = MSZIP_PEEK_BITS(nbits); MSZIP_REMOVE_BITS(nbits); \ -} while (0) - -#define MSZIP_READ_BITS_T(val, nbits) do { \ - MSZIP_ENSURE_BITS(nbits); (val) = MSZIP_PEEK_BITS_T(nbits); MSZIP_REMOVE_BITS(nbits); \ -} while (0) - -static int mszip_read_input(struct mszip_stream *zip) { - int nread = zip->read_cb(zip->file, zip->inbuf, (int)zip->inbuf_size); - if (nread < 0) { - if (zip->file->error == CL_BREAK) { - if ((unsigned int)nread == zip->last) { - cli_dbgmsg("mszip_read_input: Two consecutive CL_BREAKs reached.\n"); - return CL_BREAK; - } - // Need short circuit to ensure scanning small files - cli_dbgmsg("mszip_read_input: First CL_BREAK reached.\n"); - zip->i_ptr = zip->i_end; - zip->last = nread; - return CL_SUCCESS; - } - else - return zip->error = CL_EFORMAT; - } - - zip->last = nread; - zip->i_ptr = &zip->inbuf[0]; - zip->i_end = &zip->inbuf[nread]; - - return CL_SUCCESS; -} - -/* inflate() error codes */ -#define INF_ERR_BLOCKTYPE (-1) /* unknown block type */ -#define INF_ERR_COMPLEMENT (-2) /* block size complement mismatch */ -#define INF_ERR_FLUSH (-3) /* error from flush_window() callback */ -#define INF_ERR_BITBUF (-4) /* too many bits in bit buffer */ -#define INF_ERR_SYMLENS (-5) /* too many symbols in blocktype 2 header */ -#define INF_ERR_BITLENTBL (-6) /* failed to build bitlens huffman table */ -#define INF_ERR_LITERALTBL (-7) /* failed to build literals huffman table */ -#define INF_ERR_DISTANCETBL (-8) /* failed to build distance huffman table */ -#define INF_ERR_BITOVERRUN (-9) /* bitlen RLE code goes over table size */ -#define INF_ERR_BADBITLEN (-10) /* invalid bit-length code */ -#define INF_ERR_LITCODE (-11) /* out-of-range literal code */ -#define INF_ERR_DISTCODE (-12) /* out-of-range distance code */ -#define INF_ERR_DISTANCE (-13) /* somehow, distance is beyond 32k */ -#define INF_ERR_HUFFSYM (-14) /* out of bits decoding huffman symbol */ - -/* mszip_make_decode_table(nsyms, nbits, length[], table[]) - * 
- * This function was coded by David Tritscher. It builds a fast huffman - * decoding table out of just a canonical huffman code lengths table. - * - * NOTE: this is NOT identical to the mszip_make_decode_table() in lzxd.c. This - * one reverses the quick-lookup bit pattern. Bits are read MSB to LSB in LZX, - * but LSB to MSB in MSZIP. - * - * nsyms = total number of symbols in this huffman tree. - * nbits = any symbols with a code length of nbits or less can be decoded - * in one lookup of the table. - * length = A table to get code lengths from [0 to nsyms-1] - * table = The table to fill up with decoded symbols and pointers. - * - * Returns 0 for OK or 1 for error - */ -static int mszip_make_decode_table(unsigned int nsyms, unsigned int nbits, - unsigned char *length, unsigned short *table) -{ - register unsigned int leaf, reverse, fill; - register unsigned short sym, next_sym; - register unsigned char bit_num; - unsigned int pos = 0; /* the current position in the decode table */ - unsigned int table_mask = 1 << nbits; - unsigned int mszip_bit_mask = table_mask >> 1; /* don't do 0 length codes */ - - /* fill entries for codes short enough for a direct mapping */ - for (bit_num = 1; bit_num <= nbits; bit_num++) { - for (sym = 0; sym < nsyms; sym++) { - if (length[sym] != bit_num) continue; - - /* reverse the significant bits */ - fill = length[sym]; reverse = pos >> (nbits - fill); leaf = 0; - do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill); - - if((pos += mszip_bit_mask) > table_mask) return 1; /* table overrun */ - - /* fill all possible lookups of this symbol with the symbol itself */ - fill = mszip_bit_mask; next_sym = 1 << bit_num; - do { table[leaf] = sym; leaf += next_sym; } while (--fill); - } - mszip_bit_mask >>= 1; - } - - /* exit with success if table is now complete */ - if (pos == table_mask) return 0; - - /* mark all remaining table entries as unused */ - for (sym = pos; sym < table_mask; sym++) { - reverse = sym; leaf = 0; fill 
= nbits; - do { leaf <<= 1; leaf |= reverse & 1; reverse >>= 1; } while (--fill); - table[leaf] = 0xFFFF; - } - - /* where should the longer codes be allocated from? */ - next_sym = ((table_mask >> 1) < nsyms) ? nsyms : (table_mask >> 1); - - /* give ourselves room for codes to grow by up to 16 more bits. - * codes now start at bit nbits+16 and end at (nbits+16-codelength) */ - pos <<= 16; - table_mask <<= 16; - mszip_bit_mask = 1 << 15; - - for (bit_num = nbits+1; bit_num <= MSZIP_MAX_HUFFBITS; bit_num++) { - for (sym = 0; sym < nsyms; sym++) { - if (length[sym] != bit_num) continue; - - /* leaf = the first nbits of the code, reversed */ - reverse = pos >> 16; leaf = 0; fill = nbits; - do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill); - - for (fill = 0; fill < (bit_num - nbits); fill++) { - /* if this path hasn't been taken yet, 'allocate' two entries */ - if (table[leaf] == 0xFFFF) { - table[(next_sym << 1) ] = 0xFFFF; - table[(next_sym << 1) + 1 ] = 0xFFFF; - table[leaf] = next_sym++; - } - /* follow the path and select either left or right for next bit */ - leaf = (table[leaf] << 1) | ((pos >> (15 - fill)) & 1); - } - table[leaf] = sym; - - if ((pos += mszip_bit_mask) > table_mask) return 1; /* table overflow */ - } - mszip_bit_mask >>= 1; - } - - /* full table? */ - return (pos != table_mask) ? 1 : 0; -} - -/* MSZIP_READ_HUFFSYM(tablename, var) decodes one huffman symbol from the - * bitstream using the stated table and puts it in var. - */ -#define MSZIP_READ_HUFFSYM(tbl, var) do { \ - /* huffman symbols can be up to 16 bits long */ \ - MSZIP_ENSURE_BITS(MSZIP_MAX_HUFFBITS); \ - /* immediate table lookup of [tablebits] bits of the code */ \ - sym = zip->tbl##_table[MSZIP_PEEK_BITS(MSZIP_##tbl##_TABLEBITS)]; \ - /* is the symbol is longer than [tablebits] bits? (i=node index) */ \ - if (sym >= MSZIP_##tbl##_MAXSYMBOLS) { \ - /* decode remaining bits by tree traversal */ \ - i = MSZIP_##tbl##_TABLEBITS - 1; \ - do { \ - /* check next bit. 
error if we run out of bits before decode */ \ - if (i++ > MSZIP_MAX_HUFFBITS) { \ - cli_dbgmsg("zip_inflate: out of bits in huffman decode\n"); \ - return INF_ERR_HUFFSYM; \ - } \ - sym = (sym << 1) | ((bit_buffer >> i) & 1); \ - if(sym >= MSZIP_##tbl##_TABLESIZE) { \ - cli_dbgmsg("zip_inflate: index out of table\n"); \ - return INF_ERR_HUFFSYM; \ - } \ - /* double node index and add 0 (left branch) or 1 (right) */ \ - sym = zip->tbl##_table[sym]; \ - /* while we are still in node indicies, not decoded symbols */ \ - } while (sym >= MSZIP_##tbl##_MAXSYMBOLS); \ - } \ - /* result */ \ - (var) = sym; \ - /* look up the code length of that symbol and discard those bits */ \ - i = zip->tbl##_len[sym]; \ - MSZIP_REMOVE_BITS(i); \ -} while (0) - -static int mszip_read_lens(struct mszip_stream *zip) { - /* for the bit buffer and huffman decoding */ - register unsigned int bit_buffer; - register int bits_left; - unsigned char *i_ptr, *i_end; - - /* bitlen Huffman codes -- immediate lookup, 7 bit max code length */ - unsigned short bl_table[(1 << 7)]; - unsigned char bl_len[19]; - - unsigned char lens[MSZIP_LITERAL_MAXSYMBOLS + MSZIP_DISTANCE_MAXSYMBOLS]; - unsigned int lit_codes, dist_codes, code, last_code=0, bitlen_codes, i, run; - - MSZIP_RESTORE_BITS; - - /* read the number of codes */ - MSZIP_READ_BITS(lit_codes, 5); lit_codes += 257; - MSZIP_READ_BITS(dist_codes, 5); dist_codes += 1; - MSZIP_READ_BITS(bitlen_codes, 4); bitlen_codes += 4; - if (lit_codes > MSZIP_LITERAL_MAXSYMBOLS) return INF_ERR_SYMLENS; - if (dist_codes > MSZIP_DISTANCE_MAXSYMBOLS) return INF_ERR_SYMLENS; - - /* read in the bit lengths in their unusual order */ - for (i = 0; i < bitlen_codes; i++) MSZIP_READ_BITS(bl_len[mszip_bitlen_order[i]], 3); - while (i < 19) bl_len[mszip_bitlen_order[i++]] = 0; - - /* create decoding table with an immediate lookup */ - if (mszip_make_decode_table(19, 7, &bl_len[0], &bl_table[0])) { - return INF_ERR_BITLENTBL; - } - - /* read literal / distance code lengths */ 
- for (i = 0; i < (lit_codes + dist_codes); i++) { - /* single-level huffman lookup */ - MSZIP_ENSURE_BITS(7); - code = bl_table[MSZIP_PEEK_BITS(7)]; - MSZIP_REMOVE_BITS(bl_len[code]); - - if (code < 16) lens[i] = last_code = code; - else { - switch (code) { - case 16: MSZIP_READ_BITS(run, 2); run += 3; code = last_code; break; - case 17: MSZIP_READ_BITS(run, 3); run += 3; code = 0; break; - case 18: MSZIP_READ_BITS(run, 7); run += 11; code = 0; break; - default: cli_dbgmsg("zip_read_lens: bad code!: %u\n", code); return INF_ERR_BADBITLEN; - } - if ((i + run) > (lit_codes + dist_codes)) return INF_ERR_BITOVERRUN; - while (run--) lens[i++] = code; - i--; - } - } - - /* copy LITERAL code lengths and clear any remaining */ - i = lit_codes; - memcpy(&zip->LITERAL_len[0], &lens[0], i); - while (i < MSZIP_LITERAL_MAXSYMBOLS) zip->LITERAL_len[i++] = 0; - - i = dist_codes; - memcpy(&zip->DISTANCE_len[0], &lens[lit_codes], i); - while (i < MSZIP_DISTANCE_MAXSYMBOLS) zip->DISTANCE_len[i++] = 0; - - MSZIP_STORE_BITS; - return 0; -} - -static int mspack_write(int fd, const void *buff, unsigned int count, struct cab_file *file) -{ - int ret; - - if(file->max_size) { - if(file->written_size >= file->max_size) - return CL_BREAK; - - if(file->written_size + count > file->max_size) - count = file->max_size - file->written_size; - } - if((ret = cli_writen(fd, buff, count)) > 0) - file->written_size += ret; - - return (ret == -1) ? 
CL_EWRITE : CL_SUCCESS; -} - -/* a clean implementation of RFC 1951 / inflate */ -static int mszip_inflate(struct mszip_stream *zip) { - unsigned int last_block, block_type, distance, length, this_run, i; - - /* for the bit buffer and huffman decoding */ - register unsigned int bit_buffer; - register int bits_left; - register unsigned short sym; - unsigned char *i_ptr, *i_end; - - MSZIP_RESTORE_BITS; - - do { - /* read in last block bit */ - MSZIP_READ_BITS(last_block, 1); - - /* read in block type */ - MSZIP_READ_BITS(block_type, 2); - - if (block_type == 0) { - /* uncompressed block */ - unsigned char lens_buf[4]; - - /* go to byte boundary */ - i = bits_left & 7; MSZIP_REMOVE_BITS(i); - - /* read 4 bytes of data, emptying the bit-buffer if necessary */ - for (i = 0; (bits_left >= 8); i++) { - if (i == 4) return INF_ERR_BITBUF; - lens_buf[i] = MSZIP_PEEK_BITS(8); - MSZIP_REMOVE_BITS(8); - } - if (bits_left != 0) return INF_ERR_BITBUF; - while (i < 4) { - if (i_ptr >= i_end) { - if (mszip_read_input(zip)) return zip->error; - i_ptr = zip->i_ptr; - i_end = zip->i_end; - if(i_ptr == i_end) break; - } - lens_buf[i++] = *i_ptr++; - } - if (i < 4) return INF_ERR_BITBUF; - - /* get the length and its complement */ - length = lens_buf[0] | (lens_buf[1] << 8); - i = lens_buf[2] | (lens_buf[3] << 8); - if (length != (~i & 0xFFFF)) return INF_ERR_COMPLEMENT; - - /* read and copy the uncompressed data into the window */ - while (length > 0) { - if (i_ptr >= i_end) { - if (mszip_read_input(zip)) return zip->error; - i_ptr = zip->i_ptr; - i_end = zip->i_end; - if(i_ptr == i_end) break; - } - - this_run = length; - if (this_run > (unsigned int)(i_end - i_ptr)) this_run = i_end - i_ptr; - if (this_run > (MSZIP_FRAME_SIZE - zip->window_posn)) - this_run = MSZIP_FRAME_SIZE - zip->window_posn; - - memcpy(&zip->window[zip->window_posn], i_ptr, this_run); - zip->window_posn += this_run; - i_ptr += this_run; - length -= this_run; - - if (zip->window_posn == MSZIP_FRAME_SIZE) { - if 
(zip->flush_window(zip, MSZIP_FRAME_SIZE)) return INF_ERR_FLUSH; - zip->window_posn = 0; - } - } - } - else if ((block_type == 1) || (block_type == 2)) { - /* Huffman-compressed LZ77 block */ - unsigned int window_posn, match_posn, code; - - if (block_type == 1) { - /* block with fixed Huffman codes */ - i = 0; - while (i < 144) zip->LITERAL_len[i++] = 8; - while (i < 256) zip->LITERAL_len[i++] = 9; - while (i < 280) zip->LITERAL_len[i++] = 7; - while (i < 288) zip->LITERAL_len[i++] = 8; - for (i = 0; i < 32; i++) zip->DISTANCE_len[i] = 5; - } - else { - /* block with dynamic Huffman codes */ - MSZIP_STORE_BITS; - if ((i = mszip_read_lens(zip))) return i; - MSZIP_RESTORE_BITS; - } - - /* now huffman lengths are read for either kind of block, - * create huffman decoding tables */ - if (mszip_make_decode_table(MSZIP_LITERAL_MAXSYMBOLS, MSZIP_LITERAL_TABLEBITS, - &zip->LITERAL_len[0], &zip->LITERAL_table[0])) - { - return INF_ERR_LITERALTBL; - } - - if (mszip_make_decode_table(MSZIP_DISTANCE_MAXSYMBOLS,MSZIP_DISTANCE_TABLEBITS, - &zip->DISTANCE_len[0], &zip->DISTANCE_table[0])) - { - return INF_ERR_DISTANCETBL; - } - - /* decode forever until end of block code */ - window_posn = zip->window_posn; - while (1) { - MSZIP_READ_HUFFSYM(LITERAL, code); - if (code < 256) { - zip->window[window_posn++] = (unsigned char) code; - if (window_posn == MSZIP_FRAME_SIZE) { - if (zip->flush_window(zip, MSZIP_FRAME_SIZE)) return INF_ERR_FLUSH; - window_posn = 0; - } - } - else if (code == 256) { - /* END OF BLOCK CODE: loop break point */ - break; - } - else { - code -= 257; - if (code >= 29) return INF_ERR_LITCODE; - MSZIP_READ_BITS_T(length, mszip_lit_extrabits[code]); - length += mszip_lit_lengths[code]; - - MSZIP_READ_HUFFSYM(DISTANCE, code); - if (code >= 30) return INF_ERR_DISTCODE; - MSZIP_READ_BITS_T(distance, mszip_dist_extrabits[code]); - distance += mszip_dist_offsets[code]; - - /* match position is window position minus distance. 
If distance - * is more than window position numerically, it must 'wrap - * around' the frame size. */ - match_posn = ((distance > window_posn) ? MSZIP_FRAME_SIZE : 0) - + window_posn - distance; - - /* copy match */ - if (length < 12) { - /* short match, use slower loop but no loop setup code */ - while (length--) { - zip->window[window_posn++] = zip->window[match_posn++]; - match_posn &= MSZIP_FRAME_SIZE - 1; - - if (window_posn == MSZIP_FRAME_SIZE) { - if (zip->flush_window(zip, MSZIP_FRAME_SIZE)) - return INF_ERR_FLUSH; - window_posn = 0; - } - } - } - else { - /* longer match, use faster loop but with setup expense */ - unsigned char *runsrc, *rundest; - do { - this_run = length; - if ((match_posn + this_run) > MSZIP_FRAME_SIZE) - this_run = MSZIP_FRAME_SIZE - match_posn; - if ((window_posn + this_run) > MSZIP_FRAME_SIZE) - this_run = MSZIP_FRAME_SIZE - window_posn; - - rundest = &zip->window[window_posn]; window_posn += this_run; - runsrc = &zip->window[match_posn]; match_posn += this_run; - length -= this_run; - while (this_run--) *rundest++ = *runsrc++; - - /* flush if necessary */ - if (window_posn == MSZIP_FRAME_SIZE) { - if (zip->flush_window(zip, MSZIP_FRAME_SIZE)) - return INF_ERR_FLUSH; - window_posn = 0; - } - if (match_posn == MSZIP_FRAME_SIZE) match_posn = 0; - } while (length > 0); - } - - } /* else (code >= 257) */ - - } /* while (forever) -- break point at 'code == 256' */ - zip->window_posn = window_posn; - } - else { - /* block_type == 3 -- bad block type */ - return INF_ERR_BLOCKTYPE; - } - } while (!last_block); - - /* flush the remaining data */ - if (zip->window_posn) { - if (zip->flush_window(zip, zip->window_posn)) return INF_ERR_FLUSH; - } - MSZIP_STORE_BITS; - - /* return success */ - return 0; -} - -/* inflate() calls this whenever the window should be flushed. 
As - * MSZIP only expands to the size of the window, the implementation used - * simply keeps track of the amount of data flushed, and if more than 32k - * is flushed, an error is raised. - */ -static int mszip_flush_window(struct mszip_stream *zip, - unsigned int data_flushed) -{ - zip->bytes_output += data_flushed; - if (zip->bytes_output > MSZIP_FRAME_SIZE) { - cli_dbgmsg("mszip_flush_window: overflow: %u bytes flushed, total is now %u\n", data_flushed, zip->bytes_output); - return 1; - } - return 0; -} - -struct mszip_stream *mszip_init(int ofd, - int input_buffer_size, - int repair_mode, - struct cab_file *file, - int (*read_cb)(struct cab_file *, unsigned char *, int)) -{ - struct mszip_stream *zip; - - input_buffer_size = (input_buffer_size + 1) & -2; - if (!input_buffer_size) return NULL; - - /* allocate decompression state */ - if (!(zip = cli_calloc(1, sizeof(struct mszip_stream)))) { - cli_errmsg("mszip_stream: Unable to allocate zip buffer\n"); - return NULL; - } - - /* allocate input buffer */ - zip->inbuf = cli_malloc((size_t) input_buffer_size); - if (!zip->inbuf) { - cli_errmsg("mszip_stream: Unable to allocate input buffer\n"); - free(zip); - return NULL; - } - - /* initialise decompression state */ - zip->ofd = ofd; - zip->wflag = 1; - zip->inbuf_size = input_buffer_size; - zip->error = CL_SUCCESS; - zip->repair_mode = repair_mode; - zip->flush_window = &mszip_flush_window; - zip->input_end = 0; - - zip->i_ptr = zip->i_end = &zip->inbuf[0]; - zip->o_ptr = zip->o_end = NULL; - zip->bit_buffer = 0; zip->bits_left = 0; - - zip->file = file; - zip->read_cb = read_cb; - - return zip; -} - -int mszip_decompress(struct mszip_stream *zip, uint32_t out_bytes) { - /* for the bit buffer */ - register unsigned int bit_buffer; - register int bits_left; - unsigned char *i_ptr, *i_end; - - int i, ret, state, error; - - /* easy answers */ - if (!zip) return CL_ENULLARG; - if (zip->error) return zip->error; - - /* flush out any stored-up bytes before we begin */ - 
i = zip->o_end - zip->o_ptr; - if (((off_t) i > out_bytes) && ((int) out_bytes >= 0)) i = (int) out_bytes; - if (i) { - if (zip->wflag && (ret = mspack_write(zip->ofd, zip->o_ptr, i, zip->file)) != CL_SUCCESS) { - return zip->error = ret; - } - zip->o_ptr += i; - out_bytes -= i; - } - if (out_bytes == 0) return CL_SUCCESS; - - while (out_bytes > 0) { - /* unpack another block */ - MSZIP_RESTORE_BITS; - - /* skip to next read 'CK' header */ - i = bits_left & 7; MSZIP_REMOVE_BITS(i); /* align to bytestream */ - state = 0; - do { - MSZIP_READ_BITS(i, 8); - if (i == 'C') state = 1; - else if ((state == 1) && (i == 'K')) state = 2; - else state = 0; - } while (state != 2); - - /* inflate a block, repair and realign if necessary */ - zip->window_posn = 0; - zip->bytes_output = 0; - MSZIP_STORE_BITS; - if ((error = mszip_inflate(zip))) { - cli_dbgmsg("mszip_decompress: inflate error %d\n", error); - if (zip->repair_mode) { - cli_dbgmsg("mszip_decompress: MSZIP error, %u bytes of data lost\n", - MSZIP_FRAME_SIZE - zip->bytes_output); - for (i = zip->bytes_output; i < MSZIP_FRAME_SIZE; i++) { - zip->window[i] = '\0'; - } - zip->bytes_output = MSZIP_FRAME_SIZE; - } - else { - return zip->error = (error > 0) ? error : CL_EFORMAT; - } - } - zip->o_ptr = &zip->window[0]; - zip->o_end = &zip->o_ptr[zip->bytes_output]; - - /* write a frame */ - i = (out_bytes < (off_t)zip->bytes_output) ? - (int)out_bytes : zip->bytes_output; - if (zip->wflag && (ret = mspack_write(zip->ofd, zip->o_ptr, i, zip->file)) != CL_SUCCESS) { - return zip->error = ret; - } - - /* mspack errors (i.e. 
read errors) are fatal and can't be recovered */ - if ((error > 0) && zip->repair_mode) return error; - - zip->o_ptr += i; - out_bytes -= i; - } - - if (out_bytes) - cli_dbgmsg("mszip_decompress: bytes left to output\n"); - - return CL_SUCCESS; -} - -void mszip_free(struct mszip_stream *zip) { - if (zip) { - free(zip->inbuf); - free(zip); - } -} - -/*************************************************************************** - * LZX decompression implementation - *************************************************************************** - * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted - * by Microsoft Corporation. - * - */ - -/* LZX decompressor input macros - * - * LZX_STORE_BITS stores bitstream state in lzx_stream structure - * LZX_RESTORE_BITS restores bitstream state from lzx_stream structure - * LZX_READ_BITS(var,n) takes N bits from the buffer and puts them in var - * LZX_ENSURE_BITS(n) ensures there are at least N bits in the bit buffer. - * LZX_PEEK_BITS(n) extracts without removing N bits from the bit buffer - * LZX_REMOVE_BITS(n) removes N bits from the bit buffer - * - */ - -#define LZX_BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT) - -#define LZX_STORE_BITS do { \ - lzx->i_ptr = i_ptr; \ - lzx->i_end = i_end; \ - lzx->bit_buffer = bit_buffer; \ - lzx->bits_left = bits_left; \ -} while (0) - -#define LZX_RESTORE_BITS do { \ - i_ptr = lzx->i_ptr; \ - i_end = lzx->i_end; \ - bit_buffer = lzx->bit_buffer; \ - bits_left = lzx->bits_left; \ -} while (0) - -#define LZX_ENSURE_BITS(nbits) \ - while (bits_left < (nbits)) { \ - if (i_ptr + 1 >= i_end) { \ - if (lzx_read_input(lzx)) return lzx->error; \ - i_ptr = lzx->i_ptr; \ - i_end = lzx->i_end; \ - } \ - bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0]) \ - << (LZX_BITBUF_WIDTH - 16 - bits_left); \ - bits_left += 16; \ - i_ptr += 2; \ - } - -#define LZX_PEEK_BITS(nbits) (bit_buffer >> (LZX_BITBUF_WIDTH - (nbits))) - -#define LZX_REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= 
(nbits))) - -#define LZX_READ_BITS(val, nbits) do { \ - LZX_ENSURE_BITS(nbits); \ - (val) = LZX_PEEK_BITS(nbits); \ - LZX_REMOVE_BITS(nbits); \ -} while (0) - -static int lzx_read_input(struct lzx_stream *lzx) { - int bread = lzx->read_cb(lzx->file, &lzx->inbuf[0], (int)lzx->inbuf_size); - if (bread < 0) { - if (lzx->file->error == CL_BREAK) - return lzx->error = CL_BREAK; - else - return lzx->error = CL_EFORMAT; - } - - /* huff decode's ENSURE_BYTES(16) might overrun the input stream, even - * if those bits aren't used, so fake 2 more bytes */ - if (bread == 0) { - if (lzx->input_end) { - cli_dbgmsg("lzx_read_input: out of input bytes\n"); - return lzx->error = CL_EREAD; - } - else { - bread = 2; - lzx->inbuf[0] = lzx->inbuf[1] = 0; - lzx->input_end = 1; - } - } - - lzx->i_ptr = &lzx->inbuf[0]; - lzx->i_end = &lzx->inbuf[bread]; - - return CL_SUCCESS; -} - -/* Huffman decoding macros */ - -/* LZX_READ_HUFFSYM(tablename, var) decodes one huffman symbol from the - * bitstream using the stated table and puts it in var. - */ -#define LZX_READ_HUFFSYM(tbl, var) do { \ - /* huffman symbols can be up to 16 bits long */ \ - LZX_ENSURE_BITS(16); \ - /* immediate table lookup of [tablebits] bits of the code */ \ - sym = lzx->tbl##_table[LZX_PEEK_BITS(LZX_##tbl##_TABLEBITS)]; \ - /* is the symbol is longer than [tablebits] bits? (i=node index) */ \ - if (sym >= LZX_##tbl##_MAXSYMBOLS) { \ - /* decode remaining bits by tree traversal */ \ - i = 1 << (LZX_BITBUF_WIDTH - LZX_##tbl##_TABLEBITS); \ - do { \ - /* one less bit. error if we run out of bits before decode */ \ - i >>= 1; \ - if (i == 0) { \ - cli_dbgmsg("lzx: out of bits in huffman decode\n"); \ - return lzx->error = CL_EFORMAT; \ - } \ - /* double node index and add 0 (left branch) or 1 (right) */ \ - sym <<= 1; sym |= (bit_buffer & i) ? 
1 : 0; \ - /* hop to next node index / decoded symbol */ \ - if(sym >= (1 << LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS * 2)) { \ - cli_dbgmsg("lzx: index out of table\n"); \ - return lzx->error = CL_EFORMAT; \ - } \ - sym = lzx->tbl##_table[sym]; \ - /* while we are still in node indicies, not decoded symbols */ \ - } while (sym >= LZX_##tbl##_MAXSYMBOLS); \ - } \ - /* result */ \ - (var) = sym; \ - /* look up the code length of that symbol and discard those bits */ \ - i = lzx->tbl##_len[sym]; \ - LZX_REMOVE_BITS(i); \ -} while (0) - -/* LZX_BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */ -#define LZX_BUILD_TABLE(tbl) \ - if (lzx_make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS, \ - &lzx->tbl##_len[0], &lzx->tbl##_table[0])) \ - { \ - cli_dbgmsg("lzx: failed to build %s table\n", #tbl); \ - return lzx->error = CL_EFORMAT; \ - } - -/* lzx_make_decode_table(nsyms, nbits, length[], table[]) - * - * This function was coded by David Tritscher. It builds a fast huffman - * decoding table from a canonical huffman code lengths table. - * - * nsyms = total number of symbols in this huffman tree. - * nbits = any symbols with a code length of nbits or less can be decoded - * in one lookup of the table. - * length = A table to get code lengths from [0 to syms-1] - * table = The table to fill up with decoded symbols and pointers. 
- * - * Returns 0 for OK or 1 for error - */ - -static int lzx_make_decode_table(unsigned int nsyms, unsigned int nbits, - unsigned char *length, unsigned short *table) -{ - register unsigned short sym; - register unsigned int leaf, fill; - register unsigned char bit_num; - unsigned int pos = 0; /* the current position in the decode table */ - unsigned int table_mask = 1 << nbits; - unsigned int bit_mask = table_mask >> 1; /* don't do 0 length codes */ - unsigned int next_symbol = bit_mask; /* base of allocation for long codes */ - - /* fill entries for codes short enough for a direct mapping */ - for (bit_num = 1; bit_num <= nbits; bit_num++) { - for (sym = 0; sym < nsyms; sym++) { - if (length[sym] != bit_num) continue; - leaf = pos; - if((pos += bit_mask) > table_mask) return 1; /* table overrun */ - /* fill all possible lookups of this symbol with the symbol itself */ - for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym; - } - bit_mask >>= 1; - } - - /* full table already? */ - if (pos == table_mask) return 0; - - /* clear the remainder of the table */ - for (sym = pos; sym < table_mask; sym++) table[sym] = 0xFFFF; - - /* allow codes to be up to nbits+16 long, instead of nbits */ - pos <<= 16; - table_mask <<= 16; - bit_mask = 1 << 15; - - for (bit_num = nbits+1; bit_num <= 16; bit_num++) { - for (sym = 0; sym < nsyms; sym++) { - if (length[sym] != bit_num) continue; - - leaf = pos >> 16; - for (fill = 0; fill < bit_num - nbits; fill++) { - /* if this path hasn't been taken yet, 'allocate' two entries */ - if (table[leaf] == 0xFFFF) { - table[(next_symbol << 1)] = 0xFFFF; - table[(next_symbol << 1) + 1] = 0xFFFF; - table[leaf] = next_symbol++; - } - /* follow the path and select either left or right for next bit */ - leaf = table[leaf] << 1; - if ((pos >> (15-fill)) & 1) leaf++; - } - table[leaf] = sym; - - if ((pos += bit_mask) > table_mask) return 1; /* table overflow */ - } - bit_mask >>= 1; - } - - /* full table? 
*/ - if (pos == table_mask) return 0; - - /* either erroneous table, or all elements are 0 - let's find out. */ - for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1; - return 0; -} - -/* LZX_READ_LENGTHS(tablename, first, last) reads in code lengths for symbols - * first to last in the given table. The code lengths are stored in their - * own special LZX way. - */ -#define LZX_READ_LENGTHS(tbl, first, last) do { \ - LZX_STORE_BITS; \ - if (lzx_read_lens(lzx, &lzx->tbl##_len[0], (first), \ - (unsigned int)(last))) return lzx->error; \ - LZX_RESTORE_BITS; \ -} while (0) - -static int lzx_read_lens(struct lzx_stream *lzx, unsigned char *lens, - unsigned int first, unsigned int last) -{ - /* bit buffer and huffman symbol decode variables */ - register unsigned int bit_buffer; - register int bits_left, i; - register unsigned short sym; - unsigned char *i_ptr, *i_end; - - unsigned int x, y; - int z; - - LZX_RESTORE_BITS; - - /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */ - for (x = 0; x < 20; x++) { - LZX_READ_BITS(y, 4); - lzx->PRETREE_len[x] = y; - } - LZX_BUILD_TABLE(PRETREE); - - for (x = first; x < last; ) { - LZX_READ_HUFFSYM(PRETREE, z); - if (z == 17) { - /* code = 17, run of ([read 4 bits]+4) zeros */ - LZX_READ_BITS(y, 4); y += 4; - while (y--) lens[x++] = 0; - } - else if (z == 18) { - /* code = 18, run of ([read 5 bits]+20) zeros */ - LZX_READ_BITS(y, 5); y += 20; - while (y--) lens[x++] = 0; - } - else if (z == 19) { - /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */ - LZX_READ_BITS(y, 1); y += 4; - LZX_READ_HUFFSYM(PRETREE, z); - z = lens[x] - z; if (z < 0) z += 17; - while (y--) lens[x++] = z; - } - else { - /* code = 0 to 16, delta current length entry */ - z = lens[x] - z; if (z < 0) z += 17; - lens[x++] = z; - } - } - - LZX_STORE_BITS; - - return CL_SUCCESS; -} - -static void lzx_reset_state(struct lzx_stream *lzx) { - int i; - - lzx->R0 = 1; - lzx->R1 = 1; - lzx->R2 = 1; - lzx->header_read = 0; - 
lzx->block_remaining = 0; - lzx->block_type = LZX_BLOCKTYPE_INVALID; - - /* initialise tables to 0 (because deltas will be applied to them) */ - for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0; - for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) lzx->LENGTH_len[i] = 0; -} - -/*-------- main LZX code --------*/ - -struct lzx_stream *lzx_init(int ofd, - int window_bits, - int reset_interval, - int input_buffer_size, - off_t output_length, - struct cab_file *file, - int (*read_cb)(struct cab_file *, unsigned char *, int)) -{ - unsigned int window_size = 1 << window_bits; - struct lzx_stream *lzx; - int i, j; - - /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ - if (window_bits < 15 || window_bits > 21) return NULL; - - input_buffer_size = (input_buffer_size + 1) & -2; - if (!input_buffer_size) return NULL; - - /* allocate decompression state */ - if (!(lzx = cli_calloc(1, sizeof(struct lzx_stream)))) { - return NULL; - } - - for (i = 0, j = 0; i < 51; i += 2) { - lzx->extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */ - if(i < 50) - lzx->extra_bits[i+1] = j; - if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */ - } - - for (i = 0, j = 0; i < 51; i++) { - lzx->position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */ - j += 1 << lzx->extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... 
*/ - } - - /* allocate decompression window and input buffer */ - lzx->window = cli_calloc(1, (size_t) window_size); - if(!lzx->window) { - free(lzx); - return NULL; - } - - lzx->inbuf = cli_calloc(1, (size_t) input_buffer_size); - if (!lzx->inbuf) { - free(lzx->window); - free(lzx); - return NULL; - } - - /* initialise decompression state */ - lzx->ofd = ofd; - lzx->wflag = 1; - lzx->offset = 0; - lzx->length = output_length; - lzx->file = file; - lzx->read_cb = read_cb; - - lzx->inbuf_size = input_buffer_size; - lzx->window_size = 1 << window_bits; - lzx->window_posn = 0; - lzx->frame_posn = 0; - lzx->frame = 0; - lzx->reset_interval = reset_interval; - lzx->intel_filesize = 0; - lzx->intel_curpos = 0; - - /* window bits: 15 16 17 18 19 20 21 - * position slots: 30 32 34 36 38 42 50 */ - lzx->posn_slots = ((window_bits == 21) ? 50 : - ((window_bits == 20) ? 42 : (window_bits << 1))); - lzx->intel_started = 0; - lzx->input_end = 0; - - lzx->error = CL_SUCCESS; - - lzx->i_ptr = lzx->i_end = &lzx->inbuf[0]; - lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0]; - lzx->bit_buffer = lzx->bits_left = 0; - - lzx_reset_state(lzx); - return lzx; -} - -void lzx_set_output_length(struct lzx_stream *lzx, off_t out_bytes) { - if (lzx) lzx->length = out_bytes; -} - -int lzx_decompress(struct lzx_stream *lzx, uint32_t out_bytes) { - /* bitstream reading and huffman variables */ - register unsigned int bit_buffer; - register int bits_left, i=0; - register unsigned short sym; - unsigned char *i_ptr, *i_end; - - int match_length, length_footer, extra, verbatim_bits, bytes_todo; - int this_run, main_element, aligned_bits, j, ret, warned=0; - unsigned char *window, *runsrc, *rundest, buf[12]; - unsigned int frame_size=0, end_frame, match_offset, window_posn; - unsigned int R0, R1, R2; - - /* easy answers */ - if (!lzx) return CL_ENULLARG; - if (lzx->error) return lzx->error; - - /* flush out any stored-up bytes before we begin */ - i = lzx->o_end - lzx->o_ptr; - if (((off_t) i > out_bytes) && 
((int) out_bytes >= 0)) i = (int) out_bytes; - if (i) { - if (lzx->wflag && (ret = mspack_write(lzx->ofd, lzx->o_ptr, i, lzx->file)) != CL_SUCCESS) { - return lzx->error = ret; - } - lzx->o_ptr += i; - lzx->offset += i; - out_bytes -= i; - } - if (out_bytes == 0) return CL_SUCCESS; - - /* restore local state */ - LZX_RESTORE_BITS; - window = lzx->window; - window_posn = lzx->window_posn; - R0 = lzx->R0; - R1 = lzx->R1; - R2 = lzx->R2; - - end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1; - cli_dbgmsg("lzx_decompress: end frame = %u\n", end_frame); - - while (lzx->frame < end_frame) { - cli_dbgmsg("lzx_decompress: current frame = %u\n", lzx->frame); - /* have we reached the reset interval? (if there is one?) */ - if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) { - if (lzx->block_remaining) { - /* this is a file format error, but we need to extract what we can and scan that */ - cli_dbgmsg("lzx_decompress: %d bytes remaining at reset interval\n", lzx->block_remaining); - if (!warned) { - cli_dbgmsg("Detected an invalid reset interval during decompression.\n"); - warned++; - } - if (!lzx->header_read) { - /* cannot continue if no header at all */ - return lzx->error = CL_EFORMAT; - } - } else { - /* re-read the intel header and reset the huffman lengths */ - lzx_reset_state(lzx); - } - } - - /* read header if necessary */ - if (!lzx->header_read) { - /* read 1 bit. if bit=0, intel filesize = 0. - * if bit=1, read intel filesize (32 bits) */ - j = 0; LZX_READ_BITS(i, 1); if (i) { LZX_READ_BITS(i, 16); LZX_READ_BITS(j, 16); } - lzx->intel_filesize = (i << 16) | j; - lzx->header_read = 1; - } - - /* calculate size of frame: all frames are 32k except the final frame - * which is 32kb or less. this can only be calculated when lzx->length - * has been filled in. 
*/ - frame_size = LZX_FRAME_SIZE; - if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) { - frame_size = lzx->length - lzx->offset; - } - - /* decode until one more frame is available */ - bytes_todo = lzx->frame_posn + frame_size - window_posn; - while (bytes_todo > 0) { - /* initialise new block, if one is needed */ - if (lzx->block_remaining == 0) { - /* realign if previous block was an odd-sized UNCOMPRESSED block */ - if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) && - (lzx->block_length & 1)) - { - if (i_ptr == i_end) { - if (lzx_read_input(lzx)) return lzx->error; - i_ptr = lzx->i_ptr; - i_end = lzx->i_end; - } - i_ptr++; - } - - /* read block type (3 bits) and block length (24 bits) */ - LZX_READ_BITS(lzx->block_type, 3); - LZX_READ_BITS(i, 16); LZX_READ_BITS(j, 8); - lzx->block_remaining = lzx->block_length = (i << 8) | j; - - /* read individual block headers */ - switch (lzx->block_type) { - case LZX_BLOCKTYPE_ALIGNED: - /* read lengths of and build aligned huffman decoding tree */ - for (i = 0; i < 8; i++) { LZX_READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; } - LZX_BUILD_TABLE(ALIGNED); - /* no break -- rest of aligned header is same as verbatim */ - case LZX_BLOCKTYPE_VERBATIM: - /* read lengths of and build main huffman decoding tree */ - LZX_READ_LENGTHS(MAINTREE, 0, 256); - LZX_READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3)); - LZX_BUILD_TABLE(MAINTREE); - /* if the literal 0xE8 is anywhere in the block... 
*/ - if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1; - /* read lengths of and build lengths huffman decoding tree */ - LZX_READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); - LZX_BUILD_TABLE(LENGTH); - break; - - case LZX_BLOCKTYPE_UNCOMPRESSED: - /* because we can't assume otherwise */ - lzx->intel_started = 1; - - /* read 1-16 (not 0-15) bits to align to bytes */ - LZX_ENSURE_BITS(16); - if (bits_left > 16) i_ptr -= 2; - bits_left = 0; bit_buffer = 0; - - /* read 12 bytes of stored R0 / R1 / R2 values */ - for (rundest = &buf[0], i = 0; i < 12; i++) { - if (i_ptr == i_end) { - if (lzx_read_input(lzx)) return lzx->error; - i_ptr = lzx->i_ptr; - i_end = lzx->i_end; - } - *rundest++ = *i_ptr++; - } - R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); - R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24); - R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24); - break; - - default: - cli_dbgmsg("lzx_decompress: bad block type (0x%x)\n", lzx->block_type); - return lzx->error = CL_EFORMAT; - } - } - - /* decode more of the block: - * run = min(what's available, what's needed) */ - this_run = lzx->block_remaining; - if (this_run > bytes_todo) this_run = bytes_todo; - - /* assume we decode exactly this_run bytes, for now */ - bytes_todo -= this_run; - lzx->block_remaining -= this_run; - - /* decode at least this_run bytes */ - switch (lzx->block_type) { - case LZX_BLOCKTYPE_VERBATIM: - while (this_run > 0) { - LZX_READ_HUFFSYM(MAINTREE, main_element); - if (main_element < LZX_NUM_CHARS) { - /* literal: 0 to LZX_NUM_CHARS-1 */ - window[window_posn++] = main_element; - this_run--; - } - else { - /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ - main_element -= LZX_NUM_CHARS; - - /* get match length */ - match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; - if (match_length == LZX_NUM_PRIMARY_LENGTHS) { - LZX_READ_HUFFSYM(LENGTH, length_footer); - match_length += length_footer; - } - match_length += 
LZX_MIN_MATCH; - - /* get match offset */ - switch ((match_offset = (main_element >> 3))) { - case 0: match_offset = R0; break; - case 1: match_offset = R1; R1=R0; R0 = match_offset; break; - case 2: match_offset = R2; R2=R0; R0 = match_offset; break; - case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break; - default: - extra = lzx->extra_bits[match_offset]; - LZX_READ_BITS(verbatim_bits, extra); - match_offset = lzx->position_base[match_offset] - 2 + verbatim_bits; - R2 = R1; R1 = R0; R0 = match_offset; - } - - if ((window_posn + match_length) > lzx->window_size) { - cli_dbgmsg("lzx_decompress: match ran over window wrap\n"); - return lzx->error = CL_EFORMAT; - } - - /* copy match */ - rundest = &window[window_posn]; - i = match_length; - /* does match offset wrap the window? */ - if (match_offset > window_posn) { - /* j = length from match offset to end of window */ - j = match_offset - window_posn; - if (j > (int) lzx->window_size) { - cli_dbgmsg("lzx_decompress: match offset beyond window boundaries\n"); - return lzx->error = CL_EFORMAT; - } - runsrc = &window[lzx->window_size - j]; - if (j < i) { - /* if match goes over the window edge, do two copy runs */ - i -= j; while (j-- > 0) *rundest++ = *runsrc++; - runsrc = window; - } - while (i-- > 0) *rundest++ = *runsrc++; - } - else { - runsrc = rundest - match_offset; - if(i > (int) (lzx->window_size - window_posn)) - i = lzx->window_size - window_posn; - while (i-- > 0) *rundest++ = *runsrc++; - } - - this_run -= match_length; - window_posn += match_length; - } - } /* while (this_run > 0) */ - break; - - case LZX_BLOCKTYPE_ALIGNED: - while (this_run > 0) { - LZX_READ_HUFFSYM(MAINTREE, main_element); - if (main_element < LZX_NUM_CHARS) { - /* literal: 0 to LZX_NUM_CHARS-1 */ - window[window_posn++] = main_element; - this_run--; - } - else { - /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ - main_element -= LZX_NUM_CHARS; - - /* get match length */ - match_length = main_element & 
LZX_NUM_PRIMARY_LENGTHS; - if (match_length == LZX_NUM_PRIMARY_LENGTHS) { - LZX_READ_HUFFSYM(LENGTH, length_footer); - match_length += length_footer; - } - match_length += LZX_MIN_MATCH; - - /* get match offset */ - switch ((match_offset = (main_element >> 3))) { - case 0: match_offset = R0; break; - case 1: match_offset = R1; R1 = R0; R0 = match_offset; break; - case 2: match_offset = R2; R2 = R0; R0 = match_offset; break; - default: - extra = lzx->extra_bits[match_offset]; - match_offset = lzx->position_base[match_offset] - 2; - if (extra > 3) { - /* verbatim and aligned bits */ - extra -= 3; - LZX_READ_BITS(verbatim_bits, extra); - match_offset += (verbatim_bits << 3); - LZX_READ_HUFFSYM(ALIGNED, aligned_bits); - match_offset += aligned_bits; - } - else if (extra == 3) { - /* aligned bits only */ - LZX_READ_HUFFSYM(ALIGNED, aligned_bits); - match_offset += aligned_bits; - } - else if (extra > 0) { /* extra==1, extra==2 */ - /* verbatim bits only */ - LZX_READ_BITS(verbatim_bits, extra); - match_offset += verbatim_bits; - } - else /* extra == 0 */ { - /* ??? not defined in LZX specification! */ - match_offset = 1; - } - /* update repeated offset LRU queue */ - R2 = R1; R1 = R0; R0 = match_offset; - } - - if ((window_posn + match_length) > lzx->window_size) { - cli_dbgmsg("lzx_decompress: match ran over window wrap\n"); - return lzx->error = CL_EFORMAT; - } - - /* copy match */ - rundest = &window[window_posn]; - i = match_length; - /* does match offset wrap the window? 
*/ - if (match_offset > window_posn) { - /* j = length from match offset to end of window */ - j = match_offset - window_posn; - if (j > (int) lzx->window_size) { - cli_dbgmsg("lzx_decompress: match offset beyond window boundaries\n"); - return lzx->error = CL_EFORMAT; - } - runsrc = &window[lzx->window_size - j]; - if (j < i) { - /* if match goes over the window edge, do two copy runs */ - i -= j; while (j-- > 0) *rundest++ = *runsrc++; - runsrc = window; - } - while (i-- > 0) *rundest++ = *runsrc++; - } - else { - runsrc = rundest - match_offset; - while (i-- > 0) *rundest++ = *runsrc++; - } - - this_run -= match_length; - window_posn += match_length; - } - } /* while (this_run > 0) */ - break; - - case LZX_BLOCKTYPE_UNCOMPRESSED: - /* as this_run is limited not to wrap a frame, this also means it - * won't wrap the window (as the window is a multiple of 32k) */ - rundest = &window[window_posn]; - window_posn += this_run; - while (this_run > 0) { - if ((i = i_end - i_ptr)) { - if (i > this_run) i = this_run; - memcpy(rundest, i_ptr, (size_t) i); - rundest += i; - i_ptr += i; - this_run -= i; - } - else { - if (lzx_read_input(lzx)) return lzx->error; - i_ptr = lzx->i_ptr; - i_end = lzx->i_end; - } - } - break; - - default: - return lzx->error = CL_EFORMAT; /* might as well */ - } - - /* did the final match overrun our desired this_run length? */ - if (this_run < 0) { - if ((unsigned int)(-this_run) > lzx->block_remaining) { - cli_dbgmsg("lzx_decompress: overrun went past end of block by %d (%d remaining)\n", -this_run, lzx->block_remaining); - return lzx->error = CL_EFORMAT; - } - lzx->block_remaining -= -this_run; - } - } /* while (bytes_todo > 0) */ - - /* streams don't extend over frame boundaries */ - if ((window_posn - lzx->frame_posn) != frame_size) { - cli_dbgmsg("lzx_decompress: decode beyond output frame limits! 
%d != %d\n", window_posn - lzx->frame_posn, frame_size); - return lzx->error = CL_EFORMAT; - } - - /* re-align input bitstream */ - if (bits_left > 0) LZX_ENSURE_BITS(16); - if (bits_left & 15) LZX_REMOVE_BITS(bits_left & 15); - - /* check that we've used all of the previous frame first */ - if (lzx->o_ptr != lzx->o_end) { - cli_dbgmsg("lzx_decompress: %ld avail bytes, new %d frame\n", lzx->o_end-lzx->o_ptr, frame_size); - return lzx->error = CL_EFORMAT; - } - - /* does this intel block _really_ need decoding? */ - if (lzx->intel_started && lzx->intel_filesize && - (lzx->frame <= 32768) && (frame_size > 10)) - { - unsigned char *data = &lzx->e8_buf[0]; - unsigned char *dataend = &lzx->e8_buf[frame_size - 10]; - signed int curpos = lzx->intel_curpos; - signed int filesize = lzx->intel_filesize; - signed int abs_off, rel_off; - - /* copy e8 block to the e8 buffer and tweak if needed */ - lzx->o_ptr = data; - memcpy(data, &lzx->window[lzx->frame_posn], frame_size); - - while (data < dataend) { - if (*data++ != 0xE8) { curpos++; continue; } - abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); - if ((abs_off >= -curpos) && (abs_off < filesize)) { - rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; - data[0] = (unsigned char) rel_off; - data[1] = (unsigned char) (rel_off >> 8); - data[2] = (unsigned char) (rel_off >> 16); - data[3] = (unsigned char) (rel_off >> 24); - } - data += 4; - curpos += 5; - } - lzx->intel_curpos += frame_size; - } - else { - lzx->o_ptr = &lzx->window[lzx->frame_posn]; - if (lzx->intel_filesize) lzx->intel_curpos += frame_size; - } - lzx->o_end = &lzx->o_ptr[frame_size]; - - /* write a frame */ - i = (out_bytes < (off_t)frame_size) ? 
(unsigned int)out_bytes : frame_size; - if (lzx->wflag && (ret = mspack_write(lzx->ofd, lzx->o_ptr, i, lzx->file)) != CL_SUCCESS) { - return lzx->error = ret; - } - lzx->o_ptr += i; - lzx->offset += i; - out_bytes -= i; - - /* advance frame start position */ - lzx->frame_posn += frame_size; - lzx->frame++; - - /* wrap window / frame position pointers */ - if (window_posn == lzx->window_size) window_posn = 0; - if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0; - - } /* while (lzx->frame < end_frame) */ - - if (out_bytes) - cli_dbgmsg("lzx_decompress: bytes left to output\n"); - - /* store local state */ - LZX_STORE_BITS; - lzx->window_posn = window_posn; - lzx->R0 = R0; - lzx->R1 = R1; - lzx->R2 = R2; - - return CL_SUCCESS; -} - -void lzx_free(struct lzx_stream *lzx) { - if (lzx) { - free(lzx->inbuf); - free(lzx->window); - free(lzx); - } -} - -/*************************************************************************** - * Quantum decompression implementation - *************************************************************************** - * The Quantum method was created by David Stafford, adapted by Microsoft - * Corporation. - * - * This decompressor is based on an implementation by Matthew Russotto, used - * with permission. - * - * This decompressor was researched and implemented by Matthew Russotto. It - * has since been tidied up by Stuart Caie. More information can be found at - * http://www.speakeasy.org/~russotto/quantumcomp.html - */ - -/* Quantum decompressor bitstream reading macros - * - * QTM_STORE_BITS stores bitstream state in qtm_stream structure - * QTM_RESTORE_BITS restores bitstream state from qtm_stream structure - * QTM_READ_BITS(var,n) takes N bits from the buffer and puts them in var - * QTM_FILL_BUFFER if there is room for another 16 bits, reads another - * 16 bits from the input stream. 
- * QTM_PEEK_BITS(n) extracts without removing N bits from the bit buffer - * QTM_REMOVE_BITS(n) removes N bits from the bit buffer - * - * These bit access routines work by using the area beyond the MSB and the - * LSB as a free source of zeroes. This avoids having to mask any bits. - * So we have to know the bit width of the bitbuffer variable. - */ - -#define QTM_BITBUF_WIDTH (sizeof(unsigned int) * CHAR_BIT) - -#define QTM_STORE_BITS do { \ - qtm->i_ptr = i_ptr; \ - qtm->i_end = i_end; \ - qtm->bit_buffer = bit_buffer; \ - qtm->bits_left = bits_left; \ -} while (0) - -#define QTM_RESTORE_BITS do { \ - i_ptr = qtm->i_ptr; \ - i_end = qtm->i_end; \ - bit_buffer = qtm->bit_buffer; \ - bits_left = qtm->bits_left; \ -} while (0) - -/* adds 16 bits to bit buffer, if there's space for the new bits */ -#define QTM_FILL_BUFFER do { \ - if (bits_left <= (QTM_BITBUF_WIDTH - 16)) { \ - if (i_ptr >= i_end) { \ - if (qtm_read_input(qtm)) return qtm->error; \ - i_ptr = qtm->i_ptr; \ - i_end = qtm->i_end; \ - } \ - bit_buffer |= ((i_ptr[0] << 8) | i_ptr[1]) \ - << (QTM_BITBUF_WIDTH - 16 - bits_left); \ - bits_left += 16; \ - i_ptr += 2; \ - } \ -} while (0) - -#define QTM_PEEK_BITS(n) (bit_buffer >> (QTM_BITBUF_WIDTH - (n))) -#define QTM_REMOVE_BITS(n) ((bit_buffer <<= (n)), (bits_left -= (n))) - -#define QTM_READ_BITS(val, bits) do { \ - (val) = 0; \ - for (bits_needed = (bits); bits_needed > 0; bits_needed -= bit_run) { \ - QTM_FILL_BUFFER; \ - bit_run = (bits_left < bits_needed) ? 
bits_left : bits_needed; \ - (val) = ((val) << bit_run) | QTM_PEEK_BITS(bit_run); \ - QTM_REMOVE_BITS(bit_run); \ - } \ -} while (0) - -static int qtm_read_input(struct qtm_stream *qtm) { - int nread = qtm->read_cb(qtm->file, &qtm->inbuf[0], (int)qtm->inbuf_size); - if (nread < 0) { - if (qtm->file->error == CL_BREAK) - return qtm->error = CL_BREAK; - else - return qtm->error = CL_EFORMAT; - } - - if (nread == 0) { - if (qtm->input_end) { - cli_dbgmsg("qtm_read_input: out of input bytes\n"); - return qtm->error = CL_EREAD; - } - else { - nread = 2; - qtm->inbuf[0] = qtm->inbuf[1] = 0; - qtm->input_end = 1; - } - } - - qtm->i_ptr = &qtm->inbuf[0]; - qtm->i_end = &qtm->inbuf[nread]; - return CL_SUCCESS; -} - -/* Arithmetic decoder: - * - * QTM_GET_SYMBOL(model, var) fetches the next symbol from the stated model - * and puts it in var. - * - * If necessary, qtm_update_model() is called. - */ -#define QTM_GET_SYMBOL(model, var) do { \ - range = ((H - L) & 0xFFFF) + 1; \ - symf = ((((C - L + 1) * model.syms[0].cumfreq)-1) / range) & 0xFFFF; \ - \ - for (i = 1; i < model.entries; i++) { \ - if (model.syms[i].cumfreq <= symf) break; \ - } \ - (var) = model.syms[i-1].sym; \ - \ - range = (H - L) + 1; \ - symf = model.syms[0].cumfreq; \ - H = L + ((model.syms[i-1].cumfreq * range) / symf) - 1; \ - L = L + ((model.syms[i].cumfreq * range) / symf); \ - \ - do { model.syms[--i].cumfreq += 8; } while (i > 0); \ - if (model.syms[0].cumfreq > 3800) qtm_update_model(&model); \ - \ - while (1) { \ - if ((L & 0x8000) != (H & 0x8000)) { \ - if ((L & 0x4000) && !(H & 0x4000)) { \ - /* underflow case */ \ - C ^= 0x4000; L &= 0x3FFF; H |= 0x4000; \ - } \ - else break; \ - } \ - L <<= 1; H = (H << 1) | 1; \ - QTM_FILL_BUFFER; \ - C = (C << 1) | QTM_PEEK_BITS(1); \ - QTM_REMOVE_BITS(1); \ - } \ -} while (0) - -static void qtm_update_model(struct qtm_model *model) { - struct qtm_modelsym tmp; - int i, j; - - if (--model->shiftsleft) { - for (i = model->entries - 1; i >= 0; i--) { - /* -1, 
not -2; the 0 entry saves this */ - model->syms[i].cumfreq >>= 1; - if (model->syms[i].cumfreq <= model->syms[i+1].cumfreq) { - model->syms[i].cumfreq = model->syms[i+1].cumfreq + 1; - } - } - } - else { - model->shiftsleft = 50; - for (i = 0; i < model->entries; i++) { - /* no -1, want to include the 0 entry */ - /* this converts cumfreqs into frequencies, then shifts right */ - model->syms[i].cumfreq -= model->syms[i+1].cumfreq; - model->syms[i].cumfreq++; /* avoid losing things entirely */ - model->syms[i].cumfreq >>= 1; - } - - /* now sort by frequencies, decreasing order -- this must be an - * inplace selection sort, or a sort with the same (in)stability - * characteristics */ - for (i = 0; i < model->entries - 1; i++) { - for (j = i + 1; j < model->entries; j++) { - if (model->syms[i].cumfreq < model->syms[j].cumfreq) { - tmp = model->syms[i]; - model->syms[i] = model->syms[j]; - model->syms[j] = tmp; - } - } - } - - /* then convert frequencies back to cumfreq */ - for (i = model->entries - 1; i >= 0; i--) { - model->syms[i].cumfreq += model->syms[i+1].cumfreq; - } - } -} - -/* Initialises a model to decode symbols from [start] to [start]+[len]-1 */ -static void qtm_init_model(struct qtm_model *model, - struct qtm_modelsym *syms, int start, int len) -{ - int i; - - model->shiftsleft = 4; - model->entries = len; - model->syms = syms; - - for (i = 0; i <= len; i++) { - syms[i].sym = start + i; /* actual symbol */ - syms[i].cumfreq = len - i; /* current frequency of that symbol */ - } -} - - -/*-------- main Quantum code --------*/ - -struct qtm_stream *qtm_init(int ofd, - int window_bits, int input_buffer_size, - struct cab_file *file, - int (*read_cb)(struct cab_file *, unsigned char *, int)) -{ - unsigned int window_size = 1 << window_bits; - struct qtm_stream *qtm; - unsigned offset; - int i; - - /* Quantum supports window sizes of 2^10 (1Kb) through 2^21 (2Mb) */ - - /* tk: temporary fix: only process 32KB+ window sizes */ - if (window_bits < 15 || 
window_bits > 21) return NULL; - - input_buffer_size = (input_buffer_size + 1) & -2; - if (input_buffer_size < 2) return NULL; - - /* allocate decompression state */ - if (!(qtm = cli_calloc(1, sizeof(struct qtm_stream)))) { - return NULL; - } - - for (i = 0, offset = 0; i < 42; i++) { - qtm->position_base[i] = offset; - qtm->extra_bits[i] = ((i < 2) ? 0 : (i - 2)) >> 1; - offset += 1 << qtm->extra_bits[i]; - } - - for (i = 0, offset = 0; i < 26; i++) { - qtm->length_base[i] = offset; - qtm->length_extra[i] = (i < 2 ? 0 : i - 2) >> 2; - offset += 1 << qtm->length_extra[i]; - } - qtm->length_base[26] = 254; qtm->length_extra[26] = 0; - - /* allocate decompression window and input buffer */ - qtm->window = cli_malloc((size_t) window_size); - if (!qtm->window) { - cli_errmsg("qtm_init: Unable to allocate decompression window\n"); - free(qtm); - return NULL; - } - - qtm->inbuf = cli_malloc((size_t) input_buffer_size); - if (!qtm->inbuf) { - cli_errmsg("qtm_init: Unable to allocate input buffer\n"); - free(qtm->window); - free(qtm); - return NULL; - } - - /* initialise decompression state */ - qtm->ofd = ofd; - qtm->wflag = 1; - qtm->inbuf_size = input_buffer_size; - qtm->window_size = window_size; - qtm->window_posn = 0; - qtm->frame_start = 0; - qtm->header_read = 0; - qtm->error = CL_SUCCESS; - - qtm->i_ptr = qtm->i_end = &qtm->inbuf[0]; - qtm->o_ptr = qtm->o_end = &qtm->window[0]; - qtm->bits_left = 0; - qtm->bit_buffer = 0; - - /* initialise arithmetic coding models - * - model 4 depends on window size, ranges from 20 to 24 - * - model 5 depends on window size, ranges from 20 to 36 - * - model 6pos depends on window size, ranges from 20 to 42 - */ - i = window_bits * 2; - qtm_init_model(&qtm->model0, &qtm->m0sym[0], 0, 64); - qtm_init_model(&qtm->model1, &qtm->m1sym[0], 64, 64); - qtm_init_model(&qtm->model2, &qtm->m2sym[0], 128, 64); - qtm_init_model(&qtm->model3, &qtm->m3sym[0], 192, 64); - qtm_init_model(&qtm->model4, &qtm->m4sym[0], 0, (i > 24) ? 
24 : i); - qtm_init_model(&qtm->model5, &qtm->m5sym[0], 0, (i > 36) ? 36 : i); - qtm_init_model(&qtm->model6, &qtm->m6sym[0], 0, i); - qtm_init_model(&qtm->model6len, &qtm->m6lsym[0], 0, 27); - qtm_init_model(&qtm->model7, &qtm->m7sym[0], 0, 7); - - qtm->file = file; - qtm->read_cb = read_cb; - - /* all ok */ - return qtm; -} - -int qtm_decompress(struct qtm_stream *qtm, uint32_t out_bytes) { - unsigned int frame_start, frame_end, window_posn, match_offset, range; - unsigned char *window, *i_ptr, *i_end, *runsrc, *rundest; - int i, j, selector, extra, sym, match_length, ret; - unsigned short H, L, C, symf; - - register unsigned int bit_buffer; - register unsigned char bits_left; - unsigned char bits_needed, bit_run; - - /* easy answers */ - if (!qtm) return CL_ENULLARG; - if (qtm->error) return qtm->error; - - /* flush out any stored-up bytes before we begin */ - i = qtm->o_end - qtm->o_ptr; - if (((off_t) i > out_bytes) && ((int) out_bytes >= 0)) i = (int) out_bytes; - if (i) { - if (qtm->wflag && (ret = mspack_write(qtm->ofd, qtm->o_ptr, i, qtm->file)) != CL_SUCCESS) { - return qtm->error = ret; - } - qtm->o_ptr += i; - out_bytes -= i; - } - if (out_bytes == 0) return CL_SUCCESS; - - /* restore local state */ - QTM_RESTORE_BITS; - window = qtm->window; - window_posn = qtm->window_posn; - frame_start = qtm->frame_start; - H = qtm->H; - L = qtm->L; - C = qtm->C; - - /* while we do not have enough decoded bytes in reserve: */ - while ((qtm->o_end - qtm->o_ptr) < out_bytes) { - - /* read header if necessary. 
Initialises H, L and C */ - if (!qtm->header_read) { - H = 0xFFFF; L = 0; QTM_READ_BITS(C, 16); - qtm->header_read = 1; - } - - /* decode more, at most up to to frame boundary */ - frame_end = window_posn + (out_bytes - (qtm->o_end - qtm->o_ptr)); - if ((frame_start + QTM_FRAME_SIZE) < frame_end) { - frame_end = frame_start + QTM_FRAME_SIZE; - } - if (frame_end < window_posn) { - cli_dbgmsg("qtm_decompress: window position beyond end of frame\n"); - return qtm->error = CL_EFORMAT; - } - - while (window_posn < frame_end) { - QTM_GET_SYMBOL(qtm->model7, selector); - if (selector < 4) { - struct qtm_model *mdl = (selector == 0) ? &qtm->model0 : - ((selector == 1) ? &qtm->model1 : - ((selector == 2) ? &qtm->model2 : - &qtm->model3)); - QTM_GET_SYMBOL((*mdl), sym); - window[window_posn++] = sym; - } - else { - switch (selector) { - case 4: /* selector 4 = fixed length match (3 bytes) */ - QTM_GET_SYMBOL(qtm->model4, sym); - QTM_READ_BITS(extra, qtm->extra_bits[sym]); - match_offset = qtm->position_base[sym] + extra + 1; - match_length = 3; - break; - - case 5: /* selector 5 = fixed length match (4 bytes) */ - QTM_GET_SYMBOL(qtm->model5, sym); - QTM_READ_BITS(extra, qtm->extra_bits[sym]); - match_offset = qtm->position_base[sym] + extra + 1; - match_length = 4; - break; - - case 6: /* selector 6 = variable length match */ - QTM_GET_SYMBOL(qtm->model6len, sym); - QTM_READ_BITS(extra, qtm->length_extra[sym]); - match_length = qtm->length_base[sym] + extra + 5; - - QTM_GET_SYMBOL(qtm->model6, sym); - QTM_READ_BITS(extra, qtm->extra_bits[sym]); - match_offset = qtm->position_base[sym] + extra + 1; - break; - - default: - /* should be impossible, model7 can only return 0-6 */ - return qtm->error = CL_EFORMAT; - } - - if (window_posn + match_length > qtm->window_size) { - cli_dbgmsg("qtm_decompress: match ran over window wrap\n"); - return qtm->error = CL_EFORMAT; - } - - rundest = &window[window_posn]; - i = match_length; - /* does match offset wrap the window? 
*/ - if (match_offset > window_posn) { - /* j = length from match offset to end of window */ - j = match_offset - window_posn; - if (j > (int) qtm->window_size) { - cli_dbgmsg("qtm_decompress: match offset beyond window boundaries\n"); - return qtm->error = CL_EFORMAT; - } - runsrc = &window[qtm->window_size - j]; - if (j < i) { - /* if match goes over the window edge, do two copy runs */ - i -= j; while (j-- > 0) *rundest++ = *runsrc++; - runsrc = window; - } - while (i-- > 0) *rundest++ = *runsrc++; - } - else { - runsrc = rundest - match_offset; - if(i > (int) (qtm->window_size - window_posn)) - i = qtm->window_size - window_posn; - while (i-- > 0) *rundest++ = *runsrc++; - } - window_posn += match_length; - } - } /* while (window_posn < frame_end) */ - - qtm->o_end = &window[window_posn]; - - /* another frame completed? */ - if ((window_posn - frame_start) >= QTM_FRAME_SIZE) { - if ((window_posn - frame_start) != QTM_FRAME_SIZE) { - cli_dbgmsg("qtm_decompress: overshot frame alignment\n"); - return qtm->error = CL_EFORMAT; - } - - /* re-align input */ - if (bits_left & 7) QTM_REMOVE_BITS(bits_left & 7); - do { QTM_READ_BITS(i, 8); } while (i != 0xFF); - qtm->header_read = 0; - - /* window wrap? 
*/ - if (window_posn == qtm->window_size) { - /* flush all currently stored data */ - i = (qtm->o_end - qtm->o_ptr); - if(i <= 0) - break; - if (qtm->wflag && (ret = mspack_write(qtm->ofd, qtm->o_ptr, i, qtm->file)) != CL_SUCCESS) { - return qtm->error = ret; - } - out_bytes -= i; - qtm->o_ptr = &window[0]; - qtm->o_end = &window[0]; - window_posn = 0; - } - - frame_start = window_posn; - } - - } /* while (more bytes needed) */ - - if (out_bytes > 0) { - i = (int) out_bytes; - if (qtm->wflag && (ret = mspack_write(qtm->ofd, qtm->o_ptr, i, qtm->file)) != CL_SUCCESS) { - return qtm->error = ret; - } - qtm->o_ptr += i; - } - - /* store local state */ - QTM_STORE_BITS; - qtm->window_posn = window_posn; - qtm->frame_start = frame_start; - qtm->H = H; - qtm->L = L; - qtm->C = C; - - return CL_SUCCESS; -} - -void qtm_free(struct qtm_stream *qtm) { - if (qtm) { - free(qtm->window); - free(qtm->inbuf); - free(qtm); - } -} diff --git a/libclamav/mspack.h b/libclamav/mspack.h deleted file mode 100644 index 0ed472caeea3..000000000000 --- a/libclamav/mspack.h +++ /dev/null @@ -1,294 +0,0 @@ -/* - * This file includes code from libmspack adapted for libclamav by - * tkojm@clamav.net - * - * Copyright (C) 2003-2004 Stuart Caie - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1 as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 - * USA - */ - -#ifndef __MSPACK_H -#define __MSPACK_H - -#include -#include "cab.h" - - -/*************************************************************************** - * MS-ZIP decompression definitions * - ***************************************************************************/ - -#define MSZIP_FRAME_SIZE (32768) /* size of LZ history window */ -#define MSZIP_MAX_HUFFBITS (16) /* maximum huffman code length */ -#define MSZIP_LITERAL_MAXSYMBOLS (288) /* literal/length huffman tree */ -#define MSZIP_LITERAL_TABLEBITS (9) -#define MSZIP_DISTANCE_MAXSYMBOLS (32) /* distance huffman tree */ -#define MSZIP_DISTANCE_TABLEBITS (6) - -/* if there are less direct lookup entries than symbols, the longer - * code pointers will be <= maxsymbols. This must not happen, or we - * will decode entries badly */ -#if (1 << MSZIP_LITERAL_TABLEBITS) < (MSZIP_LITERAL_MAXSYMBOLS * 2) -# define MSZIP_LITERAL_TABLESIZE (MSZIP_LITERAL_MAXSYMBOLS * 4) -#else -# define MSZIP_LITERAL_TABLESIZE ((1 << MSZIP_LITERAL_TABLEBITS) + \ - (MSZIP_LITERAL_MAXSYMBOLS * 2)) -#endif - -#if (1 << MSZIP_DISTANCE_TABLEBITS) < (MSZIP_DISTANCE_MAXSYMBOLS * 2) -# define MSZIP_DISTANCE_TABLESIZE (MSZIP_DISTANCE_MAXSYMBOLS * 4) -#else -# define MSZIP_DISTANCE_TABLESIZE ((1 << MSZIP_DISTANCE_TABLEBITS) + \ - (MSZIP_DISTANCE_MAXSYMBOLS * 2)) -#endif - -struct mszip_stream { - int ofd; /* output file descriptor */ - - /* inflate() will call this whenever the window should be emptied. 
*/ - int (*flush_window)(struct mszip_stream *, unsigned int); - - int error, repair_mode, bytes_output, input_end; - - /* I/O buffering */ - unsigned char *inbuf, *i_ptr, *i_end, *o_ptr, *o_end; - unsigned int bit_buffer, bits_left, inbuf_size; - - unsigned int window_posn; /* offset within window */ - - /* huffman code lengths */ - unsigned char LITERAL_len[MSZIP_LITERAL_MAXSYMBOLS]; - unsigned char DISTANCE_len[MSZIP_DISTANCE_MAXSYMBOLS]; - - /* huffman decoding tables */ - unsigned short LITERAL_table [MSZIP_LITERAL_TABLESIZE]; - unsigned short DISTANCE_table[MSZIP_DISTANCE_TABLESIZE]; - - /* 32kb history window */ - unsigned char window[MSZIP_FRAME_SIZE]; - - /* cabinet related stuff */ - struct cab_file *file; - int (*read_cb)(struct cab_file *, unsigned char *, int); - - unsigned char wflag; /* write flag */ - unsigned int last; /* prior end of content buffer */ - -}; - -struct mszip_stream *mszip_init(int ofd, - int input_buffer_size, - int repair_mode, - struct cab_file *file, - int (*read_cb)(struct cab_file *, unsigned char *, int)); - -extern int mszip_decompress(struct mszip_stream *zip, uint32_t out_bytes); - -void mszip_free(struct mszip_stream *zip); - - -/*************************************************************************** - * Quantum decompression definitions * - ***************************************************************************/ - -/* Quantum compression / decompression definitions */ - -#define QTM_FRAME_SIZE (32768) - -struct qtm_modelsym { - unsigned short sym, cumfreq; -}; - -struct qtm_model { - int shiftsleft, entries; - struct qtm_modelsym *syms; -}; - -struct qtm_stream { - int ofd; /* output file descriptor */ - - unsigned char *window; /* decoding window */ - unsigned int window_size; /* window size */ - unsigned int window_posn; /* decompression offset within window */ - unsigned int frame_start; /* start of current frame within window */ - - unsigned short H, L, C; /* high/low/current: arith coding state */ - unsigned 
char header_read; /* have we started decoding a new frame? */ - unsigned char wflag; /* write flag */ - - int error, input_end; - - /* data tables */ - unsigned int position_base[42]; - unsigned char extra_bits[42], length_base[27], length_extra[27]; - - /* four literal models, each representing 64 symbols - * model0 for literals from 0 to 63 (selector = 0) - * model1 for literals from 64 to 127 (selector = 1) - * model2 for literals from 128 to 191 (selector = 2) - * model3 for literals from 129 to 255 (selector = 3) */ - struct qtm_model model0, model1, model2, model3; - - /* three match models. - * model4 for match with fixed length of 3 bytes - * model5 for match with fixed length of 4 bytes - * model6 for variable length match, encoded with model6len model */ - struct qtm_model model4, model5, model6, model6len; - - /* selector model. 0-6 to say literal (0,1,2,3) or match (4,5,6) */ - struct qtm_model model7; - - /* symbol arrays for all models */ - struct qtm_modelsym m0sym[64 + 1]; - struct qtm_modelsym m1sym[64 + 1]; - struct qtm_modelsym m2sym[64 + 1]; - struct qtm_modelsym m3sym[64 + 1]; - struct qtm_modelsym m4sym[24 + 1]; - struct qtm_modelsym m5sym[36 + 1]; - struct qtm_modelsym m6sym[42 + 1], m6lsym[27 + 1]; - struct qtm_modelsym m7sym[7 + 1]; - - /* I/O buffers - 1*/ - unsigned int bit_buffer; - - /* cabinet related stuff */ - struct cab_file *file; - int (*read_cb)(struct cab_file *, unsigned char *, int); - - /* I/O buffers - 2*/ - unsigned char *inbuf, *i_ptr, *i_end, *o_ptr, *o_end; - unsigned int inbuf_size; - unsigned char bits_left; - -}; - -extern struct qtm_stream *qtm_init(int ofd, - int window_bits, - int input_buffer_size, - struct cab_file *file, - int (*read_cb)(struct cab_file *, unsigned char *, int)); - -extern int qtm_decompress(struct qtm_stream *qtm, uint32_t out_bytes); - -void qtm_free(struct qtm_stream *qtm); - -/*************************************************************************** - * LZX decompression definitions * - 
***************************************************************************/ - -/* some constants defined by the LZX specification */ -#define LZX_MIN_MATCH (2) -#define LZX_MAX_MATCH (257) -#define LZX_NUM_CHARS (256) -#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */ -#define LZX_BLOCKTYPE_VERBATIM (1) -#define LZX_BLOCKTYPE_ALIGNED (2) -#define LZX_BLOCKTYPE_UNCOMPRESSED (3) -#define LZX_PRETREE_NUM_ELEMENTS (20) -#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */ -#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */ -#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */ - -/* LZX huffman defines: tweak tablebits as desired */ -#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) -#define LZX_PRETREE_TABLEBITS (6) -#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) -#define LZX_MAINTREE_TABLEBITS (12) -#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) -#define LZX_LENGTH_TABLEBITS (12) -#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS) -#define LZX_ALIGNED_TABLEBITS (7) -#define LZX_LENTABLE_SAFETY (64) /* table decoding overruns are allowed */ - -#define LZX_FRAME_SIZE (32768) /* the size of a frame in LZX */ - -struct lzx_stream { - int ofd; /* output file descriptor */ - - off_t offset; /* number of bytes actually output */ - off_t length; /* overall decompressed length of stream */ - - unsigned char *window; /* decoding window */ - unsigned int window_size; /* window size */ - unsigned int window_posn; /* decompression offset within window */ - unsigned int frame_posn; /* current frame offset within in window */ - unsigned int frame; /* the number of 32kb frames processed */ - unsigned int reset_interval; /* which frame do we reset the compressor? 
*/ - - unsigned int R0, R1, R2; /* for the LRU offset system */ - unsigned int block_length; /* uncompressed length of this LZX block */ - unsigned int block_remaining; /* uncompressed bytes still left to decode */ - - signed int intel_filesize; /* magic header value used for transform */ - signed int intel_curpos; /* current offset in transform space */ - - unsigned char intel_started; /* has intel E8 decoding started? */ - unsigned char block_type; /* type of the current block */ - unsigned char header_read; /* have we started decoding at all yet? */ - unsigned char posn_slots; /* how many posn slots in stream? */ - - int error; - - /* I/O buffering */ - unsigned char *inbuf, *i_ptr, *i_end, *o_ptr, *o_end; - unsigned int bit_buffer, bits_left, inbuf_size; - - /* huffman code lengths */ - unsigned char PRETREE_len [LZX_PRETREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; - unsigned char MAINTREE_len [LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; - unsigned char LENGTH_len [LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; - unsigned char ALIGNED_len [LZX_ALIGNED_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; - - /* huffman decoding tables */ - unsigned short PRETREE_table [(1 << LZX_PRETREE_TABLEBITS) + - (LZX_PRETREE_MAXSYMBOLS * 2)]; - unsigned short MAINTREE_table[(1 << LZX_MAINTREE_TABLEBITS) + - (LZX_MAINTREE_MAXSYMBOLS * 2)]; - unsigned short LENGTH_table [(1 << LZX_LENGTH_TABLEBITS) + - (LZX_LENGTH_MAXSYMBOLS * 2)]; - unsigned short ALIGNED_table [(1 << LZX_ALIGNED_TABLEBITS) + - (LZX_ALIGNED_MAXSYMBOLS * 2)]; - unsigned char input_end; /* have we reached the end of input? 
*/ - unsigned char wflag; /* write flag */ - - /* this is used purely for doing the intel E8 transform */ - unsigned char e8_buf[LZX_FRAME_SIZE]; - - unsigned int position_base[51]; - - /* cabinet related stuff */ - struct cab_file *file; - int (*read_cb)(struct cab_file *, unsigned char *, int); - - unsigned char extra_bits[51]; - -}; - -struct lzx_stream *lzx_init(int ofd, - int window_bits, - int reset_interval, - int input_buffer_size, - off_t output_length, - struct cab_file *file, - int (*read_cb)(struct cab_file *, unsigned char *, int)); - -extern void lzx_set_output_length(struct lzx_stream *lzx, - off_t output_length); - -extern int lzx_decompress(struct lzx_stream *lzx, uint32_t out_bytes); - -void lzx_free(struct lzx_stream *lzx); - -#endif diff --git a/libclamav/scanners.c b/libclamav/scanners.c index fa9063d08018..07a38eedc7a5 100644 --- a/libclamav/scanners.c +++ b/libclamav/scanners.c @@ -60,7 +60,7 @@ #include "vba_extract.h" #include "msexpand.h" #include "mbox.h" -#include "chmunpack.h" +#include "libmspack.h" #include "pe.h" #include "elf.h" #include "filetypes.h" @@ -73,8 +73,6 @@ #include "sis.h" #include "pdf.h" #include "str.h" -#include "mspack.h" -#include "cab.h" #include "rtf.h" #include "unarj.h" #include "nsis/nulsft.h" @@ -853,82 +851,6 @@ static int cli_scanszdd(cli_ctx *ctx) return ret; } -static int cli_scanmscab(cli_ctx *ctx, off_t sfx_offset) -{ - char *tempname; - int ret; - unsigned int files = 0; - struct cab_archive cab; - struct cab_file *file; - unsigned int corrupted_input; - unsigned int viruses_found = 0; - - cli_dbgmsg("in cli_scanmscab()\n"); - - if((ret = cab_open(*ctx->fmap, sfx_offset, &cab))) - return ret; - - for(file = cab.files; file; file = file->next) { - files++; - - if(cli_matchmeta(ctx, file->name, 0, file->length, 0, files, 0, NULL) == CL_VIRUS) { - if (!SCAN_ALL) { - ret = CL_VIRUS; - break; - } - viruses_found++; - } - - if(ctx->engine->maxscansize && ctx->scansize >= ctx->engine->maxscansize) { - ret = 
CL_CLEAN; - break; - } - - if(!(tempname = cli_gentemp(ctx->engine->tmpdir))) { - ret = CL_EMEM; - break; - } - - if(ctx->engine->maxscansize && ctx->scansize + ctx->engine->maxfilesize >= ctx->engine->maxscansize) - file->max_size = ctx->engine->maxscansize - ctx->scansize; - else - file->max_size = ctx->engine->maxfilesize ? ctx->engine->maxfilesize : 0xffffffff; - - cli_dbgmsg("CAB: Extracting file %s to %s, size %u, max_size: %u\n", file->name, tempname, file->length, (unsigned int) file->max_size); - file->written_size = 0; - if((ret = cab_extract(file, tempname))) { - cli_dbgmsg("CAB: Failed to extract file: %s\n", cl_strerror(ret)); - } else { - corrupted_input = ctx->corrupted_input; - if(file->length != file->written_size) { - cli_dbgmsg("CAB: Length from header %u but wrote %u bytes\n", (unsigned int) file->length, (unsigned int) file->written_size); - ctx->corrupted_input = 1; - } - ret = cli_scanfile(tempname, ctx); - ctx->corrupted_input = corrupted_input; - } - if(!ctx->engine->keeptmp) { - if (!access(tempname, R_OK) && cli_unlink(tempname)) { - free(tempname); - ret = CL_EUNLINK; - break; - } - } - free(tempname); - if(ret == CL_VIRUS) { - if (SCAN_ALL) - viruses_found++; - else - break; - } - } - - cab_free(&cab); - if (viruses_found) - return CL_VIRUS; - return ret; -} - static int vba_scandata(const unsigned char *data, unsigned int len, cli_ctx *ctx) { struct cli_matcher *groot = ctx->engine->root[0]; @@ -1568,72 +1490,6 @@ static int cli_scantar(cli_ctx *ctx, unsigned int posix) return ret; } -static int cli_scanmschm(cli_ctx *ctx) -{ - int ret = CL_CLEAN, rc; - chm_metadata_t metadata; - char *dir; - unsigned int viruses_found = 0; - - cli_dbgmsg("in cli_scanmschm()\n"); - - /* generate the temporary directory */ - if(!(dir = cli_gentemp(ctx->engine->tmpdir))) - return CL_EMEM; - - if(mkdir(dir, 0700)) { - cli_dbgmsg("CHM: Can't create temporary directory %s\n", dir); - free(dir); - return CL_ETMPDIR; - } - - ret = cli_chm_open(dir, &metadata, 
ctx); - if (ret != CL_SUCCESS) { - if(!ctx->engine->keeptmp) - cli_rmdirs(dir); - free(dir); - cli_dbgmsg("CHM: Error: %s\n", cl_strerror(ret)); - return ret; - } - - do { - ret = cli_chm_prepare_file(&metadata); - if (ret != CL_SUCCESS) { - break; - } - ret = cli_chm_extract_file(dir, &metadata, ctx); - if (ret == CL_SUCCESS) { - rc = cli_magic_scandesc(metadata.ofd, ctx); - close(metadata.ofd); - if (rc == CL_VIRUS) { - cli_dbgmsg("CHM: infected with %s\n", cli_get_last_virus(ctx)); - if (SCAN_ALL) - viruses_found++; - else { - ret = CL_VIRUS; - break; - } - } - } - - } while(ret == CL_SUCCESS); - - cli_chm_close(&metadata); - - if(!ctx->engine->keeptmp) - cli_rmdirs(dir); - - free(dir); - - cli_dbgmsg("CHM: Exit code: %d\n", ret); - if (ret == CL_BREAK) - ret = CL_CLEAN; - - if (SCAN_ALL && viruses_found) - return CL_VIRUS; - return ret; -} - static int cli_scanscrenc(cli_ctx *ctx) { char *tempname;