使用libzip讀取修改zip檔案comment的方法(支援新增擴充套件字元)
阿新 • • 發佈:2019-02-04
2014-10-01 wcdj
摘要:本文使用libzip讀取和修改zip檔案comment的方法,並支援新增擴充套件字元。前提是需要修改libzip的原始碼支援可以新增擴充套件字元, 否則_zip_guess_encoding會判斷出錯(修改程式碼 zip_set_archive_comment.c:65)。
0 方法
(1) 首先要安裝zlib和libzip編譯環境:
初始化編譯libzip
./configure --prefix="/Users/gerryyang/LAMP/libzip/install/libzip-0.11.2" --with-zlib="/Users/gerryyang/LAMP/zlib/install/zlib-1.2.8"
(2) 修改libzip的原始碼:
去除對comment編碼格式的判斷,zip_set_archive_comment.c:65
/*
 * Set, replace or remove the archive-level comment of an open zip archive.
 *
 * za       archive handle; must not be read-only
 * comment  bytes of the new comment (may be NULL only when len is 0)
 * len      number of bytes in comment; 0 removes the comment
 *
 * Returns 0 on success. On failure returns -1; for the read-only and
 * NULL-comment cases the error code is stored in za->error, and for an
 * allocation failure &za->error is handed to _zip_string_new.
 *
 * Patched variant: the upstream rejection of comments whose encoding is
 * guessed as CP437 is compiled out with "#if 0", so arbitrary (extended)
 * bytes are accepted as a comment.
 */
ZIP_EXTERN int zip_set_archive_comment(struct zip *za, const char *comment, zip_uint16_t len)
{
    struct zip_string *new_comment = NULL;
    int unchanged;

    if (ZIP_IS_RDONLY(za)) {
        _zip_error_set(&za->error, ZIP_ER_RDONLY, 0);
        return -1;
    }

    if (comment == NULL && len > 0) {
        _zip_error_set(&za->error, ZIP_ER_INVAL, 0);
        return -1;
    }

    if (len > 0) {
        new_comment = _zip_string_new((const zip_uint8_t *)comment, len, ZIP_FL_ENC_GUESS, &za->error);
        if (new_comment == NULL) {
            printf("_zip_string_new err\n");
            return -1;
        }
#if 0
        /* Disabled on purpose: upstream refuses comments guessed as CP437. */
        if (_zip_guess_encoding(new_comment, ZIP_ENCODING_UNKNOWN) == ZIP_ENCODING_CP437) {
            printf("_zip_guess_encoding err\n");
            _zip_string_free(new_comment);
            _zip_error_set(&za->error, ZIP_ER_INVAL, 0);
            return -1;
        }
#endif
    }

    /* Drop any previously pending (not yet written) comment change. */
    _zip_string_free(za->comment_changes);
    za->comment_changes = NULL;

    /* The new comment is a no-op when it equals the original one, or when
     * there was no original comment and none is being set. */
    if (za->comment_orig != NULL)
        unchanged = _zip_string_equal(za->comment_orig, new_comment) ? 1 : 0;
    else
        unchanged = (new_comment == NULL);

    if (unchanged) {
        _zip_string_free(new_comment);
        za->comment_changed = 0;
    }
    else {
        za->comment_changes = new_comment;
        za->comment_changed = 1;
    }

    return 0;
}
zip_utf-8.c:119
/*
 * Determine (and cache in str->encoding) the encoding of a zip string.
 *
 * str                string to classify; NULL is reported as ASCII
 * expected_encoding  encoding the caller requires, or ZIP_ENCODING_UNKNOWN
 *                    when the caller has no requirement
 *
 * Returns the detected encoding, or ZIP_ENCODING_ERROR when an expected
 * encoding was given and the detected one is neither that encoding nor ASCII.
 */
enum zip_encoding_type _zip_guess_encoding(struct zip_string *str, enum zip_encoding_type expected_encoding)
{
    enum zip_encoding_type enc;
    const zip_uint8_t *name;
    zip_uint32_t i, j, ulen;

    if (str == NULL)
        return ZIP_ENCODING_ASCII;

    name = str->raw;

    /* Reuse a previously stored result instead of rescanning the bytes. */
    if (str->encoding != ZIP_ENCODING_UNKNOWN)
        enc = str->encoding;
    else {
        enc = ZIP_ENCODING_ASCII;
        for (i=0; i<str->length; i++) {
            /* Bytes 32..127 plus CR, LF and TAB keep the current verdict. */
            if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t')
                continue;

            /* Any other byte must start a multi-byte sequence; ulen is the
             * number of following bytes that are checked below.
             * NOTE(review): the UTF_8_* masks are defined outside this
             * excerpt - presumably the standard UTF-8 lead-byte patterns. */
            enc = ZIP_ENCODING_UTF8_GUESSED;
            if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH)
                ulen = 1;
            else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH)
                ulen = 2;
            else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH)
                ulen = 3;
            else {
                /* Not a recognised lead byte: fall back to CP437. */
                enc = ZIP_ENCODING_CP437;
                break;
            }

            /* Sequence would run past the end of the string. */
            if (i + ulen >= str->length) {
                enc = ZIP_ENCODING_CP437;
                break;
            }

            /* Every following byte must match the continuation pattern. */
            for (j=1; j<=ulen; j++) {
                if ((name[i+j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) {
                    enc = ZIP_ENCODING_CP437;
                    goto done;
                }
            }
            /* Skip the bytes just validated; the loop's i++ skips the lead byte. */
            i += ulen;
        }
    }

done:
    /* Store the verdict on the string so later calls take the shortcut above. */
    str->encoding = enc;

    if (expected_encoding != ZIP_ENCODING_UNKNOWN) {
        /* A guessed UTF-8 result is upgraded to "known" when the caller
         * already expects UTF-8. */
        if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED)
            str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN;

        /* ASCII is accepted for any expected encoding; other mismatches fail. */
        if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII)
            return ZIP_ENCODING_ERROR;
    }

    return enc;
}
1 測試程式碼
參考程式碼:
https://github.com/gerryyang/mac-utils/tree/master/tools/libzip/src
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <errno.h>
#include "zip.h"
using namespace std;
/*
 * Convert a byte buffer into an upper-case hexadecimal C string.
 *
 * src  input bytes
 * len  number of bytes to read from src; nothing is read when len <= 0
 * dst  output buffer; receives two characters per input byte followed by a
 *      terminating NUL, so it must hold at least 2 * len + 1 bytes
 *
 * Always returns 0.
 */
int encode_hex_string(const unsigned char *src, int len, unsigned char *dst)
{
    static const unsigned char kDigits[] = "0123456789ABCDEF";
    int pos = 0;
    int idx;

    for (idx = 0; idx < len; ++idx)
    {
        const unsigned char byte = src[idx];
        dst[pos++] = kDigits[(byte >> 4) & 0x0f];   /* high nibble first */
        dst[pos++] = kDigits[byte & 0x0f];
    }
    dst[pos] = '\0';

    return 0;
}
int main(int argc, char* argv[])
{
if (argc < 3)
{
printf("usage: %s zipfile zipcomment\n", argv[0]);
exit(1);
}
string zipfile = argv[1];
string zipcomment = argv[2];
int error;
struct zip * zipfd = zip_open(zipfile.c_str(), ZIP_CHECKCONS, &error);
if (zipfd == NULL)
{
switch (error)
{
case ZIP_ER_NOENT:
printf("The file specified by path does not exist and ZIP_CREATE is not set [%d]\n", error);
break;
case ZIP_ER_EXISTS:
printf("The file specified by path exists and ZIP_EXCL is set [%d]\n", error);
break;
case ZIP_ER_INVAL:
printf("The path argument is NULL [%d]\n", error);
break;
case ZIP_ER_NOZIP:
printf("The file specified by path is not a zip archive [%d]\n", error);
break;
case ZIP_ER_OPEN:
printf("The file specified by path could not be opened [%d]\n", error);
break;
case ZIP_ER_READ:
printf("A read error occurred; see errno for details [%d]\n", error);
break;
case ZIP_ER_SEEK:
printf("The file specified by path does not allow seeks [%d]\n", error);
break;
default:
printf("unknown err [%d]\n", error);
break;
}
exit(1);
}
// get the comment for the entire zip archive
int commentlen = 0;
const char * comment = zip_get_archive_comment(zipfd, &commentlen, ZIP_FL_ENC_RAW);
if (comment == NULL)
{
printf("zip_get_archive_comment get null or err[%d:%s]\n", errno, strerror(errno));
}
else
{
printf("zip_get_archive_comment[%d:%s]\n", commentlen, comment);
char copy[1024] = {0};
memcpy(copy, comment, commentlen);
unsigned char hex[1024] = {0};
encode_hex_string((unsigned char *)copy, commentlen, hex);
printf("zip_get_archive_comment hex[%d:%s]\n", commentlen, hex);
}
// Midas Header
// idx:0 bytes:2 0X96FA
// idx:2 bytes:2 comment len = strlen(channelId) + 0D0A
// idx:4 bytes:N channelId=xxx
// idx:4+N bytes:2 end:0X0D0A
char dstcomment[1024] = {0};
zip_uint16_t dstlen = 0;
memset(dstcomment + dstlen, 0XFA, 1);
dstlen += 1;
memset(dstcomment + dstlen, 0X96, 1);
dstlen += 1;
memset(dstcomment + dstlen, (zipcomment.length() + 2) % 0XFF, 1);// 0D0A
dstlen += 1;
memset(dstcomment + dstlen, (zipcomment.length() + 2) / 0XFF, 1);
dstlen += 1;
memcpy(dstcomment + dstlen, zipcomment.data(), zipcomment.length());
dstlen += zipcomment.length();
memset(dstcomment + dstlen, 0X0D, 1);
dstlen += 1;
memset(dstcomment + dstlen, 0X0A, 1);
dstlen += 1;
unsigned char hex[1024] = {0};
encode_hex_string((unsigned char *)dstcomment, dstlen, hex);
printf("zip_set_archive_comment hex[%d:%s]\n", dstlen, hex);
// sets the comment for the entire zip archive
// If comment is NULL and len is 0, the archive comment will be removed
// comment must be encoded in ASCII or UTF-8
int iret = zip_set_archive_comment(zipfd, dstcomment, dstlen);// err !!!
if (iret != 0)
{
printf("zip_set_archive_comment err[%d:%s]\n", iret, strerror(errno));
switch (iret)
{
case ZIP_ER_INVAL:
printf("zip_set_archive_comment: len is less than 0 or longer than the maximum comment length in a zip file (65535), or comment is not a valid UTF-8 encoded string\n");
break;
case ZIP_ER_MEMORY:
printf("zip_set_archive_comment: Required memory could not be allocated\n");
break;
default:
printf("zip_set_archive_comment: unknown err\n");
break;
}
}
// close, If any files within were changed, those changes are written to disk first
iret = zip_close(zipfd);
if (iret != 0)
{
printf("zip_close err[%d:%s]\n", errno, strerror(errno));
}
return 0;
}
2 總結
通過使用libzip可以方便的對zip的comment內容進行修改,但是限制必須使用可見的字符集,通過對libzip原始碼的簡單修改,可以做到新增擴充套件的字符集。除了通過程式碼的方式,也可以直接使用命令列工具:用 zip -z zipfile 新增或修改comment,用 unzip -z zipfile 讀取zip的comment內容。