C++實現tar包解析
阿新 • • 發佈:2018-11-22
原文地址:https://blog.csdn.net/fengbingchun/article/details/54773586?utm_source=copy
tar(tape archive)是Unix和類Unix系統上的檔案打包工具,可以將多個檔案合併為一個檔案,使用tar工具打出來的包稱為tar包。一般打包後的檔名字尾為“.tar”,也可以為其它。“.tar”代表未被壓縮的tar檔案,已被壓縮的tar檔案則追加壓縮檔案的副檔名,如經過gzip壓縮後的tar檔案,副檔名為“.tar.gz”。在Windows系統中用WinRAR也可以解壓縮開啟tar包。tar檔案格式已經成為POSIX標準,最初是POSIX.1-1988,目前是POSIX.1-2001。
tar中的資料都是以512位元組為單位。tar包中的每個檔案由兩部分組成,即頭部+內容:頭部是一個512位元組的頭部結構,內容是存放該檔案資料的地方,不足512位元組整數倍的部分會補齊到512位元組的整數倍。
tar檔案格式的詳細介紹可以參考:https://en.wikipedia.org/wiki/Tar_(computing)#File_header
通過執行以下命令生成測試tar包:
tar -cvf test.tar *
test.tar中包含兩個檔案blog_info.txt和github_info.txt.
其中blog_info.txt檔案內容如下:
name: fengbingchun address: http://blog.csdn.net/fengbingchun?viewmode=contents
github_info.txt檔案內容如下:
name: fengbingchun
address: https://github.com/fengbingchun
實現程式碼tar.hpp:
#ifndef FBC_MESSY_TEST_TAR_HPP_
#define FBC_MESSY_TEST_TAR_HPP_

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

/* reference:
	http://www.gnu.org/software/tar/manual/html_node/Standard.html
	http://stackoverflow.com/questions/2505042/how-to-parse-a-tar-file-in-c
	http://directory.fsf.org/wiki/Libtar
	http://work.freenet59.ru/svn/pkgsrc_haiku/trunk/archivers/libarchive/files/contrib/untar.c
	https://codeistry.wordpress.com/2014/08/14/how-to-parse-a-tar-file/
	http://stackoverflow.com/questions/17862383/how-to-know-the-files-inside-the-tar-parser
	https://en.wikipedia.org/wiki/Tar_(computing)
*/

/* tar Header Block, from POSIX 1003.1-1990. */

/* POSIX header. Every field is a fixed-width character array, so the struct
   can be overlaid directly on the first 500 bytes of a 512-byte header block. */
typedef struct posix_header
{                        /* byte offset */
	char name[100];      /*   0 */
	char mode[8];        /* 100 */
	char uid[8];         /* 108 */
	char gid[8];         /* 116 */
	char size[12];       /* 124 */
	char mtime[12];      /* 136 */
	char chksum[8];      /* 148 */
	char typeflag;       /* 156 */
	char linkname[100];  /* 157 */
	char magic[6];       /* 257 */
	char version[2];     /* 263 */
	char uname[32];      /* 265 */
	char gname[32];      /* 297 */
	char devmajor[8];    /* 329 */
	char devminor[8];    /* 337 */
	char prefix[155];    /* 345 */
	                     /* 500 */
} tar_posix_header;

/* The parser casts a raw block to this struct, so the layout must not drift. */
static_assert(sizeof(tar_posix_header) == 500,
	"tar_posix_header must match the 500-byte on-disk header layout");

/*
	location  size  field
	0         100   File name
	100       8     File mode
	108       8     Owner's numeric user ID
	116       8     Group's numeric user ID
	124       12    File size in bytes
	136       12    Last modification time in numeric Unix time format
	148       8     Checksum for header block
	156       1     Link indicator (file type)
	157       100   Name of linked file
*/

#define TMAGIC   "ustar" /* ustar and a null */
#define TMAGLEN  6
#define TVERSION "00"    /* 00 and no null */
#define TVERSLEN 2

/* Values used in typeflag field. */
#define REGTYPE  '0'     /* regular file */
#define AREGTYPE '\0'    /* regular file */
#define LNKTYPE  '1'     /* link */
#define SYMTYPE  '2'     /* symbolic link */
#define CHRTYPE  '3'     /* character special */
#define BLKTYPE  '4'     /* block special */
#define DIRTYPE  '5'     /* directory */
#define FIFOTYPE '6'     /* FIFO special */
#define CONTTYPE '7'     /* reserved */

/* Read-only view of an uncompressed tar archive.
   Usage: construct with a path, call IsValidTarFile() once to scan the
   archive, then query names, sizes and contents. The query functions only
   know about entries recorded by IsValidTarFile(). */
class TarFile {
public:
	/* Opens tar_name for binary reading. A failed open is not reported here;
	   IsValidTarFile() returns false in that case. */
	TarFile(const char* tar_name);

	/* Copying would leave two objects closing the same FILE* in ~TarFile(). */
	TarFile(const TarFile&) = delete;
	TarFile& operator=(const TarFile&) = delete;

	/* Scans the archive headers and records every regular-file entry.
	   Returns false if the file could not be opened or fails validation. */
	bool IsValidTarFile();

	/* Names of the regular files recorded by IsValidTarFile(). */
	std::vector<std::string> GetFileNames();

	/* Copies the data of file_name into contents and returns true if the name
	   was found. The caller provides at least GetFileSize(file_name) bytes;
	   no terminating '\0' is written. */
	bool GetFileContents(const char* file_name, char* contents);

	/* Size in bytes of file_name, or 0 if the name is not in the archive. */
	size_t GetFileSize(const char* file_name);

	/* Size in bytes of the whole archive as measured by IsValidTarFile(). */
	size_t GetTarSize();

	/* Closes the archive file if it was opened. */
	~TarFile();

private:
	FILE* file;                                /* archive stream, nullptr if open failed */
	size_t size;                               /* archive size in bytes */
	std::vector<std::string> file_names;       /* parallel arrays: entry name ... */
	std::vector<size_t> file_sizes;            /* ... entry size in bytes ... */
	std::vector<size_t> file_data_start_addrs; /* ... and offset of its data in the archive */
};

int test_tar();

#endif // FBC_MESSY_TEST_TAR_HPP_
tar.cpp:
#include "tar.hpp"

#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
// Opens the archive at tar_name in binary read mode. If the open fails,
// `file` stays null; IsValidTarFile() checks for that before doing any I/O.
TarFile::TarFile(const char* tar_name)
	: file(fopen(tar_name, "rb")), size(0)
{
	// Start with an empty entry index; it is filled by IsValidTarFile().
	file_data_start_addrs.clear();
	file_sizes.clear();
	file_names.clear();
}
// Releases the entry index and closes the archive stream if one was opened.
TarFile::~TarFile()
{
	file_data_start_addrs.clear();
	file_sizes.clear();
	file_names.clear();

	if (file == nullptr) return;

	fclose(file);
	file = nullptr;
}
bool TarFile::IsValidTarFile()
{
if (!file) return false;
const int block_size{ 512 };
unsigned char buf[block_size];
tar_posix_header* header = (tar_posix_header*)buf;
memset(buf, 0, block_size);
fseek(file, 0, SEEK_END);
size = ftell(file);
fseek(file, 0, SEEK_SET);
if (size % block_size != 0) {
fprintf(stderr, "tar file size should be a multiple of 512 bytes: %d\n", size);
return false;
}
size_t pos{ 0 };
while (1) {
size_t read_size = fread(buf, block_size, 1, file);
if (read_size != 1) break;
if (strncmp(header->magic, TMAGIC, 5)) break;
pos += block_size;
size_t file_size{0};
sscanf(header->size, "%lo", &file_size);
size_t file_block_count = (file_size + block_size - 1) / block_size;
switch (header->typeflag) {
case '0': // intentionally dropping through
case '\0':
// normal file
file_sizes.push_back(file_size);
file_names.push_back(std::string(header->name));
file_data_start_addrs.push_back(pos);
break;
case '1':
// hard link
break;
case '2':
// symbolic link
break;
case '3':
// device file/special file
break;
case '4':
// block device
break;
case '5':
// directory
break;
case '6':
// named pipe
break;
default:
break;
}
pos += file_block_count * block_size;
fseek(file, pos, SEEK_SET);
}
fseek(file, 0, SEEK_SET);
return true;
}
// Returns a copy of the names recorded by IsValidTarFile(); the list is empty
// until that scan has recorded at least one regular file.
std::vector<std::string> TarFile::GetFileNames()
{
	std::vector<std::string> names_copy(file_names);
	return names_copy;
}
// Copies the data of the first indexed entry named file_name into contents.
//
// file_name: entry name exactly as returned by GetFileNames().
// contents:  caller-owned buffer of at least GetFileSize(file_name) bytes;
//            no terminating '\0' is written.
// Returns true only if the entry exists and all of its bytes were read;
// false for an unknown name, a null argument, or a seek/read failure.
// The stream is rewound to the start afterwards.
bool TarFile::GetFileContents(const char* file_name, char* contents)
{
	if (!file || !file_name) return false;

	const std::string wanted(file_name);
	for (size_t i = 0; i < file_names.size(); ++i) {
		if (file_names[i] != wanted) continue;

		const size_t file_size = file_sizes[i];
		bool ok = true;
		if (file_size > 0) {
			// An empty entry needs no buffer and no read; otherwise a short
			// read must be reported instead of returning a half-filled buffer.
			ok = contents != nullptr
				&& fseek(file, static_cast<long>(file_data_start_addrs[i]), SEEK_SET) == 0
				&& fread(contents, 1, file_size, file) == file_size;
		}
		fseek(file, 0, SEEK_SET);
		return ok;
	}
	return false;
}
size_t TarFile::GetFileSize(const char* file_name)
{
size_t file_size{0};
for (int i = 0; i < file_names.size(); i++) {
std::string name_(file_name);
if (file_names[i].compare(name_) == 0) {
file_size = file_sizes[i];
break;
}
}
return file_size;
}
size_t TarFile::GetTarSize()
{
return size;
}
//////////////////////////////////////////////
// Demo driver: opens a fixed test archive, validates it, then prints the
// archive size, the number of regular files, and each file's name, size and
// contents to stderr.
// Returns 0 on success, -1 if the archive is invalid or a file cannot be read.
int test_tar()
{
	const std::string tar_file_path{ "E:/GitCode/Messy_Test/testdata/test.tar" };
	TarFile tarfile(tar_file_path.c_str());

	if (!tarfile.IsValidTarFile()) {
		fprintf(stderr, "it is not a valid tar file: %s\n", tar_file_path.c_str());
		return -1;
	}

	// size_t values must be printed with %zu; %d is undefined behaviour where
	// size_t is wider than int.
	fprintf(stderr, "tar file size: %zu byte\n", tarfile.GetTarSize());

	const std::vector<std::string> file_names = tarfile.GetFileNames();
	fprintf(stderr, "tar file count: %zu\n", file_names.size());

	for (const auto& name : file_names) {
		fprintf(stderr, "=====================================\n");

		const size_t file_size = tarfile.GetFileSize(name.c_str());
		fprintf(stderr, "file name: %s, size: %zu byte\n", name.c_str(), file_size);

		// std::string owns the buffer and keeps it NUL-terminated for "%s",
		// replacing the manual new[]/delete[] pair.
		std::string contents(file_size, '\0');
		if (!tarfile.GetFileContents(name.c_str(), &contents[0])) {
			fprintf(stderr, "failed to read contents of: %s\n", name.c_str());
			return -1;
		}
		fprintf(stderr, "contents:\n%s\n", contents.c_str());
	}

	return 0;
}
測試結果如下: