[原創]在線視頻下載(Using Python / Bash / C / Regular Expressions)


   

    Windows上下載在線視頻不是很難, 可以安裝愛酷等對應在線視頻(這里是優酷)的官方下載工具, 更通用地, 可以使用碩鼠下載, 這個軟件我沒用過, 但我需要使用碩鼠官方網站http://www.flvcd.com(支持70多個在線視頻網站的解析, 好強大的說)的視頻解析作為代理將某個在線視頻播放地址解析成對應的下載地址, 我使用Python和正則表達式進行抓取我想要的部分(下載地址以及視頻標題)並且下載給定視頻地址的視頻, 這個腳本如下:

 

 

#!/usr/bin/env python

import sys

def output(s):
  """Write the progress message *s* plus a newline to stderr.

  stderr carries progress only (these are not error messages); stdout is
  kept free for the list of downloaded file names so that a caller can
  capture it, the same way curl separates progress from payload.
  """
  line = s + "\n"
  sys.stderr.write(line)

# ---------------------------------------------------------------------------
# Script body (Python 2): resolve a video page url through flvcd.com, download
# every block it reports, and print the downloaded file names to stdout as
# double-quoted, space-separated shell words.
# ---------------------------------------------------------------------------
import re
import urllib
import urllib2
import zlib

argc = len(sys.argv)
if argc not in (2, 3):
  output("Usage: %s videourl [videoquality=normal|high|super|...]" % sys.argv[0])
  output(" e.g.")
  output("   %s http://v.youku.com/v_show/id_XMzMzMjE0MjE2.html super" % sys.argv[0])
  sys.exit(1)

videourl = sys.argv[1]
# An empty quality (e.g. a wrapper passing "" for a missing argument) falls
# back to the default instead of being sent to the server as-is.
quality = (sys.argv[2] if argc == 3 else '') or 'super'

url = ('http://www.flvcd.com/parse.php?kw=' + urllib.quote(videourl) +
       '&format=' + urllib.quote(quality))
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11'

req = urllib2.Request(url)
# add some headers to fake Chrome Browser(if we don't do so, there will be a problem when try to get tudou video)
req.add_header('host', 'www.flvcd.com')
req.add_header('Referer', 'http://www.flvcd.com')
req.add_header('User-Agent', user_agent)
req.add_header('Accept-Language', 'en-us,en;q=0.5')
req.add_header('Accept-Encoding', 'gzip, deflate')
req.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
res = urllib2.urlopen(req)
try:
  html = res.read()
  content_encoding = (res.info().get('Content-Encoding') or '').lower()
finally:
  res.close()

# urllib2 never decompresses a response by itself; since compression is
# advertised above, the body has to be inflated here before it is searched.
if content_encoding == 'gzip':
  html = zlib.decompress(html, 16 + zlib.MAX_WBITS)
elif content_encoding == 'deflate':
  try:
    html = zlib.decompress(html)                   # zlib-wrapped deflate
  except zlib.error:
    html = zlib.decompress(html, -zlib.MAX_WBITS)  # raw deflate stream

# The hidden "inf" input carries the <N>title / <U>url lines.
firstmatch = re.search(r'<input\s+type="hidden"\s+name="inf"\s+value="([^"]+)', html)
if firstmatch is None:
  output('Error: no download information found for "%s" (unsupported url or quality "%s"?)' % (videourl, quality))
  sys.exit(1)
# The value is decoded as GBK.
# NOTE(review): the original comment claimed it "turns out to be utf8" - confirm.
urls = unicode(firstmatch.group(1), 'gbk')

result = re.findall(r'<[NU]>(.+)', urls)
# Pair every title with the url that follows it; a trailing unpaired entry is
# dropped instead of raising IndexError later.
data = [result[i:i+2] for i in range(0, len(result) - 1, 2)]
count = len(data)
files = []

# URLopener sends its class-level "version" as User-Agent; it is used for
# tudou urls only, everything else goes through plain urlretrieve.
urllib.URLopener.version = user_agent
opener = urllib.URLopener()
output('\n--- Start to download from url "%s" (%d block(s) in total):' % (videourl, count))
for k, (title, blockurl) in enumerate(data):
  output('  >downloading Block %.2d of %.2d ...' % (k+1, count))
  # '.+' also captures a trailing '\r', so strip both fields.
  blockurl = blockurl.strip()
  # The name on disk only loses '/'; shell escaping is applied separately
  # when the names are printed, so the printed word resolves to this file.
  filename = (title.strip().replace('/', '') + '.flv').encode('utf-8')
  if blockurl.find('tudou.com') != -1:
    opener.retrieve(blockurl, filename)
  else:
    urllib.urlretrieve(blockurl, filename)
  files.append(filename)
  output('  downloaded Block.%.2d completely<' % (k+1,))
output('--- finished ---\n')

# Escape the characters that stay special inside double quotes (\ " $ `) so
# the consumer's `eval set ...` neither mangles a name nor executes it.
escaped = [re.sub(r'([\\"$`])', r'\\\1', name) for name in files]
print('"' + '" "'.join(escaped) + '"')

 

我們保存這個腳本到home(即~)目錄下, 為dl.py, 即保存為~/dl.py, 操作如下

# 打開終端
vi ~/dl.py # press ENTER
# press i
# 粘貼上面的Python代碼
# press ESC
:wq # press ENTER
sudo chmod u+x ~/dl.py # 使其可執行

這個腳本首先解析命令行參數然后提取在線視頻地址和欲下載視頻的質量,然后通過flvcd.com代理解析成若干個下載地址(優酷網站會將大視頻分割成若干塊,但像土豆等視頻網站不會),接着使用正則提取視頻標題和下載地址並下載,輸出進度到stderr, 輸出每塊下載的視頻文件文件名(這些文件名中特殊字符被轉義接着被"包圍並以空格分開)到stdout,程序添加的一些HTTP頭是為了讓flvcd網站以為我們是通過Chrome瀏覽器訪問的(但並沒有完全欺騙, 似乎部分土豆在線視頻的下載地址無法下載, to be solved)

 

輸出空格分割的文件名是為了方便shell對stdout捕獲然后使用下面的程序進行flv文件合並(主要針對youku, 前面說過, 優酷的大視頻是分段的, 所以我寫了下面的C程序合並這些分段的flv視頻):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// #include <malloc.h> // uncomment this line if you are using windows

// Raw octet type used for every field that is read from / written to an FLV file.
typedef unsigned char byte;

// Leading header of an FLV file. It is transferred with a single
// fread/fwrite of sizeof(FlvHeader), so every member is a byte (array).
// NOTE(review): this relies on the compiler adding no padding - confirm.
typedef struct {
// signature bytes; flv_is_valid_header expects 'F', 'L', 'V'
byte type[3];
// version byte; not inspected by this program
byte version;
// flag byte; flv_is_valid_header accepts only bits 0 and 2 (mask 5)
byte typeFlag;
// header length in file byte order; decoded with intval(..., 4)
byte headerLength[4];
} FlvHeader;

// Fixed-size header that precedes the payload of every tag. It is read and
// written as raw bytes by flv_tag_read / flv_tag_write.
typedef struct {
// tag kind; main treats 0x12 as script data and 0x8 / 0x9 as the two media streams
byte tagType;
// payload length in file byte order; decoded with intval(..., 3)
byte dataSize[3];
// low 24 bits of the timestamp, file byte order
byte timestamp[3];
// bits 24-31 of the timestamp (see flv_tag_get_timestamp)
byte timestamp_extension;
// never read or modified by this program; copied through unchanged
byte streamID[3];
// the payload follows on disk: byte tagData[ dataSize ]
} FlvTag;

// Report the byte order of the machine running this program.
// Returns non-zero when the lowest-addressed byte of an int holding 1 is not
// 1 (big endian: 1 is stored as "00 00 00 01"), and 0 on a little-endian
// machine (stored as "01 00 00 00"). Assumes sizeof(int) > 1.
int is_big_endian() {
    const int probe = 0x1;
    const char* lowest = (const char*)&probe;
    return lowest[0] != '\x1';
}

// Print msg to stderr exactly as given (no newline is appended), then
// terminate the process with exit code exitStatus. Never returns.
void quit(char* msg, int exitStatus) {
    fputs(msg, stderr);
    exit(exitStatus);
}

// Decode an integer stored in FLV file order (most significant byte first).
//   bits: first byte of the field, must not be NULL
//   size: number of bytes in the field, 1..4
// Returns the value as a native int; quits the program on invalid arguments.
int intval(byte* bits, int size) {
    unsigned int value = 0;
    int i;
    if (bits == NULL || size < 1 || size > 4) {
        quit("invalid bits(is NULL?) or size(out of [1,4]?) when calling intval\n", 1);
    }
    // Shifts operate on values, not on memory layout, so this loop is correct
    // on any host byte order. The former big-endian shortcut (*(int*)bits)
    // always read 4 bytes - wrong for 1..3 byte fields - and could touch an
    // unaligned address. Accumulating unsigned avoids signed overflow when a
    // 4-byte field has its top bit set.
    for (i = 0; i < size; i++)
        value = (value << 8) | bits[i];
    return (int)value;
}

// Encode a native int as FLV file-order bytes (most significant byte first).
//   value: integer to encode
//   size:  how many of the low-order bytes the caller wants, 1..4
// Returns a pointer to the last `size` bytes of a 4-byte static buffer; the
// buffer is overwritten by the next call, so copy the bytes out before
// calling byteval again. Quits the program when size is out of range.
// Assumes a 4-byte int.
byte* byteval(int value, int size) {
    static byte bits[4] = {0};
    unsigned int raw = (unsigned int)value;
    int shift, pos = 0;
    if (size < 1 || size > 4) {
        quit("invalid size(out of [1,4]?) when calling byteval\n", 1);
    }
    // Emit the bytes from most to least significant.
    for (shift = 24; shift >= 0; shift -= 8)
        bits[pos++] = (byte)(raw >> shift);
    return bits + (4 - size);
}

// Decode an 8-byte double stored in FLV file order (big endian).
//   bits: first of the 8 bytes, must not be NULL (quits the program otherwise)
// Returns the value as a native double. Assumes an 8-byte double.
double doubleval(byte* bits) {
    byte native[8];
    double result;
    int i;
    if (bits == NULL)
        quit("invalid bits(is NULL?)\n", 1);
    if (is_big_endian()) {
        memcpy(native, bits, 8);
    } else {
        for (i = 0; i < 8; ++i)
            native[i] = bits[7 - i];
    }
    // memcpy instead of *(double*) casts: `bits` points into the middle of a
    // tag payload and a byte array carries no alignment guarantee for double.
    memcpy(&result, native, sizeof(result));
    return result;
}

// Encode a native double as 8 bytes in FLV file order (big endian).
// Returns a pointer to an 8-byte static buffer that is overwritten by the
// next call. Assumes an 8-byte double.
byte* bytevaldouble(double value) {
    static byte bits[8] = {0};
    byte native[8];
    int i;
    memcpy(native, &value, 8);
    if (is_big_endian()) {
        memcpy(bits, native, 8);      // host order already matches the file
    } else {
        for (i = 0; i < 8; ++i)       // reverse little-endian host bytes
            bits[i] = native[7 - i];
    }
    return bits;
}

// Read one FlvHeader from the current position of fp into *header.
// Returns header on success, NULL when a complete header could not be read.
FlvHeader* flv_header_read(FILE* fp, FlvHeader* header) {
    size_t got = fread(header, sizeof(FlvHeader), 1, fp);
    if (got != 1)
        return NULL;
    return header;
}

// Decide whether *header looks like an FLV header, i.e. whether the file is
// worth merging. Returns 1 when header is non-NULL, starts with "FLV" and has
// no flag bits set other than bits 0 and 2 (mask 5); returns 0 otherwise.
int flv_is_valid_header(FlvHeader* header) {
    if (!header)
        return 0;
    if (header->type[0] != 'F' || header->type[1] != 'L' || header->type[2] != 'V')
        return 0;
    return (header->typeFlag & ~5) == 0;
}

// Read the next tag from fp.
//   tag:          receives the raw tag header
//   dataSize:     receives the payload length in bytes
//   previousSize: receives the 4-byte size field that follows the payload,
//                 converted to a native int
// Returns a pointer to the payload, or NULL when no further tag header can be
// read (end of file). Quits the program when the payload or the trailing size
// field is truncated, or when memory runs out.
// The payload lives in one buffer owned by this function and reused by every
// call: it is valid only until the next call and must NOT be freed by the
// caller.
byte* flv_tag_read(FILE* fp, FlvTag* tag, int* dataSize, int* previousSize) {
    static byte* _tagData = NULL;
    static int _capacity = 0; // allocated size of _tagData (not the last tag's size)
    byte rawPrev[4];
    int tagSize;

    if (fread(tag, sizeof(FlvTag), 1, fp) != 1)
        return NULL;
    tagSize = intval(tag->dataSize, 3);

    if (_tagData == NULL || _capacity < tagSize) {
        // Always allocate at least one byte: malloc(0) may return NULL, and a
        // NULL return value would be mistaken for end of file by the caller.
        int wanted = tagSize > 0 ? tagSize : 1;
        free(_tagData);
        _tagData = (byte*)malloc((size_t)wanted);
        if (_tagData == NULL) {
            _capacity = 0;
            quit("out of memory while reading FLV tag data\n", 1);
        }
        _capacity = wanted;
    }

    // The size field is exactly 4 bytes in the file, whatever sizeof(int) is.
    if (fread(_tagData, sizeof(byte), (size_t)tagSize, fp) != (size_t)tagSize ||
        fread(rawPrev, sizeof(rawPrev), 1, fp) != 1) {
        quit("FLV tag data(broken tag data or broken previous size?) is broken.\n", 1);
    }
    *dataSize = tagSize;
    *previousSize = (int)(((unsigned int)rawPrev[0] << 24) | ((unsigned int)rawPrev[1] << 16) |
                          ((unsigned int)rawPrev[2] << 8) | (unsigned int)rawPrev[3]);
    return _tagData;
}

// Naive search for the byte sequence `search` (searchLength bytes) inside
// `data` (dataSize bytes).
// Returns the index of the first occurrence, or -1 when there is none - this
// includes the case where data is shorter than search.
// Quits the program when a pointer is NULL or searchLength < 1.
int stupid_byte_indexof(byte* search, int searchLength, byte* data, int dataSize) {
    int i, last;
    if (search == NULL || data == NULL || searchLength < 1)
        quit("invalid arguments when searching", 1);
    // `last` is the final start position at which a full match still fits and
    // has to be tested too (the loop used to stop one position early). It is
    // negative when the needle is longer than the haystack, so the loop is
    // skipped and -1 is returned.
    last = dataSize - searchLength;
    for (i = 0; i <= last; ++i) {
        if (memcmp(data + i, search, (size_t)searchLength) == 0)
            return i;
    }
    return -1;
}

// Remove the keyframes information from a script-data tag, in place.
//   tag:           header of the tag; tagType must be 0x12
//   scripttagData: payload of that tag (modified in place)
//   dataSize:      in: payload length; out: the shortened length when the
//                  keyframes entry was found
// Two things happen: the byte following "hasKeyframes\x01" is set to 0, and
// the payload is cut off where "\x00\x09keyframes\x03" starts, with
// tag->dataSize rewritten to the new length.
// NOTE(review): presumably \x01 / \x03 are AMF type markers and \x00\x09 the
// length prefix of the name - confirm against the AMF0 specification.
// Returns scripttagData; quits the program on NULL arguments or a tag that is
// not script data.
byte* flv_scriptdata_strip_keyframes(FlvTag* tag, byte* scripttagData, int* dataSize) {
    byte flagPattern[] = {'h', 'a', 's', 'K', 'e', 'y', 'f', 'r', 'a', 'm', 'e', 's', '\x1'};
    byte listPattern[] = {'\x0', '\x9', 'k', 'e', 'y', 'f', 'r', 'a', 'm', 'e', 's', '\x3'};
    int flagLength = sizeof(flagPattern)/sizeof(byte);
    int listLength = sizeof(listPattern)/sizeof(byte);
    int at;

    if (tag == NULL || tag->tagType != 0x12 || scripttagData == NULL || dataSize == NULL) {
        quit("can't strip non-scriptdata's[null or video/audio tag data?] keyframes or null pointer", 1);
    }

    // Search one byte short of the end so that the byte after the pattern,
    // which is overwritten below, is still inside the payload.
    at = stupid_byte_indexof(flagPattern, flagLength, scripttagData, *dataSize - 1);
    if (at != -1)
        scripttagData[at + flagLength] = '\x0';

    at = stupid_byte_indexof(listPattern, listLength, scripttagData, *dataSize);
    if (at != -1) {
        *dataSize = at;
        memcpy(tag->dataSize, byteval(at, 3), 3);
    }
    return scripttagData;
}

// Write one tag to fp: the raw tag header, *dataSize payload bytes, and the
// 4-byte size field that follows every tag.
//   previousSize: points to the size value as a native int; it is written in
//                 FLV file order (most significant byte first)
// Returns the number of bytes written, or 0 when any write fails.
int flv_tag_write(FILE* fp, FlvTag* tag, byte* tagData, int* dataSize, int* previousSize) {
    unsigned int prev = (unsigned int)*previousSize;
    byte prevBytes[4];
    // Dumping the int with fwrite emitted it in host byte order, i.e.
    // little-endian on little-endian machines, while every other multi-byte
    // FLV field handled here is big-endian. Serialize it explicitly.
    prevBytes[0] = (byte)(prev >> 24);
    prevBytes[1] = (byte)(prev >> 16);
    prevBytes[2] = (byte)(prev >> 8);
    prevBytes[3] = (byte)prev;
    if (
        fwrite(tag, sizeof(FlvTag), 1, fp) != 1 ||
        fwrite(tagData, sizeof(byte), (size_t)*dataSize, fp) != (size_t)*dataSize ||
        fwrite(prevBytes, sizeof(prevBytes), 1, fp) != 1
    ) {
        return 0;
    }
    return (int)(sizeof(FlvTag) + *dataSize * sizeof(byte) + sizeof(prevBytes));
}

// Extract the duration from the payload of a script-data tag.
//   tagData:  payload of the tag (the caller must make sure it is a script
//             tag, i.e. tagType == 0x12)
//   dataSize: payload length in bytes
//   offset:   when not NULL, receives the index inside tagData at which the
//             8-byte duration value starts
// Returns the duration as stored in the file. Quits the program when
// "duration\0" is not found or the 8-byte value is cut off.
double flv_tag_get_duration(byte* tagData, int dataSize, int* offset) {
    byte search[9] = { 'd', 'u', 'r', 'a', 't', 'i', 'o', 'n', '\0' };
    int index = stupid_byte_indexof(search, 9, tagData, dataSize);

    if (index == -1) {
        quit("Sorry, can't get flv meta duration.", 1);
    }

    index += sizeof(search)/sizeof(byte);
    // The value occupies 8 bytes after the name; refuse to read past the end
    // of the payload.
    if (index + 8 > dataSize) {
        quit("Sorry, flv meta duration is truncated.", 1);
    }
    if (offset)
        *offset = index;
    return doubleval(tagData + index);
}

// Combine the 24-bit timestamp and its 8-bit extension of a tag header into
// one int (the extension supplies bits 24-31).
// Returns -1 when tag is NULL.
int flv_tag_get_timestamp(FlvTag* tag) {
    int high, low;
    if (tag == NULL)
        return -1;
    high = (int)(tag->timestamp_extension) << 24;
    low = intval(tag->timestamp, 3);
    return high + low;
}

// Store `timestamp` into a tag header: the low 24 bits go to tag->timestamp
// in file byte order, bits 24 and up to tag->timestamp_extension.
// Returns the timestamp, or -1 (tag untouched) when tag is NULL or the
// timestamp is negative.
int flv_tag_set_timestamp(FlvTag* tag, int timestamp) {
    byte* low24;
    if (tag == NULL || timestamp < 0)
        return -1;
    low24 = byteval(timestamp & 0x00FFFFFF, 3);
    tag->timestamp[0] = low24[0];
    tag->timestamp[1] = low24[1];
    tag->timestamp[2] = low24[2];
    tag->timestamp_extension = timestamp >> 24;
    return timestamp;
}

// Merge FLV segments into one file.
//   argv[1]     file to create
//   argv[2...]  segments, in playback order (at least one)
// The header and the first script-data tag of the first segment are copied
// (with keyframes stripped); media tags (0x8 / 0x9) of every segment are
// copied with their timestamps shifted past the end of the previous
// segment; finally the duration stored in the copied script tag is
// overwritten with the sum of all segment durations.
// Returns 0 on success; every error prints a message and exits with status 1.
int main(int argc, char* argv[]) {

    FlvHeader header;
    FlvTag tag;
    byte* tagData;
    FILE *fpdst = NULL, *fpsrc = NULL;
    int i = 0, srccount = argc - 2, headerLength, duration_index = 0,
        prevSize, dataSize, offset, foundduration = 0, zero = 0,
        durationslot = 0, // 1 once a duration field exists in the output
        lasttimestamp[2] = {0};
    char** src = argv + 2;
    double duration = 0.0;

    int bts = 0;

    // An output name plus at least one input are required. With only the
    // output name an empty file used to be created and then "patched".
    if (argc < 3) {
        fprintf(stderr, "Usage: %s flvtobesaved 1stflv [2ndflv [3rdflv [...]]]\n", argv[0]);
        exit(1);
    }
    if ((fpdst = fopen(argv[1], "wb")) == NULL) {
        fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
        exit(1);
    }

    while (i < srccount) {
        if ((fpsrc = fopen(src[i], "rb")) == NULL) {
            fprintf(stderr, "Can't open file '%s'\n", src[i]);
            exit(1);
        }

        if(! flv_header_read(fpsrc, &header) || ! flv_is_valid_header(&header)) {
            fprintf(stderr, "The header of file '%s' is broken or is not FLV header.\n", src[i]);
            exit(1);
        }

        if (i == 0) {
            // the first previous tag size is 0
            if (fwrite(&header, sizeof(FlvHeader), 1, fpdst) != 1 ||
                fwrite(&zero, sizeof(int), 1, fpdst) != 1) {
                fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
                exit(1);
            }
            duration_index = sizeof(FlvHeader);
        }

        headerLength = intval(header.headerLength, 4);

        if (0 != fseek(fpsrc, headerLength+4, SEEK_SET)) { // skip to real flv tag data(skip the first previous tag size, +4)
            fprintf(stderr, "The first previousSize(should be 0) of file '%s' is broken.\n", src[i]);
            exit(1);
        }

        // Base timestamp of this segment: the accumulated duration in
        // milliseconds, but never earlier than the last timestamp already
        // written for either media stream.
        bts = (int)(duration * 1000);
        if (bts < lasttimestamp[0])
            bts = lasttimestamp[0];
        if (bts < lasttimestamp[1])
            bts = lasttimestamp[1];
        foundduration = 0;

        while ((tagData = flv_tag_read(fpsrc, &tag, &dataSize, &prevSize)) != NULL) {

            if (tag.tagType == 0x12 && ! foundduration) { // if script data and duration not found, try to get duration
                duration += flv_tag_get_duration(tagData, dataSize, &offset);
                foundduration = 1;
                if (i == 0) { // prepare the script data for writing, we choose the first FLV file header as sample
                    duration_index += 4 + sizeof(FlvTag) + offset;
                    durationslot = 1;

                    flv_scriptdata_strip_keyframes(&tag, tagData, &dataSize);
                    // Stripping may have shortened the payload; the size
                    // field that follows the tag has to describe what is
                    // really written (tag header + payload).
                    prevSize = (int)sizeof(FlvTag) + dataSize;

                    if (! flv_tag_write(fpdst, &tag, tagData, &dataSize, &prevSize)) {
                        fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
                        exit(1);
                    }
                }
            } else if (tag.tagType == 0x8 || tag.tagType == 0x9) {
                int stream = tag.tagType - 0x8;

                lasttimestamp[stream] = bts + flv_tag_get_timestamp(&tag);
                flv_tag_set_timestamp(&tag, lasttimestamp[stream]);

                if (! flv_tag_write(fpdst, &tag, tagData, &dataSize, &prevSize)) {
                    fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
                    exit(1);
                }
                // Media tags written before the script tag push the position
                // of the duration field further into the output.
                if (i == 0 && ! foundduration) {
                    duration_index += 4 + sizeof(FlvTag) + dataSize;
                }
            }
        }

        printf("completely merging file '%s' to '%s'\n", src[i], argv[1]);

        fclose(fpsrc);

        ++i;
    }

    // Patch the total duration only when the first segment really provided a
    // duration field; otherwise duration_index points at media data and the
    // write would corrupt the output.
    if (durationslot) {
        if (0 != fseek(fpdst, duration_index, SEEK_SET))
            quit("can't seek to duration\n", 1);
        if (fwrite(bytevaldouble(duration), 1, 8, fpdst) != 8) // save real duration to file
            quit("can't save duration\n", 1);
    } else {
        fprintf(stderr, "warning: no duration field in '%s', duration not updated\n", src[0]);
    }
    fclose(fpdst);

    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// #include <malloc.h> // uncomment this line if you are using windows

// Raw octet type used for every field that is read from / written to an FLV file.
typedef unsigned char byte;

// Leading header of an FLV file. It is transferred with a single
// fread/fwrite of sizeof(FlvHeader), so every member is a byte (array).
// NOTE(review): this relies on the compiler adding no padding - confirm.
typedef struct {
// signature bytes; flv_is_valid_header expects 'F', 'L', 'V'
byte type[3];
// version byte; not inspected by this program
byte version;
// flag byte; flv_is_valid_header accepts only bits 0 and 2 (mask 5)
byte typeFlag;
// header length in file byte order; decoded with intval(..., 4)
byte headerLength[4];
} FlvHeader;

// Fixed-size header that precedes the payload of every tag. It is read and
// written as raw bytes by flv_tag_read / flv_tag_write.
typedef struct {
// tag kind; main treats 0x12 as script data and 0x8 / 0x9 as the two media streams
byte tagType;
// payload length in file byte order; decoded with intval(..., 3)
byte dataSize[3];
// low 24 bits of the timestamp, file byte order
byte timestamp[3];
// bits 24-31 of the timestamp (see flv_tag_get_timestamp)
byte timestamp_extension;
// never read or modified by this program; copied through unchanged
byte streamID[3];
// the payload follows on disk: byte tagData[ dataSize ]
} FlvTag;

// Report the byte order of the machine running this program.
// Returns non-zero when the lowest-addressed byte of an int holding 1 is not
// 1 (big endian: 1 is stored as "00 00 00 01"), and 0 on a little-endian
// machine (stored as "01 00 00 00"). Assumes sizeof(int) > 1.
int is_big_endian() {
    const int probe = 0x1;
    const char* lowest = (const char*)&probe;
    return lowest[0] != '\x1';
}

// Print msg to stderr exactly as given (no newline is appended), then
// terminate the process with exit code exitStatus. Never returns.
void quit(char* msg, int exitStatus) {
    fputs(msg, stderr);
    exit(exitStatus);
}

// Decode an integer stored in FLV file order (most significant byte first).
//   bits: first byte of the field, must not be NULL
//   size: number of bytes in the field, 1..4
// Returns the value as a native int; quits the program on invalid arguments.
int intval(byte* bits, int size) {
    unsigned int value = 0;
    int i;
    if (bits == NULL || size < 1 || size > 4) {
        quit("invalid bits(is NULL?) or size(out of [1,4]?) when calling intval\n", 1);
    }
    // Shifts operate on values, not on memory layout, so this loop is correct
    // on any host byte order. The former big-endian shortcut (*(int*)bits)
    // always read 4 bytes - wrong for 1..3 byte fields - and could touch an
    // unaligned address. Accumulating unsigned avoids signed overflow when a
    // 4-byte field has its top bit set.
    for (i = 0; i < size; i++)
        value = (value << 8) | bits[i];
    return (int)value;
}

// Encode a native int as FLV file-order bytes (most significant byte first).
//   value: integer to encode
//   size:  how many of the low-order bytes the caller wants, 1..4
// Returns a pointer to the last `size` bytes of a 4-byte static buffer; the
// buffer is overwritten by the next call, so copy the bytes out before
// calling byteval again. Quits the program when size is out of range.
// Assumes a 4-byte int.
byte* byteval(int value, int size) {
    static byte bits[4] = {0};
    unsigned int raw = (unsigned int)value;
    int shift, pos = 0;
    if (size < 1 || size > 4) {
        quit("invalid size(out of [1,4]?) when calling byteval\n", 1);
    }
    // Emit the bytes from most to least significant.
    for (shift = 24; shift >= 0; shift -= 8)
        bits[pos++] = (byte)(raw >> shift);
    return bits + (4 - size);
}

// Decode an 8-byte double stored in FLV file order (big endian).
//   bits: first of the 8 bytes, must not be NULL (quits the program otherwise)
// Returns the value as a native double. Assumes an 8-byte double.
double doubleval(byte* bits) {
    byte native[8];
    double result;
    int i;
    if (bits == NULL)
        quit("invalid bits(is NULL?)\n", 1);
    if (is_big_endian()) {
        memcpy(native, bits, 8);
    } else {
        for (i = 0; i < 8; ++i)
            native[i] = bits[7 - i];
    }
    // memcpy instead of *(double*) casts: `bits` points into the middle of a
    // tag payload and a byte array carries no alignment guarantee for double.
    memcpy(&result, native, sizeof(result));
    return result;
}

// Encode a native double as 8 bytes in FLV file order (big endian).
// Returns a pointer to an 8-byte static buffer that is overwritten by the
// next call. Assumes an 8-byte double.
byte* bytevaldouble(double value) {
    static byte bits[8] = {0};
    byte native[8];
    int i;
    memcpy(native, &value, 8);
    if (is_big_endian()) {
        memcpy(bits, native, 8);      // host order already matches the file
    } else {
        for (i = 0; i < 8; ++i)       // reverse little-endian host bytes
            bits[i] = native[7 - i];
    }
    return bits;
}

// Read one FlvHeader from the current position of fp into *header.
// Returns header on success, NULL when a complete header could not be read.
FlvHeader* flv_header_read(FILE* fp, FlvHeader* header) {
    size_t got = fread(header, sizeof(FlvHeader), 1, fp);
    if (got != 1)
        return NULL;
    return header;
}

// Decide whether *header looks like an FLV header, i.e. whether the file is
// worth merging. Returns 1 when header is non-NULL, starts with "FLV" and has
// no flag bits set other than bits 0 and 2 (mask 5); returns 0 otherwise.
int flv_is_valid_header(FlvHeader* header) {
    if (!header)
        return 0;
    if (header->type[0] != 'F' || header->type[1] != 'L' || header->type[2] != 'V')
        return 0;
    return (header->typeFlag & ~5) == 0;
}

// Read the next tag from fp.
//   tag:          receives the raw tag header
//   dataSize:     receives the payload length in bytes
//   previousSize: receives the 4-byte size field that follows the payload,
//                 converted to a native int
// Returns a pointer to the payload, or NULL when no further tag header can be
// read (end of file). Quits the program when the payload or the trailing size
// field is truncated, or when memory runs out.
// The payload lives in one buffer owned by this function and reused by every
// call: it is valid only until the next call and must NOT be freed by the
// caller.
byte* flv_tag_read(FILE* fp, FlvTag* tag, int* dataSize, int* previousSize) {
    static byte* _tagData = NULL;
    static int _capacity = 0; // allocated size of _tagData (not the last tag's size)
    byte rawPrev[4];
    int tagSize;

    if (fread(tag, sizeof(FlvTag), 1, fp) != 1)
        return NULL;
    tagSize = intval(tag->dataSize, 3);

    if (_tagData == NULL || _capacity < tagSize) {
        // Always allocate at least one byte: malloc(0) may return NULL, and a
        // NULL return value would be mistaken for end of file by the caller.
        int wanted = tagSize > 0 ? tagSize : 1;
        free(_tagData);
        _tagData = (byte*)malloc((size_t)wanted);
        if (_tagData == NULL) {
            _capacity = 0;
            quit("out of memory while reading FLV tag data\n", 1);
        }
        _capacity = wanted;
    }

    // The size field is exactly 4 bytes in the file, whatever sizeof(int) is.
    if (fread(_tagData, sizeof(byte), (size_t)tagSize, fp) != (size_t)tagSize ||
        fread(rawPrev, sizeof(rawPrev), 1, fp) != 1) {
        quit("FLV tag data(broken tag data or broken previous size?) is broken.\n", 1);
    }
    *dataSize = tagSize;
    *previousSize = (int)(((unsigned int)rawPrev[0] << 24) | ((unsigned int)rawPrev[1] << 16) |
                          ((unsigned int)rawPrev[2] << 8) | (unsigned int)rawPrev[3]);
    return _tagData;
}

// Naive search for the byte sequence `search` (searchLength bytes) inside
// `data` (dataSize bytes).
// Returns the index of the first occurrence, or -1 when there is none - this
// includes the case where data is shorter than search.
// Quits the program when a pointer is NULL or searchLength < 1.
int stupid_byte_indexof(byte* search, int searchLength, byte* data, int dataSize) {
    int i, last;
    if (search == NULL || data == NULL || searchLength < 1)
        quit("invalid arguments when searching", 1);
    // `last` is the final start position at which a full match still fits and
    // has to be tested too (the loop used to stop one position early). It is
    // negative when the needle is longer than the haystack, so the loop is
    // skipped and -1 is returned.
    last = dataSize - searchLength;
    for (i = 0; i <= last; ++i) {
        if (memcmp(data + i, search, (size_t)searchLength) == 0)
            return i;
    }
    return -1;
}

// Remove the keyframes information from a script-data tag, in place.
//   tag:           header of the tag; tagType must be 0x12
//   scripttagData: payload of that tag (modified in place)
//   dataSize:      in: payload length; out: the shortened length when the
//                  keyframes entry was found
// Two things happen: the byte following "hasKeyframes\x01" is set to 0, and
// the payload is cut off where "\x00\x09keyframes\x03" starts, with
// tag->dataSize rewritten to the new length.
// NOTE(review): presumably \x01 / \x03 are AMF type markers and \x00\x09 the
// length prefix of the name - confirm against the AMF0 specification.
// Returns scripttagData; quits the program on NULL arguments or a tag that is
// not script data.
byte* flv_scriptdata_strip_keyframes(FlvTag* tag, byte* scripttagData, int* dataSize) {
    byte flagPattern[] = {'h', 'a', 's', 'K', 'e', 'y', 'f', 'r', 'a', 'm', 'e', 's', '\x1'};
    byte listPattern[] = {'\x0', '\x9', 'k', 'e', 'y', 'f', 'r', 'a', 'm', 'e', 's', '\x3'};
    int flagLength = sizeof(flagPattern)/sizeof(byte);
    int listLength = sizeof(listPattern)/sizeof(byte);
    int at;

    if (tag == NULL || tag->tagType != 0x12 || scripttagData == NULL || dataSize == NULL) {
        // (markup residue that had leaked in front of this string literal was removed)
        quit("can't strip non-scriptdata's[null or video/audio tag data?] keyframes or null pointer", 1);
    }

    // Search one byte short of the end so that the byte after the pattern,
    // which is overwritten below, is still inside the payload.
    at = stupid_byte_indexof(flagPattern, flagLength, scripttagData, *dataSize - 1);
    if (at != -1)
        scripttagData[at + flagLength] = '\x0';

    at = stupid_byte_indexof(listPattern, listLength, scripttagData, *dataSize);
    if (at != -1) {
        *dataSize = at;
        memcpy(tag->dataSize, byteval(at, 3), 3);
    }
    return scripttagData;
}

// Write one tag to fp: the raw tag header, *dataSize payload bytes, and the
// 4-byte size field that follows every tag.
//   previousSize: points to the size value as a native int; it is written in
//                 FLV file order (most significant byte first)
// Returns the number of bytes written, or 0 when any write fails.
int flv_tag_write(FILE* fp, FlvTag* tag, byte* tagData, int* dataSize, int* previousSize) {
    unsigned int prev = (unsigned int)*previousSize;
    byte prevBytes[4];
    // Dumping the int with fwrite emitted it in host byte order, i.e.
    // little-endian on little-endian machines, while every other multi-byte
    // FLV field handled here is big-endian. Serialize it explicitly.
    prevBytes[0] = (byte)(prev >> 24);
    prevBytes[1] = (byte)(prev >> 16);
    prevBytes[2] = (byte)(prev >> 8);
    prevBytes[3] = (byte)prev;
    if (
        fwrite(tag, sizeof(FlvTag), 1, fp) != 1 ||
        fwrite(tagData, sizeof(byte), (size_t)*dataSize, fp) != (size_t)*dataSize ||
        fwrite(prevBytes, sizeof(prevBytes), 1, fp) != 1
    ) {
        return 0;
    }
    return (int)(sizeof(FlvTag) + *dataSize * sizeof(byte) + sizeof(prevBytes));
}

// Extract the duration from the payload of a script-data tag.
//   tagData:  payload of the tag (the caller must make sure it is a script
//             tag, i.e. tagType == 0x12)
//   dataSize: payload length in bytes
//   offset:   when not NULL, receives the index inside tagData at which the
//             8-byte duration value starts
// Returns the duration as stored in the file. Quits the program when
// "duration\0" is not found or the 8-byte value is cut off.
double flv_tag_get_duration(byte* tagData, int dataSize, int* offset) {
    byte search[9] = { 'd', 'u', 'r', 'a', 't', 'i', 'o', 'n', '\0' };
    int index = stupid_byte_indexof(search, 9, tagData, dataSize);

    if (index == -1) {
        quit("Sorry, can't get flv meta duration.", 1);
    }

    index += sizeof(search)/sizeof(byte);
    // The value occupies 8 bytes after the name; refuse to read past the end
    // of the payload.
    if (index + 8 > dataSize) {
        quit("Sorry, flv meta duration is truncated.", 1);
    }
    if (offset)
        *offset = index;
    return doubleval(tagData + index);
}

// Combine the 24-bit timestamp and its 8-bit extension of a tag header into
// one int (the extension supplies bits 24-31).
// Returns -1 when tag is NULL.
int flv_tag_get_timestamp(FlvTag* tag) {
    int high, low;
    if (tag == NULL)
        return -1;
    high = (int)(tag->timestamp_extension) << 24;
    low = intval(tag->timestamp, 3);
    return high + low;
}

// Store `timestamp` into a tag header: the low 24 bits go to tag->timestamp
// in file byte order, bits 24 and up to tag->timestamp_extension.
// Returns the timestamp, or -1 (tag untouched) when tag is NULL or the
// timestamp is negative.
int flv_tag_set_timestamp(FlvTag* tag, int timestamp) {
    byte* low24;
    if (tag == NULL || timestamp < 0)
        return -1;
    low24 = byteval(timestamp & 0x00FFFFFF, 3);
    tag->timestamp[0] = low24[0];
    tag->timestamp[1] = low24[1];
    tag->timestamp[2] = low24[2];
    tag->timestamp_extension = timestamp >> 24;
    return timestamp;
}

// Merge FLV segments into one file.
//   argv[1]     file to create
//   argv[2...]  segments, in playback order (at least one)
// The header and the first script-data tag of the first segment are copied
// (with keyframes stripped); media tags (0x8 / 0x9) of every segment are
// copied with their timestamps shifted past the end of the previous
// segment; finally the duration stored in the copied script tag is
// overwritten with the sum of all segment durations.
// Returns 0 on success; every error prints a message and exits with status 1.
int main(int argc, char* argv[]) {

    FlvHeader header;
    FlvTag tag;
    byte* tagData;
    FILE *fpdst = NULL, *fpsrc = NULL;
    int i = 0, srccount = argc - 2, headerLength, duration_index = 0,
        prevSize, dataSize, offset, foundduration = 0, zero = 0,
        durationslot = 0, // 1 once a duration field exists in the output
        lasttimestamp[2] = {0};
    char** src = argv + 2;
    double duration = 0.0;

    int bts = 0;

    // An output name plus at least one input are required. With only the
    // output name an empty file used to be created and then "patched".
    if (argc < 3) {
        fprintf(stderr, "Usage: %s flvtobesaved 1stflv [2ndflv [3rdflv [...]]]\n", argv[0]);
        exit(1);
    }
    if ((fpdst = fopen(argv[1], "wb")) == NULL) {
        fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
        exit(1);
    }

    while (i < srccount) {
        if ((fpsrc = fopen(src[i], "rb")) == NULL) {
            fprintf(stderr, "Can't open file '%s'\n", src[i]);
            exit(1);
        }

        if(! flv_header_read(fpsrc, &header) || ! flv_is_valid_header(&header)) {
            fprintf(stderr, "The header of file '%s' is broken or is not FLV header.\n", src[i]);
            exit(1);
        }

        if (i == 0) {
            // the first previous tag size is 0
            if (fwrite(&header, sizeof(FlvHeader), 1, fpdst) != 1 ||
                fwrite(&zero, sizeof(int), 1, fpdst) != 1) {
                fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
                exit(1);
            }
            duration_index = sizeof(FlvHeader);
        }

        headerLength = intval(header.headerLength, 4);

        if (0 != fseek(fpsrc, headerLength+4, SEEK_SET)) { // skip to real flv tag data(skip the first previous tag size, +4)
            fprintf(stderr, "The first previousSize(should be 0) of file '%s' is broken.\n", src[i]);
            exit(1);
        }

        // Base timestamp of this segment: the accumulated duration in
        // milliseconds, but never earlier than the last timestamp already
        // written for either media stream.
        bts = (int)(duration * 1000);
        if (bts < lasttimestamp[0])
            bts = lasttimestamp[0];
        if (bts < lasttimestamp[1])
            bts = lasttimestamp[1];
        foundduration = 0;

        while ((tagData = flv_tag_read(fpsrc, &tag, &dataSize, &prevSize)) != NULL) {

            if (tag.tagType == 0x12 && ! foundduration) { // if script data and duration not found, try to get duration
                duration += flv_tag_get_duration(tagData, dataSize, &offset);
                foundduration = 1;
                if (i == 0) { // prepare the script data for writing, we choose the first FLV file header as sample
                    duration_index += 4 + sizeof(FlvTag) + offset;
                    durationslot = 1;

                    flv_scriptdata_strip_keyframes(&tag, tagData, &dataSize);
                    // Stripping may have shortened the payload; the size
                    // field that follows the tag has to describe what is
                    // really written (tag header + payload).
                    prevSize = (int)sizeof(FlvTag) + dataSize;

                    if (! flv_tag_write(fpdst, &tag, tagData, &dataSize, &prevSize)) {
                        fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
                        exit(1);
                    }
                }
            } else if (tag.tagType == 0x8 || tag.tagType == 0x9) {
                int stream = tag.tagType - 0x8;

                lasttimestamp[stream] = bts + flv_tag_get_timestamp(&tag);
                flv_tag_set_timestamp(&tag, lasttimestamp[stream]);

                if (! flv_tag_write(fpdst, &tag, tagData, &dataSize, &prevSize)) {
                    fprintf(stderr, "Can't write to file '%s'\n", argv[1]);
                    exit(1);
                }
                // Media tags written before the script tag push the position
                // of the duration field further into the output.
                if (i == 0 && ! foundduration) {
                    duration_index += 4 + sizeof(FlvTag) + dataSize;
                }
            }
        }

        printf("completely merging file '%s' to '%s'\n", src[i], argv[1]);

        fclose(fpsrc);

        ++i;
    }

    // Patch the total duration only when the first segment really provided a
    // duration field; otherwise duration_index points at media data and the
    // write would corrupt the output.
    if (durationslot) {
        if (0 != fseek(fpdst, duration_index, SEEK_SET))
            quit("can't seek to duration\n", 1);
        if (fwrite(bytevaldouble(duration), 1, 8, fpdst) != 8) // save real duration to file
            quit("can't save duration\n", 1);
    } else {
        fprintf(stderr, "warning: no duration field in '%s', duration not updated\n", src[0]);
    }
    fclose(fpdst);

    return 0;
}



我們保存為~/flvmerge.c(方法同上), 然后編譯並鏈接成可執行程序:

# 打開終端, 如已經打開可以忽略
vi ~/flvmerge.c # press ENTER
# press i
# 粘貼上面的C代碼
# press ESC
:wq # press ENTER
gcc -o ~/flvmerge ~/flvmerge.c

 

我不期望讀者能夠看懂這個C程序, 因為牽涉到FLV文件結構和C語言指針的知識, 而我在寫這個程序時也是遇到很多困難: 沒有改變后續分段的timestamp, 各分段的起始timestamp不一致導致聲音視頻不同步, keyframes不完整(這個在高級的播放器播放沒有問題,但對於很傻的flv播放器比如優酷flv播放器的話,它就無法手動快進到keyframes之后的timestamp, 所以我正考慮要不要修復keyframes, 但我用FLV MetaData Injector修復scriptdatatag損壞的flv視頻發現, 它猥瑣地刪除了keyframes, 所以我也...然后快進快退都ok了), 但這個合並程序局限在合並的各個flv視頻的scriptdatatag必須對duration(即視頻時長)有定義而且不可能能夠應對相當多的情況(其實也不是不可能, 但需要看flash文檔), 我正在抽時間解決這個問題, 很幸運地, 優酷的各個分段的scriptdatatag都挺完整的。另外這個程序在小端機器上編譯通過測試沒問題, 程序里我提供了對大端機器的支持, 但可惜沒有環境測試。


 接下來是連接這兩個程序的橋梁: -- bash 腳本上場了

#!/bin/bash
#
# Download an online video with ~/dl.py and, when it arrives as more than one
# segment, join the segments with ~/flvmerge.
#
# Usage: g videourl [videoquality]
# The segments are left untouched; the merged copy is written next to them.

if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]
then
  echo "Usage: $0 videourl [videoquality=normal|high|super|...]"
  echo ' e.g.'
  echo "   $0 http://v.youku.com/v_show/id_XMzMzMjE0MjE2.html super"
  exit 1
fi

# "$@" forwards the quality only when it was really given, so dl.py applies
# its own default instead of receiving an empty second argument.
# Stop here when the download fails: nothing useful is on stdout then.
files=$(~/dl.py "$@") || exit 1

# dl.py prints the file names as double-quoted shell words; eval turns them
# into the positional parameters. "--" keeps a name that starts with "-" from
# being taken as an option of set.
# NOTE(security): the names originate from a remote web page and are
# re-parsed by the shell here, so this is only as safe as dl.py's escaping.
eval "set -- $files"

if [ "$#" -gt 1 ]
then
  # strip the trailing segment number: "name001.flv" -> "name_merged.flv"
  mergedfile=$(printf '%s\n' "$1" | sed 's/[0-9]*\.flv$/_merged.flv/')
  # never let the output name coincide with the first segment: flvmerge
  # truncates its output file before reading the inputs
  if [ "$mergedfile" = "$1" ]
  then
    mergedfile="${1}_merged.flv"
  fi
  echo "starting merging files to file '$mergedfile'"
  if ~/flvmerge "$mergedfile" "$@"
  then
    echo "--- completely merging files to file '$mergedfile'"
    echo
  else
    echo "--- failed to merge files to file '$mergedfile'" >&2
    exit 1
  fi
fi

我們保存為~/g(方法同上):

# 打開終端, 如已經打開可以忽略
vi ~/g # press ENTER
# press i
# 粘貼上面的bash代碼
# press ESC
:wq # press ENTER
sudo chmod u+x ~/g # 使其可執行

這個程序嘗試捕獲~/dl.py的輸出然后如果有必要(不止一個分段時)才將各個分段視頻利用~/flvmerge合並成一個大FLV文件 

 

好了, 一切准備就緒, 你可以在終端里輸入~/g並回車執行, 可以看到命令使用幫助, 而且還給出了一個下載示例, 默認地下載到當前的文件夾, 即執行命令pwd的結果對應的目錄

 

 各個命令的使用方法如下

~/dl.py 在線視頻地址 視頻質量 # 下載指定地址指定質量的視頻, 對於優酷可能會有很多分段

~/flvmerge 要合並成的文件  第一段flv視頻 第二段flv視頻 ... # 將后續的 "第一段flv視頻" "第二段flv視頻" ... 等FLV視頻合並為 "要合並成的文件"

~/g 在線視頻地址 視頻質量 # 下載指定地址指定質量的視頻, 如果有多個分段並且各個分段都是flv格式的, 那么將這些分段合並成去除后續的分段號並添加_merged.flv的文件名的視頻 例如 "音樂銀行001.flv" 將變成 "音樂銀行_merged.flv", 不要害怕使用這個程序, 它僅僅會寫進第一個參數即"要合並成的文件"而不會改變后面分段視頻的, 所以你要是想看分段視頻的你可以直接看分段視頻的, 程序沒有修改它們, 也沒有刪除它們, 所以即便合並出錯(比如mp4格式的), 也不要怕, 可以看分段視頻

 

** 關於視頻質量:由flvcd的format參數決定, 對於優酷: normal標清, high高清, super超清, 其他的請自行實驗, 另外如果選擇優酷的高清模式下載的話, 那么這些文件將無法被合並, 因為優酷的高清視頻是mp4格式的, 而mp4的合並尚待研究

 

可以這樣試一試:

~/g http://v.youku.com/v_show/id_XMzMzMjE0MjE2.html super # 這將下載優酷超清模式的"音樂銀行 111216"並且下載完后將保存為"音樂銀行E633111216-_merged.flv", 文件位置在你執行上個命令的位置, 你可以緊接着ls *.flv查看下載的文件

 

讀者可能不太了解使用命令下載的好處, 你將看到當~/g結合curl(Mac OS X)/wget(Ubuntu)+正則等抓取優酷專輯或優酷空間視頻實行批量下載的威力,  如果網絡帶寬允許你可以嘗試: curl -s 'http://music.youku.com/' | grep -oP 'http://v.youku.com/v_show/id_\w+.html' | xargs -L 1 ~/g # 這將下載優酷音樂頻道頁面的所有音樂視頻並合並所有相同地址不同分段的視頻

 

這個是我以前的博客地址: http://blog.csdn.net/Wind__Fantasy, 曾介紹過下載鄭雲視頻全集的bash腳本(PS: 可惜我注冊郵箱忘了, 密碼也忘了, 所以這次必須得徹底轉到博客園來了, csdn上的廣告太多了, 實在受不了了。) 

 

試試看!


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM