python逆向之pyc文件

本文轉載自查看原文 2022-01-28 00:10 1053 『CTFer & RE』

最近做題老是遇到python逆向，沒有經驗，查了一些資料，記錄一下

Python是一門解釋性語言，沒有嚴格意義上的編譯和匯編過程

解釋型語言沒有嚴格的編譯匯編過程，由解釋器將代碼塊按需要邊運行邊翻譯給機器執行。因此解釋型語言一度存在運行效率低、重複解釋的問題。

但是通過對解釋器的優化，可以提高解釋型語言的運行效率。Python就屬於這一種編程語言。

一、pyc文件

1、pyc文件概述

Pyc文件是py文件編譯過程中產生的字節碼文件，可以由虛擬機直接執行，是python將目標源碼編譯成字節碼以後在磁盤上的文件形式。

2、pyc文件結構（python 2.6.2 和 python3.8）

/* Bytecode object: describes one compiled code block.
   The surrounding article presents this as the Python 2.6.2 layout. */

typedef struct {

    PyObject_HEAD

    int co_argcount;            /* number of positional arguments of the code block, e.g. a function's positional argument count */

    int co_nlocals;             /* number of local variables in the code block, including its positional arguments */

    int co_stacksize;           /* stack space needed to execute this code block */

    int co_flags;               /* CO_..., see below */

    PyObject *co_code;          /* bytecode instruction sequence compiled from the code block; stored as a PyStringObject */

    PyObject *co_consts;        /* PyTupleObject holding all constants in the code block */

    PyObject *co_names;         /* PyTupleObject holding all symbols (names) in the code block */

    PyObject *co_varnames;      /* collection of local variable names in the code block */

    PyObject *co_freevars;      /* used by Python to implement closures */

    PyObject *co_cellvars;      /* names of local variables referenced by nested functions inside the code block */

    /* The rest doesn't count for hash/cmp */

    PyObject *co_filename;      /* full path of the .py file this code block corresponds to */

    PyObject *co_name;          /* name of the code block, usually a function or class name */

    int co_firstlineno;         /* first line of the code block in the corresponding .py file */

    PyObject *co_lnotab;        /* mapping between bytecode instructions and source line numbers in the .py file; stored as a PyStringObject */

    void *co_zombieframe;     /* for optimization only (see frameobject.c) */

} PyCodeObject;

python3.8:

/* Bytecode object: describes one compiled code block.
   The surrounding article labels this layout "python3.8". */
typedef struct {
    PyObject_HEAD
    int co_argcount;            /* #arguments, except *args */
    int co_posonlyargcount;     /* #positional only arguments */
    int co_kwonlyargcount;      /* #keyword only arguments */
    int co_nlocals;             /* #local variables */
    int co_stacksize;           /* #entries needed for evaluation stack */
    int co_flags;               /* CO_..., see below */
    int co_firstlineno;         /* first source line number */
    PyObject *co_code;          /* instruction opcodes */
    PyObject *co_consts;        /* list (constants used) */
    PyObject *co_names;         /* list of strings (names used) */
    PyObject *co_varnames;      /* tuple of strings (local variable names) */
    PyObject *co_freevars;      /* tuple of strings (free variable names) */
    PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
    /* The rest aren't used in either hash or comparisons, except for co_name,
       used in both. This is done to preserve the name and line number
       for tracebacks and debuggers; otherwise, constant de-duplication
       would collapse identical functions/lambdas defined on different lines.
    */
    Py_ssize_t *co_cell2arg;    /* Maps cell vars which are arguments. */
    PyObject *co_filename;      /* unicode (where it was loaded from) */
    PyObject *co_name;          /* unicode (name, for reference) */
    PyObject *co_lnotab;        /* string (encoding addr<->lineno mapping) See
                                   Objects/lnotab_notes.txt for details. */
    void *co_zombieframe;       /* for optimization only (see frameobject.c) */
    PyObject *co_weakreflist;   /* to support weakrefs to code objects */
    /* Scratch space for extra data relating to the code object.
       Type is a void* to keep the format private in codeobject.c to force
       people to go through the proper APIs. */
    void *co_extra;

    /* Per opcodes just-in-time cache
     *
     * To reduce cache size, we use indirect mapping from opcode index to
     * cache object:
     *   cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
     */

    // co_opcache_map is indexed by (next_instr - first_instr).
    //  * 0 means there is no cache for this opcode.
    //  * n > 0 means there is cache in co_opcache[n-1].
    unsigned char *co_opcache_map;
    _PyOpcache *co_opcache;
    int co_opcache_flag;  // used to determine when to create a cache.
    unsigned char co_opcache_size;  // length of co_opcache.
} PyCodeObject;